diff --git "a/evaluation/run_ner.ipynb" "b/evaluation/run_ner.ipynb" new file mode 100644--- /dev/null +++ "b/evaluation/run_ner.ipynb" @@ -0,0 +1,12144 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "accelerator": "GPU", + "colab": { + "name": "bertin-tests.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "3375353ee2ea43d28775f62c49ee0538": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_b57b116c89594c558fb17ee835cec7ae", + "IPY_MODEL_2de683c0aad84a33b5d74c338151cb11" + ], + "layout": "IPY_MODEL_2d6e2ae6f5e24092bda544ced04abab4" + } + }, + "b57b116c89594c558fb17ee835cec7ae": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "Downloading: ", + "description_tooltip": null, + "layout": "IPY_MODEL_ce5c0860d88b4ae594ff9c4f97bc998b", + "max": 1362, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_e908ba524a584e1c82cb2a1e0e48d7d6", + "value": 1362 + } + }, + "2de683c0aad84a33b5d74c338151cb11": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5196bc6355b9487aadc2ac77b84f4c0c", + "placeholder": "​", + "style": "IPY_MODEL_76697b9a49db4f6c9a43f8a102118a45", + "value": " 2.92k/? 
[00:00<00:00, 6.04kB/s]" + } + }, + "2d6e2ae6f5e24092bda544ced04abab4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ce5c0860d88b4ae594ff9c4f97bc998b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e908ba524a584e1c82cb2a1e0e48d7d6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "initial" + } + }, + "5196bc6355b9487aadc2ac77b84f4c0c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + 
"grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "76697b9a49db4f6c9a43f8a102118a45": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "cells": [ + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "o4b4a_s2-E0M", + "outputId": "018f4cb9-3f6f-4c6e-ccd3-0e8ba83b9853" + }, + "source": [ + "!pip install -qq wandb\n", + "!wandb login\n", + "!wandb init -p bertin-eval -e versae" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "\u001b[K |████████████████████████████████| 1.8 MB 13.7 MB/s \n", + "\u001b[K |███████████████��████████████████| 133 kB 67.5 MB/s \n", + "\u001b[K |████████████████████████████████| 170 kB 59.0 MB/s \n", + "\u001b[K |████████████████████████████████| 97 kB 9.1 MB/s \n", + "\u001b[K |████████████████████████████████| 138 kB 67.8 MB/s \n", + "\u001b[K |████████████████████████████████| 63 kB 2.4 MB/s \n", + "\u001b[K |████████████████████████████████| 62 kB 1.2 MB/s \n", + "\u001b[?25h Building wheel for subprocess32 (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Building wheel for pathtools (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "google-colab 1.0.0 requires requests~=2.23.0, but you have requests 2.26.0 which is incompatible.\n", + "datascience 0.10.6 requires folium==0.2.1, but you have folium 0.8.3 which is incompatible.\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: You can find your API key in your browser here: https://wandb.ai/authorize\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Paste an API key from your profile and hit enter: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mHkxgp5vcCyC", + "outputId": "eaa6b844-629c-4f3d-f04b-e6f32b13b8ea" + }, + "source": [ + "!pip install -qqU https://github.com/huggingface/transformers/archive/refs/heads/master.zip datasets[streaming] seqeval\n", + "# !pip install -qqU transformers datasets[streaming] seqeval\n", + "# !pip install -qqU git+https://github.com/google/flax.git\n", + "# !pip install -qqU https://github.com/kpu/kenlm/archive/master.zip\n", + "!pip install -qqU torch" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "\u001b[K | 11.2 MB 2.8 MB/s\n", + "\u001b[?25h Installing build dependencies ... 
\u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing wheel metadata ... \u001b[?25l\u001b[?25hdone\n", + "\u001b[K |████████████████████████████████| 262 kB 14.5 MB/s \n", + "\u001b[K |████████████████████████████████| 43 kB 2.7 MB/s \n", + "\u001b[K |████████████████████████████████| 3.3 MB 15.5 MB/s \n", + "\u001b[K |████████████████████████████████| 636 kB 58.8 MB/s \n", + "\u001b[K |████████████████████████████████| 895 kB 49.8 MB/s \n", + "\u001b[K |████████████████████████████████| 243 kB 59.4 MB/s \n", + "\u001b[K |████████████████████████████████| 118 kB 73.2 MB/s \n", + "\u001b[K |████████████████████████████████| 1.3 MB 59.1 MB/s \n", + "\u001b[K |████████████████████████████████| 294 kB 62.5 MB/s \n", + "\u001b[K |████████████████████████████████| 142 kB 71.2 MB/s \n", + "\u001b[?25h Building wheel for transformers (PEP 517) ... \u001b[?25l\u001b[?25hdone\n", + " Building wheel for seqeval (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C5JL_ErE-Erd" + }, + "source": [ + "----" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "2Te2T37r-Ehh", + "outputId": "2274444c-685f-479c-d90a-3e9b3850e8ed" + }, + "source": [ + "%%writefile run_ner.py\n", + "#!/usr/bin/env python\n", + "# coding=utf-8\n", + "# Copyright 2020 The HuggingFace Team All rights reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "\"\"\"\n", + "Fine-tuning the library models for token classification.\n", + "\"\"\"\n", + "# You can also adapt this script on your own token classification task and datasets. Pointers for this are left as\n", + "# comments.\n", + "\n", + "import logging\n", + "import os\n", + "import sys\n", + "from dataclasses import dataclass, field\n", + "from typing import Optional\n", + "\n", + "import datasets\n", + "import numpy as np\n", + "from datasets import ClassLabel, load_dataset, load_metric\n", + "\n", + "import transformers\n", + "from transformers import (\n", + " AutoConfig,\n", + " AutoModelForTokenClassification,\n", + " AutoTokenizer,\n", + " DataCollatorForTokenClassification,\n", + " HfArgumentParser,\n", + " PreTrainedTokenizerFast,\n", + " Trainer,\n", + " TrainingArguments,\n", + " set_seed,\n", + ")\n", + "from transformers.trainer_utils import get_last_checkpoint\n", + "from transformers.utils import check_min_version\n", + "from transformers.utils.versions import require_version\n", + "\n", + "\n", + "# Will error if the minimal version of Transformers is not installed. 
Remove at your own risks.\n", + "check_min_version(\"4.9.0.dev0\")\n", + "\n", + "require_version(\"datasets>=1.8.0\", \"To fix: pip install -r examples/pytorch/token-classification/requirements.txt\")\n", + "\n", + "logger = logging.getLogger(__name__)\n", + "\n", + "\n", + "@dataclass\n", + "class ModelArguments:\n", + " \"\"\"\n", + " Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.\n", + " \"\"\"\n", + "\n", + " model_name_or_path: str = field(\n", + " metadata={\"help\": \"Path to pretrained model or model identifier from huggingface.co/models\"}\n", + " )\n", + " config_name: Optional[str] = field(\n", + " default=None, metadata={\"help\": \"Pretrained config name or path if not the same as model_name\"}\n", + " )\n", + " tokenizer_name: Optional[str] = field(\n", + " default=None, metadata={\"help\": \"Pretrained tokenizer name or path if not the same as model_name\"}\n", + " )\n", + " cache_dir: Optional[str] = field(\n", + " default=None,\n", + " metadata={\"help\": \"Where do you want to store the pretrained models downloaded from huggingface.co\"},\n", + " )\n", + " model_revision: str = field(\n", + " default=\"main\",\n", + " metadata={\"help\": \"The specific model version to use (can be a branch name, tag name or commit id).\"},\n", + " )\n", + " use_auth_token: bool = field(\n", + " default=False,\n", + " metadata={\n", + " \"help\": \"Will use the token generated when running `transformers-cli login` (necessary to use this script \"\n", + " \"with private models).\"\n", + " },\n", + " )\n", + "\n", + "\n", + "@dataclass\n", + "class DataTrainingArguments:\n", + " \"\"\"\n", + " Arguments pertaining to what data we are going to input our model for training and eval.\n", + " \"\"\"\n", + "\n", + " task_name: Optional[str] = field(default=\"ner\", metadata={\"help\": \"The name of the task (ner, pos...).\"})\n", + " dataset_name: Optional[str] = field(\n", + " default=None, metadata={\"help\": \"The name of the dataset to use (via the datasets library).\"}\n", + " )\n", + " dataset_config_name: Optional[str] = field(\n", + " default=None, metadata={\"help\": \"The configuration name of the dataset to use (via the datasets library).\"}\n", + " )\n", + " train_file: Optional[str] = field(\n", + " default=None, metadata={\"help\": \"The input training data file (a csv or JSON file).\"}\n", + " )\n", + " validation_file: Optional[str] = field(\n", + " default=None,\n", + " metadata={\"help\": \"An optional input evaluation data file to evaluate on (a csv or JSON file).\"},\n", + " )\n", + " test_file: Optional[str] = field(\n", + " default=None,\n", + " metadata={\"help\": \"An optional input test data file to predict on (a csv or JSON file).\"},\n", + " )\n", + " text_column_name: Optional[str] = field(\n", + " default=None, metadata={\"help\": \"The column name of text to input in the file (a csv or JSON file).\"}\n", + " )\n", + " label_column_name: Optional[str] = field(\n", + " default=None, metadata={\"help\": \"The column name of label to input in the file (a csv or JSON file).\"}\n", + " )\n", + " overwrite_cache: bool = field(\n", + " default=False, metadata={\"help\": \"Overwrite the cached training and evaluation sets\"}\n", + " )\n", + " preprocessing_num_workers: Optional[int] = field(\n", + " default=None,\n", + " metadata={\"help\": \"The number of processes to use for the preprocessing.\"},\n", + " )\n", + " pad_to_max_length: bool = field(\n", + " default=False,\n", + " metadata={\n", + " \"help\": \"Whether to pad all samples 
to model maximum sentence length. \"\n", + " \"If False, will pad the samples dynamically when batching to the maximum length in the batch. More \"\n", + " \"efficient on GPU but very bad for TPU.\"\n", + " },\n", + " )\n", + " max_train_samples: Optional[int] = field(\n", + " default=None,\n", + " metadata={\n", + " \"help\": \"For debugging purposes or quicker training, truncate the number of training examples to this \"\n", + " \"value if set.\"\n", + " },\n", + " )\n", + " max_eval_samples: Optional[int] = field(\n", + " default=None,\n", + " metadata={\n", + " \"help\": \"For debugging purposes or quicker training, truncate the number of evaluation examples to this \"\n", + " \"value if set.\"\n", + " },\n", + " )\n", + " max_predict_samples: Optional[int] = field(\n", + " default=None,\n", + " metadata={\n", + " \"help\": \"For debugging purposes or quicker training, truncate the number of prediction examples to this \"\n", + " \"value if set.\"\n", + " },\n", + " )\n", + " label_all_tokens: bool = field(\n", + " default=False,\n", + " metadata={\n", + " \"help\": \"Whether to put the label for one word on all tokens of generated by that word or just on the \"\n", + " \"one (in which case the other tokens will have a padding index).\"\n", + " },\n", + " )\n", + " return_entity_level_metrics: bool = field(\n", + " default=False,\n", + " metadata={\"help\": \"Whether to return all the entity levels during evaluation or just the overall ones.\"},\n", + " )\n", + "\n", + " def __post_init__(self):\n", + " if self.dataset_name is None and self.train_file is None and self.validation_file is None:\n", + " raise ValueError(\"Need either a dataset name or a training/validation file.\")\n", + " else:\n", + " if self.train_file is not None:\n", + " extension = self.train_file.split(\".\")[-1]\n", + " assert extension in [\"csv\", \"json\"], \"`train_file` should be a csv or a json file.\"\n", + " if self.validation_file is not None:\n", + " extension = self.validation_file.split(\".\")[-1]\n", + " assert extension in [\"csv\", \"json\"], \"`validation_file` should be a csv or a json file.\"\n", + " self.task_name = self.task_name.lower()\n", + "\n", + "\n", + "def main():\n", + " # See all possible arguments in src/transformers/training_args.py\n", + " # or by passing the --help flag to this script.\n", + " # We now keep distinct sets of args, for a cleaner separation of concerns.\n", + "\n", + " parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))\n", + " if len(sys.argv) == 2 and sys.argv[1].endswith(\".json\"):\n", + " # If we pass only one argument to the script and it's the path to a json file,\n", + " # let's parse it to get our arguments.\n", + " model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))\n", + " else:\n", + " model_args, data_args, training_args = parser.parse_args_into_dataclasses()\n", + "\n", + " # Setup logging\n", + " logging.basicConfig(\n", + " format=\"%(asctime)s - %(levelname)s - %(name)s - %(message)s\",\n", + " datefmt=\"%m/%d/%Y %H:%M:%S\",\n", + " handlers=[logging.StreamHandler(sys.stdout)],\n", + " )\n", + "\n", + " log_level = training_args.get_process_log_level()\n", + " logger.setLevel(log_level)\n", + " datasets.utils.logging.set_verbosity(log_level)\n", + " transformers.utils.logging.set_verbosity(log_level)\n", + " transformers.utils.logging.enable_default_handler()\n", + " transformers.utils.logging.enable_explicit_format()\n", + "\n", + " # Log on each process the small 
summary:\n", + " logger.warning(\n", + " f\"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}\"\n", + " + f\"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}\"\n", + " )\n", + " logger.info(f\"Training/evaluation parameters {training_args}\")\n", + "\n", + " # Detecting last checkpoint.\n", + " last_checkpoint = None\n", + " if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:\n", + " last_checkpoint = get_last_checkpoint(training_args.output_dir)\n", + " if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:\n", + " raise ValueError(\n", + " f\"Output directory ({training_args.output_dir}) already exists and is not empty. \"\n", + " \"Use --overwrite_output_dir to overcome.\"\n", + " )\n", + " elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:\n", + " logger.info(\n", + " f\"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change \"\n", + " \"the `--output_dir` or add `--overwrite_output_dir` to train from scratch.\"\n", + " )\n", + "\n", + " # Set seed before initializing model.\n", + " set_seed(training_args.seed)\n", + "\n", + " # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)\n", + " # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/\n", + " # (the dataset will be downloaded automatically from the datasets Hub).\n", + " #\n", + " # For CSV/JSON files, this script will use the column called 'text' or the first column if no column called\n", + " # 'text' is found. You can easily tweak this behavior (see below).\n", + " #\n", + " # In distributed training, the load_dataset function guarantee that only one local process can concurrently\n", + " # download the dataset.\n", + " if data_args.dataset_name is not None:\n", + " # Downloading and loading a dataset from the hub.\n", + " raw_datasets = load_dataset(\n", + " data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir\n", + " )\n", + " else:\n", + " data_files = {}\n", + " if data_args.train_file is not None:\n", + " data_files[\"train\"] = data_args.train_file\n", + " if data_args.validation_file is not None:\n", + " data_files[\"validation\"] = data_args.validation_file\n", + " if data_args.test_file is not None:\n", + " data_files[\"test\"] = data_args.test_file\n", + " extension = data_args.train_file.split(\".\")[-1]\n", + " raw_datasets = load_dataset(extension, data_files=data_files, cache_dir=model_args.cache_dir)\n", + " # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at\n", + " # https://huggingface.co/docs/datasets/loading_datasets.html.\n", + "\n", + " if training_args.do_train:\n", + " column_names = raw_datasets[\"train\"].column_names\n", + " features = raw_datasets[\"train\"].features\n", + " else:\n", + " column_names = raw_datasets[\"validation\"].column_names\n", + " features = raw_datasets[\"validation\"].features\n", + "\n", + " if data_args.text_column_name is not None:\n", + " text_column_name = data_args.text_column_name\n", + " elif \"tokens\" in column_names:\n", + " text_column_name = \"tokens\"\n", + " else:\n", + " text_column_name = column_names[0]\n", + "\n", + " if data_args.label_column_name is not None:\n", + " label_column_name = 
data_args.label_column_name\n", + " elif f\"{data_args.task_name}_tags\" in column_names:\n", + " label_column_name = f\"{data_args.task_name}_tags\"\n", + " else:\n", + " label_column_name = column_names[1]\n", + "\n", + " # In the event the labels are not a `Sequence[ClassLabel]`, we will need to go through the dataset to get the\n", + " # unique labels.\n", + " def get_label_list(labels):\n", + " unique_labels = set()\n", + " for label in labels:\n", + " unique_labels = unique_labels | set(label)\n", + " label_list = list(unique_labels)\n", + " label_list.sort()\n", + " return label_list\n", + "\n", + " if isinstance(features[label_column_name].feature, ClassLabel):\n", + " label_list = features[label_column_name].feature.names\n", + " # No need to convert the labels since they are already ints.\n", + " label_to_id = {i: i for i in range(len(label_list))}\n", + " else:\n", + " label_list = get_label_list(raw_datasets[\"train\"][label_column_name])\n", + " label_to_id = {l: i for i, l in enumerate(label_list)}\n", + " num_labels = len(label_list)\n", + "\n", + " # Load pretrained model and tokenizer\n", + " #\n", + " # Distributed training:\n", + " # The .from_pretrained methods guarantee that only one local process can concurrently\n", + " # download model & vocab.\n", + " config = AutoConfig.from_pretrained(\n", + " model_args.config_name if model_args.config_name else model_args.model_name_or_path,\n", + " num_labels=num_labels,\n", + " label2id=label_to_id,\n", + " id2label={i: l for l, i in label_to_id.items()},\n", + " finetuning_task=data_args.task_name,\n", + " cache_dir=model_args.cache_dir,\n", + " revision=model_args.model_revision,\n", + " use_auth_token=True if model_args.use_auth_token else None,\n", + " )\n", + "\n", + " tokenizer_name_or_path = model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path\n", + " if config.model_type in {\"gpt2\", \"roberta\"}:\n", + " tokenizer = AutoTokenizer.from_pretrained(\n", + " tokenizer_name_or_path,\n", + " cache_dir=model_args.cache_dir,\n", + " use_fast=True,\n", + " revision=model_args.model_revision,\n", + " use_auth_token=True if model_args.use_auth_token else None,\n", + " add_prefix_space=True,\n", + " )\n", + " else:\n", + " tokenizer = AutoTokenizer.from_pretrained(\n", + " tokenizer_name_or_path,\n", + " cache_dir=model_args.cache_dir,\n", + " use_fast=True,\n", + " revision=model_args.model_revision,\n", + " use_auth_token=True if model_args.use_auth_token else None,\n", + " )\n", + " tokenizer.model_max_length = 512\n", + "\n", + " model = AutoModelForTokenClassification.from_pretrained(\n", + " model_args.model_name_or_path,\n", + " from_tf=bool(\".ckpt\" in model_args.model_name_or_path),\n", + " config=config,\n", + " cache_dir=model_args.cache_dir,\n", + " revision=model_args.model_revision,\n", + " use_auth_token=True if model_args.use_auth_token else None,\n", + " )\n", + "\n", + " # Tokenizer check: this script requires a fast tokenizer.\n", + " if not isinstance(tokenizer, PreTrainedTokenizerFast):\n", + " raise ValueError(\n", + " \"This example script only works for models that have a fast tokenizer. 
Checkout the big table of models \"\n", + " \"at https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet this \"\n", + " \"requirement\"\n", + " )\n", + "\n", + " # Preprocessing the dataset\n", + " # Padding strategy\n", + " padding = \"max_length\" if data_args.pad_to_max_length else False\n", + "\n", + " # Tokenize all texts and align the labels with them.\n", + " def tokenize_and_align_labels(examples):\n", + " tokenized_inputs = tokenizer(\n", + " examples[text_column_name],\n", + " padding=padding,\n", + " max_length=512,\n", + " truncation=True,\n", + " # We use this argument because the texts in our dataset are lists of words (with a label for each word).\n", + " is_split_into_words=True,\n", + " )\n", + " labels = []\n", + " for i, label in enumerate(examples[label_column_name]):\n", + " word_ids = tokenized_inputs.word_ids(batch_index=i)\n", + " previous_word_idx = None\n", + " label_ids = []\n", + " for word_idx in word_ids:\n", + " # Special tokens have a word id that is None. We set the label to -100 so they are automatically\n", + " # ignored in the loss function.\n", + " if word_idx is None:\n", + " label_ids.append(-100)\n", + " # We set the label for the first token of each word.\n", + " elif word_idx != previous_word_idx:\n", + " label_ids.append(label_to_id[label[word_idx]])\n", + " # For the other tokens in a word, we set the label to either the current label or -100, depending on\n", + " # the label_all_tokens flag.\n", + " else:\n", + " label_ids.append(label_to_id[label[word_idx]] if data_args.label_all_tokens else -100)\n", + " previous_word_idx = word_idx\n", + "\n", + " labels.append(label_ids)\n", + " tokenized_inputs[\"labels\"] = labels\n", + " return tokenized_inputs\n", + "\n", + " if training_args.do_train:\n", + " if \"train\" not in raw_datasets:\n", + " raise ValueError(\"--do_train requires a train dataset\")\n", + " train_dataset = raw_datasets[\"train\"]\n", + " if data_args.max_train_samples is not None:\n", + " train_dataset = train_dataset.select(range(data_args.max_train_samples))\n", + " with training_args.main_process_first(desc=\"train dataset map pre-processing\"):\n", + " train_dataset = train_dataset.map(\n", + " tokenize_and_align_labels,\n", + " batched=True,\n", + " num_proc=data_args.preprocessing_num_workers,\n", + " load_from_cache_file=not data_args.overwrite_cache,\n", + " desc=\"Running tokenizer on train dataset\",\n", + " )\n", + "\n", + " if training_args.do_eval:\n", + " if \"validation\" not in raw_datasets:\n", + " raise ValueError(\"--do_eval requires a validation dataset\")\n", + " eval_dataset = raw_datasets[\"validation\"]\n", + " if data_args.max_eval_samples is not None:\n", + " eval_dataset = eval_dataset.select(range(data_args.max_eval_samples))\n", + " with training_args.main_process_first(desc=\"validation dataset map pre-processing\"):\n", + " eval_dataset = eval_dataset.map(\n", + " tokenize_and_align_labels,\n", + " batched=True,\n", + " num_proc=data_args.preprocessing_num_workers,\n", + " load_from_cache_file=not data_args.overwrite_cache,\n", + " desc=\"Running tokenizer on validation dataset\",\n", + " )\n", + "\n", + " if training_args.do_predict:\n", + " if \"test\" not in raw_datasets:\n", + " raise ValueError(\"--do_predict requires a test dataset\")\n", + " predict_dataset = raw_datasets[\"test\"]\n", + " if data_args.max_predict_samples is not None:\n", + " predict_dataset = predict_dataset.select(range(data_args.max_predict_samples))\n", + " with 
training_args.main_process_first(desc=\"prediction dataset map pre-processing\"):\n", + " predict_dataset = predict_dataset.map(\n", + " tokenize_and_align_labels,\n", + " batched=True,\n", + " num_proc=data_args.preprocessing_num_workers,\n", + " load_from_cache_file=not data_args.overwrite_cache,\n", + " desc=\"Running tokenizer on prediction dataset\",\n", + " )\n", + "\n", + " # Data collator\n", + " data_collator = DataCollatorForTokenClassification(tokenizer, pad_to_multiple_of=8 if training_args.fp16 else None)\n", + "\n", + " # Metrics\n", + " metric = load_metric(\"seqeval\")\n", + "\n", + " def compute_metrics(p):\n", + " predictions, labels = p\n", + " predictions = np.argmax(predictions, axis=2)\n", + "\n", + " # Remove ignored index (special tokens)\n", + " true_predictions = [\n", + " [label_list[p] for (p, l) in zip(prediction, label) if l != -100]\n", + " for prediction, label in zip(predictions, labels)\n", + " ]\n", + " true_labels = [\n", + " [label_list[l] for (p, l) in zip(prediction, label) if l != -100]\n", + " for prediction, label in zip(predictions, labels)\n", + " ]\n", + "\n", + " results = metric.compute(predictions=true_predictions, references=true_labels)\n", + " if data_args.return_entity_level_metrics:\n", + " # Unpack nested dictionaries\n", + " final_results = {}\n", + " for key, value in results.items():\n", + " if isinstance(value, dict):\n", + " for n, v in value.items():\n", + " final_results[f\"{key}_{n}\"] = v\n", + " else:\n", + " final_results[key] = value\n", + " return final_results\n", + " else:\n", + " return {\n", + " \"precision\": results[\"overall_precision\"],\n", + " \"recall\": results[\"overall_recall\"],\n", + " \"f1\": results[\"overall_f1\"],\n", + " \"accuracy\": results[\"overall_accuracy\"],\n", + " }\n", + "\n", + " # Initialize our Trainer\n", + " trainer = Trainer(\n", + " model=model,\n", + " args=training_args,\n", + " train_dataset=train_dataset if training_args.do_train else None,\n", + " eval_dataset=eval_dataset if training_args.do_eval else None,\n", + " tokenizer=tokenizer,\n", + " data_collator=data_collator,\n", + " compute_metrics=compute_metrics,\n", + " )\n", + "\n", + " # Training\n", + " if training_args.do_train:\n", + " checkpoint = None\n", + " if training_args.resume_from_checkpoint is not None:\n", + " checkpoint = training_args.resume_from_checkpoint\n", + " elif last_checkpoint is not None:\n", + " checkpoint = last_checkpoint\n", + " train_result = trainer.train(resume_from_checkpoint=checkpoint)\n", + " metrics = train_result.metrics\n", + " trainer.save_model() # Saves the tokenizer too for easy upload\n", + "\n", + " max_train_samples = (\n", + " data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset)\n", + " )\n", + " metrics[\"train_samples\"] = min(max_train_samples, len(train_dataset))\n", + "\n", + " trainer.log_metrics(\"train\", metrics)\n", + " trainer.save_metrics(\"train\", metrics)\n", + " trainer.save_state()\n", + "\n", + " # Evaluation\n", + " if training_args.do_eval:\n", + " logger.info(\"*** Evaluate ***\")\n", + "\n", + " metrics = trainer.evaluate()\n", + "\n", + " max_eval_samples = data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset)\n", + " metrics[\"eval_samples\"] = min(max_eval_samples, len(eval_dataset))\n", + "\n", + " trainer.log_metrics(\"eval\", metrics)\n", + " trainer.save_metrics(\"eval\", metrics)\n", + "\n", + " # Predict\n", + " if training_args.do_predict:\n", + " logger.info(\"*** 
Predict ***\")\n", + "\n", + " predictions, labels, metrics = trainer.predict(predict_dataset, metric_key_prefix=\"predict\")\n", + " predictions = np.argmax(predictions, axis=2)\n", + "\n", + " # Remove ignored index (special tokens)\n", + " true_predictions = [\n", + " [label_list[p] for (p, l) in zip(prediction, label) if l != -100]\n", + " for prediction, label in zip(predictions, labels)\n", + " ]\n", + "\n", + " trainer.log_metrics(\"predict\", metrics)\n", + " trainer.save_metrics(\"predict\", metrics)\n", + "\n", + " # Save predictions\n", + " output_predictions_file = os.path.join(training_args.output_dir, \"predictions.txt\")\n", + " if trainer.is_world_process_zero():\n", + " with open(output_predictions_file, \"w\") as writer:\n", + " for prediction in true_predictions:\n", + " writer.write(\" \".join(prediction) + \"\\n\")\n", + "\n", + " if training_args.push_to_hub:\n", + " kwargs = {\"finetuned_from\": model_args.model_name_or_path, \"tasks\": \"token-classification\"}\n", + " if data_args.dataset_name is not None:\n", + " kwargs[\"dataset_tags\"] = data_args.dataset_name\n", + " if data_args.dataset_config_name is not None:\n", + " kwargs[\"dataset_args\"] = data_args.dataset_config_name\n", + " kwargs[\"dataset\"] = f\"{data_args.dataset_name} {data_args.dataset_config_name}\"\n", + " else:\n", + " kwargs[\"dataset\"] = data_args.dataset_name\n", + "\n", + " trainer.push_to_hub(**kwargs)\n", + "\n", + "\n", + "def _mp_fn(index):\n", + " # For xla_spawn (TPUs)\n", + " main()\n", + "\n", + "\n", + "if __name__ == \"__main__\":\n", + " main()\n" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Writing run_ner.py\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "y7Yy3BzmCOkb" + }, + "source": [ + "models = [\n", + " \"bertin-project/bertin-base-gaussian-exp-512seqlen\",\n", + " \"bertin-project/bertin-base-random-exp-512seqlen\",\n", + " \"bertin-project/bertin-base-gaussian\",\n", + " \"bertin-project/bertin-base-stepwise\",\n", + " \"bertin-project/bertin-base-random\",\n", + " \"bertin-project/bertin-roberta-base-spanish\",\n", + " \"flax-community/bertin-roberta-large-spanish\",\n", + " \"BSC-TeMU/roberta-base-bne\",\n", + " \"dccuchile/bert-base-spanish-wwm-cased\",\n", + " \"bert-base-multilingual-cased\",\n", + "]" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TFxp2fElBJb3" + }, + "source": [ + "## NER" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "tpygu0_z7sbu", + "outputId": "fd9b2852-8b50-471c-c1f0-28b4c880b4f9" + }, + "source": [ + "#!wget -O run_ner.py https://raw.githubusercontent.com/huggingface/transformers/master/examples/pytorch/token-classification/run_ner.py\n", + "for model in models:\n", + " !WANDB_PROJECT=bertin-eval TOKENIZERS_PARALLELISM=false CUDA_LAUNCH_BLOCKING=1 python run_ner.py \\\n", + " --model_name_or_path $model \\\n", + " --dataset_name conll2002 \\\n", + " --dataset_config_name es \\\n", + " --output_dir ./outputs \\\n", + " --overwrite_output_dir \\\n", + " --pad_to_max_length \\\n", + " --num_train_epochs 5 \\\n", + " --do_train \\\n", + " --do_eval" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "2021-07-19 08:22:26.414910: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "07/19/2021 08:22:28 - WARNING 
- __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", + "07/19/2021 08:22:28 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", + "_n_gpu=1,\n", + "adafactor=False,\n", + "adam_beta1=0.9,\n", + "adam_beta2=0.999,\n", + "adam_epsilon=1e-08,\n", + "dataloader_drop_last=False,\n", + "dataloader_num_workers=0,\n", + "dataloader_pin_memory=True,\n", + "ddp_find_unused_parameters=None,\n", + "debug=[],\n", + "deepspeed=None,\n", + "disable_tqdm=False,\n", + "do_eval=True,\n", + "do_predict=False,\n", + "do_train=True,\n", + "eval_accumulation_steps=None,\n", + "eval_steps=500,\n", + "evaluation_strategy=IntervalStrategy.NO,\n", + "fp16=False,\n", + "fp16_backend=auto,\n", + "fp16_full_eval=False,\n", + "fp16_opt_level=O1,\n", + "gradient_accumulation_steps=1,\n", + "greater_is_better=None,\n", + "group_by_length=False,\n", + "ignore_data_skip=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=5e-05,\n", + "length_column_name=length,\n", + "load_best_model_at_end=False,\n", + "local_rank=-1,\n", + "log_level=-1,\n", + "log_level_replica=-1,\n", + "log_on_each_node=True,\n", + "logging_dir=./outputs/runs/Jul19_08-22-28_eab5184de9ff,\n", + "logging_first_step=False,\n", + "logging_steps=500,\n", + "logging_strategy=IntervalStrategy.STEPS,\n", + "lr_scheduler_type=SchedulerType.LINEAR,\n", + "max_grad_norm=1.0,\n", + "max_steps=-1,\n", + "metric_for_best_model=None,\n", + "mp_parameters=,\n", + "no_cuda=False,\n", + "num_train_epochs=5.0,\n", + "output_dir=./outputs,\n", + "overwrite_output_dir=True,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=8,\n", + "per_device_train_batch_size=8,\n", + "prediction_loss_only=False,\n", + "push_to_hub=False,\n", + "push_to_hub_model_id=outputs,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=None,\n", + "remove_unused_columns=True,\n", + "report_to=['tensorboard', 'wandb'],\n", + "resume_from_checkpoint=None,\n", + "run_name=./outputs,\n", + "save_on_each_node=False,\n", + "save_steps=500,\n", + "save_strategy=IntervalStrategy.STEPS,\n", + "save_total_limit=None,\n", + "seed=42,\n", + "sharded_ddp=[],\n", + "skip_memory_metrics=True,\n", + "tpu_metrics_debug=False,\n", + "tpu_num_cores=None,\n", + "use_legacy_prediction_loop=False,\n", + "warmup_ratio=0.0,\n", + "warmup_steps=0,\n", + "weight_decay=0.0,\n", + ")\n", + "07/19/2021 08:22:30 - INFO - datasets.utils.file_utils - https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py not found in cache or force_download set to True, downloading to /root/.cache/huggingface/datasets/downloads/tmpruzr4ail\n", + "Downloading: 9.23kB [00:00, 6.35MB/s] \n", + "07/19/2021 08:22:30 - INFO - datasets.utils.file_utils - storing https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py in cache at /root/.cache/huggingface/datasets/downloads/519ce586cff9011c0a219975915c0003e536279b4bd42403835d1cf43fff22f9.ace7fba2d67dd937707ecbe9d69d7b5ffaa6a9a0958eef09d32307e5a9e91d1b.py\n", + "07/19/2021 08:22:30 - INFO - datasets.utils.file_utils - creating metadata file for /root/.cache/huggingface/datasets/downloads/519ce586cff9011c0a219975915c0003e536279b4bd42403835d1cf43fff22f9.ace7fba2d67dd937707ecbe9d69d7b5ffaa6a9a0958eef09d32307e5a9e91d1b.py\n", + "07/19/2021 08:22:30 - INFO - datasets.utils.file_utils - https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json not found in cache or 
force_download set to True, downloading to /root/.cache/huggingface/datasets/downloads/tmpo9nulrxu\n", + "Downloading: 7.46kB [00:00, 5.42MB/s] \n", + "07/19/2021 08:22:30 - INFO - datasets.utils.file_utils - storing https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json in cache at /root/.cache/huggingface/datasets/downloads/83927a8859df2b335b5a3ac3be3b6ee9463fe03465d5a67850ae8b3fc6415900.bbc007f319f56bfa8ac0177418ca9c3bbddb8a3887d6e89db0422e626b610a47\n", + "07/19/2021 08:22:30 - INFO - datasets.utils.file_utils - creating metadata file for /root/.cache/huggingface/datasets/downloads/83927a8859df2b335b5a3ac3be3b6ee9463fe03465d5a67850ae8b3fc6415900.bbc007f319f56bfa8ac0177418ca9c3bbddb8a3887d6e89db0422e626b610a47\n", + "07/19/2021 08:22:30 - INFO - datasets.load - Creating main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 08:22:30 - INFO - datasets.load - Creating specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 08:22:30 - INFO - datasets.load - Copying script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + "07/19/2021 08:22:30 - INFO - datasets.load - Copying dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 08:22:30 - INFO - datasets.load - Creating metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 08:22:31 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 08:22:31 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 08:22:31 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + "07/19/2021 08:22:31 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to 
/root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 08:22:31 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 08:22:31 - INFO - datasets.info - Loading Dataset Infos from /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 08:22:31 - INFO - datasets.builder - Generating dataset conll2002 (/root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5)\n", + "Downloading and preparing dataset conll2002/es (download: 3.95 MiB, generated: 8.87 MiB, post-processed: Unknown size, total: 12.82 MiB) to /root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5...\n", + "07/19/2021 08:22:31 - INFO - datasets.builder - Dataset not on Hf google storage. Downloading and preparing it from source\n", + " 0% 0/3 [00:00> https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpqymmuxy6\n", + "Downloading: 100% 618/618 [00:00<00:00, 441kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 08:22:39,529 >> storing https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/config.json in cache at /root/.cache/huggingface/transformers/ff85ac64e56df502ec043af591c8b7be85583b22e6a4f5715146d461cd789f97.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|file_utils.py:1636] 2021-07-19 08:22:39,529 >> creating metadata file for /root/.cache/huggingface/transformers/ff85ac64e56df502ec043af591c8b7be85583b22e6a4f5715146d461cd789f97.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|configuration_utils.py:545] 2021-07-19 08:22:39,529 >> loading configuration file https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/ff85ac64e56df502ec043af591c8b7be85583b22e6a4f5715146d461cd789f97.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|configuration_utils.py:581] 2021-07-19 08:22:39,530 >> Model config RobertaConfig {\n", + " \"architectures\": [\n", + " \"RobertaForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"bos_token_id\": 0,\n", + " \"eos_token_id\": 2,\n", + " \"finetuning_task\": \"ner\",\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"id2label\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8\n", + " },\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"label2id\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8\n", + " },\n", + " \"layer_norm_eps\": 1e-05,\n", + " 
\"max_position_embeddings\": 514,\n", + " \"model_type\": \"roberta\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 1,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50265\n", + "}\n", + "\n", + "[INFO|file_utils.py:1624] 2021-07-19 08:22:40,247 >> https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/tokenizer_config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp2hh8jjda\n", + "Downloading: 100% 292/292 [00:00<00:00, 219kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 08:22:40,963 >> storing https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/tokenizer_config.json in cache at /root/.cache/huggingface/transformers/1d0cecadbe0c9f16993a436d0ab40b879322ac605869d265787a93ea1e00ec7a.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|file_utils.py:1636] 2021-07-19 08:22:40,964 >> creating metadata file for /root/.cache/huggingface/transformers/1d0cecadbe0c9f16993a436d0ab40b879322ac605869d265787a93ea1e00ec7a.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|file_utils.py:1624] 2021-07-19 08:22:41,681 >> https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/vocab.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmppzfduoru\n", + "Downloading: 100% 855k/855k [00:00<00:00, 1.50MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 08:22:42,977 >> storing https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/vocab.json in cache at /root/.cache/huggingface/transformers/6eaae493de84fa6ec66bcb5673055437acaefce1f59d8601bc2fe5c67e118d1c.a80f232f572026f92499b14999a8ed4e044e04cf3d01b9f2be298c98e78e8498\n", + "[INFO|file_utils.py:1636] 2021-07-19 08:22:42,977 >> creating metadata file for /root/.cache/huggingface/transformers/6eaae493de84fa6ec66bcb5673055437acaefce1f59d8601bc2fe5c67e118d1c.a80f232f572026f92499b14999a8ed4e044e04cf3d01b9f2be298c98e78e8498\n", + "[INFO|file_utils.py:1624] 2021-07-19 08:22:43,692 >> https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/merges.txt not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpcd4r3rkd\n", + "Downloading: 100% 514k/514k [00:00<00:00, 1.29MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 08:22:45,076 >> storing https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/merges.txt in cache at /root/.cache/huggingface/transformers/6f7eacc3a8be0f2ccac79d197ccc70f65831409c32f4f49f8a43968d0ec8d04e.a0dfc41f9d0f03a56ba7a5401d770f6e43071045a0bd79073380d408d17a0d92\n", + "[INFO|file_utils.py:1636] 2021-07-19 08:22:45,076 >> creating metadata file for /root/.cache/huggingface/transformers/6f7eacc3a8be0f2ccac79d197ccc70f65831409c32f4f49f8a43968d0ec8d04e.a0dfc41f9d0f03a56ba7a5401d770f6e43071045a0bd79073380d408d17a0d92\n", + "[INFO|file_utils.py:1624] 2021-07-19 08:22:45,805 >> https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/tokenizer.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpz9nq0sop\n", + "Downloading: 100% 1.47M/1.47M [00:00<00:00, 2.08MB/s]\n", + 
"[INFO|file_utils.py:1628] 2021-07-19 08:22:47,243 >> storing https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/tokenizer.json in cache at /root/.cache/huggingface/transformers/fa0ebe33b40c5fb911a969102ec8f72a5a7de108098917d817b5924edf9fe90d.2a5dc806edc00ab3a329cb22b9973596ca75b24ba0e5e4963bf1308de7237a3d\n", + "[INFO|file_utils.py:1636] 2021-07-19 08:22:47,243 >> creating metadata file for /root/.cache/huggingface/transformers/fa0ebe33b40c5fb911a969102ec8f72a5a7de108098917d817b5924edf9fe90d.2a5dc806edc00ab3a329cb22b9973596ca75b24ba0e5e4963bf1308de7237a3d\n", + "[INFO|file_utils.py:1624] 2021-07-19 08:22:48,678 >> https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/special_tokens_map.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpbc7bz1ta\n", + "Downloading: 100% 239/239 [00:00<00:00, 196kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 08:22:49,674 >> storing https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/special_tokens_map.json in cache at /root/.cache/huggingface/transformers/051ab3c041e9debc74f58f307de15b70a96de57e0f4b31ceaae21fe4eea531ec.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|file_utils.py:1636] 2021-07-19 08:22:49,674 >> creating metadata file for /root/.cache/huggingface/transformers/051ab3c041e9debc74f58f307de15b70a96de57e0f4b31ceaae21fe4eea531ec.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 08:22:50,391 >> loading file https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/vocab.json from cache at /root/.cache/huggingface/transformers/6eaae493de84fa6ec66bcb5673055437acaefce1f59d8601bc2fe5c67e118d1c.a80f232f572026f92499b14999a8ed4e044e04cf3d01b9f2be298c98e78e8498\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 08:22:50,391 >> loading file https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/merges.txt from cache at /root/.cache/huggingface/transformers/6f7eacc3a8be0f2ccac79d197ccc70f65831409c32f4f49f8a43968d0ec8d04e.a0dfc41f9d0f03a56ba7a5401d770f6e43071045a0bd79073380d408d17a0d92\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 08:22:50,391 >> loading file https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/fa0ebe33b40c5fb911a969102ec8f72a5a7de108098917d817b5924edf9fe90d.2a5dc806edc00ab3a329cb22b9973596ca75b24ba0e5e4963bf1308de7237a3d\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 08:22:50,391 >> loading file https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 08:22:50,391 >> loading file https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/special_tokens_map.json from cache at /root/.cache/huggingface/transformers/051ab3c041e9debc74f58f307de15b70a96de57e0f4b31ceaae21fe4eea531ec.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 08:22:50,391 >> loading file https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/tokenizer_config.json from cache at 
/root/.cache/huggingface/transformers/1d0cecadbe0c9f16993a436d0ab40b879322ac605869d265787a93ea1e00ec7a.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|file_utils.py:1624] 2021-07-19 08:22:51,174 >> https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/pytorch_model.bin not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmperk_2wag\n", + "Downloading: 100% 499M/499M [00:47<00:00, 10.4MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 08:23:40,401 >> storing https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/c4dacb0e6c084991812c2661ba4b4e6fc953317a1ed82b01bba8d1ceb63b27f9.18c798ebfb044aa6dc4cff70b4b5dc2720424580a68b7e3d86de6a3d0c05f6b1\n", + "[INFO|file_utils.py:1636] 2021-07-19 08:23:40,402 >> creating metadata file for /root/.cache/huggingface/transformers/c4dacb0e6c084991812c2661ba4b4e6fc953317a1ed82b01bba8d1ceb63b27f9.18c798ebfb044aa6dc4cff70b4b5dc2720424580a68b7e3d86de6a3d0c05f6b1\n", + "[INFO|modeling_utils.py:1271] 2021-07-19 08:23:40,402 >> loading weights file https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/c4dacb0e6c084991812c2661ba4b4e6fc953317a1ed82b01bba8d1ceb63b27f9.18c798ebfb044aa6dc4cff70b4b5dc2720424580a68b7e3d86de6a3d0c05f6b1\n", + "[WARNING|modeling_utils.py:1502] 2021-07-19 08:23:41,907 >> Some weights of the model checkpoint at bertin-project/bertin-base-gaussian-exp-512seqlen were not used when initializing RobertaForTokenClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias']\n", + "- This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "[WARNING|modeling_utils.py:1513] 2021-07-19 08:23:41,907 >> Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at bertin-project/bertin-base-gaussian-exp-512seqlen and are newly initialized: ['classifier.weight', 'classifier.bias']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "Running tokenizer on train dataset: 0% 0/9 [00:00> The following columns in the training set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, pos_tags, id, tokens.\n", + "[INFO|trainer.py:1162] 2021-07-19 08:23:55,930 >> ***** Running training *****\n", + "[INFO|trainer.py:1163] 2021-07-19 08:23:55,930 >> Num examples = 8324\n", + "[INFO|trainer.py:1164] 2021-07-19 08:23:55,930 >> Num Epochs = 5\n", + "[INFO|trainer.py:1165] 2021-07-19 08:23:55,931 >> Instantaneous batch size per device = 8\n", + "[INFO|trainer.py:1166] 2021-07-19 08:23:55,931 >> Total train batch size (w. 
parallel, distributed & accumulation) = 8\n", + "[INFO|trainer.py:1167] 2021-07-19 08:23:55,931 >> Gradient Accumulation steps = 1\n", + "[INFO|trainer.py:1168] 2021-07-19 08:23:55,931 >> Total optimization steps = 5205\n", + "[INFO|integrations.py:417] 2021-07-19 08:23:55,947 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mversae\u001b[0m (use `wandb login --relogin` to force relogin)\n", + "2021-07-19 08:23:57.438203: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m./outputs\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval/runs/ble6jobx\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in /content/wandb/run-20210719_082356-ble6jobx\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run `wandb offline` to turn off syncing.\n", + "\n", + " 10% 500/5205 [02:49<30:20, 2.58it/s]{'loss': 0.1317, 'learning_rate': 4.5196926032660905e-05, 'epoch': 0.48}\n", + " 10% 500/5205 [02:49<30:20, 2.58it/s][INFO|trainer.py:1917] 2021-07-19 08:26:48,133 >> Saving model checkpoint to ./outputs/checkpoint-500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 08:26:48,135 >> Configuration saved in ./outputs/checkpoint-500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 08:26:49,681 >> Model weights saved in ./outputs/checkpoint-500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 08:26:49,682 >> tokenizer config file saved in ./outputs/checkpoint-500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 08:26:49,683 >> Special tokens file saved in ./outputs/checkpoint-500/special_tokens_map.json\n", + " 19% 1000/5205 [05:43<26:49, 2.61it/s]{'loss': 0.0671, 'learning_rate': 4.039385206532181e-05, 'epoch': 0.96}\n", + " 19% 1000/5205 [05:43<26:49, 2.61it/s][INFO|trainer.py:1917] 2021-07-19 08:29:42,129 >> Saving model checkpoint to ./outputs/checkpoint-1000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 08:29:42,130 >> Configuration saved in ./outputs/checkpoint-1000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 08:29:43,439 >> Model weights saved in ./outputs/checkpoint-1000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 08:29:43,440 >> tokenizer config file saved in ./outputs/checkpoint-1000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 08:29:43,440 >> Special tokens file saved in ./outputs/checkpoint-1000/special_tokens_map.json\n", + " 29% 1500/5205 [08:37<24:02, 2.57it/s]{'loss': 0.0414, 'learning_rate': 3.5590778097982716e-05, 'epoch': 1.44}\n", + "[INFO|trainer.py:1917] 2021-07-19 08:32:35,943 >> Saving model checkpoint to ./outputs/checkpoint-1500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 08:32:35,948 >> Configuration saved in ./outputs/checkpoint-1500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 08:32:37,409 >> Model weights saved in ./outputs/checkpoint-1500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 08:32:37,410 >> tokenizer config file saved in 
./outputs/checkpoint-1500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 08:32:37,410 >> Special tokens file saved in ./outputs/checkpoint-1500/special_tokens_map.json\n", + "{'loss': 0.0406, 'learning_rate': 3.078770413064361e-05, 'epoch': 1.92}\n", + " 38% 2000/5205 [11:31<20:19, 2.63it/s][INFO|trainer.py:1917] 2021-07-19 08:35:29,836 >> Saving model checkpoint to ./outputs/checkpoint-2000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 08:35:29,837 >> Configuration saved in ./outputs/checkpoint-2000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 08:35:31,276 >> Model weights saved in ./outputs/checkpoint-2000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 08:35:31,277 >> tokenizer config file saved in ./outputs/checkpoint-2000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 08:35:31,277 >> Special tokens file saved in ./outputs/checkpoint-2000/special_tokens_map.json\n", + "{'loss': 0.0254, 'learning_rate': 2.5984630163304517e-05, 'epoch': 2.4}\n", + " 48% 2500/5205 [14:24<17:11, 2.62it/s][INFO|trainer.py:1917] 2021-07-19 08:38:23,383 >> Saving model checkpoint to ./outputs/checkpoint-2500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 08:38:23,385 >> Configuration saved in ./outputs/checkpoint-2500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 08:38:24,711 >> Model weights saved in ./outputs/checkpoint-2500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 08:38:24,712 >> tokenizer config file saved in ./outputs/checkpoint-2500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 08:38:24,712 >> Special tokens file saved in ./outputs/checkpoint-2500/special_tokens_map.json\n", + "{'loss': 0.0217, 'learning_rate': 2.118155619596542e-05, 'epoch': 2.88}\n", + " 58% 3000/5205 [17:18<13:52, 2.65it/s][INFO|trainer.py:1917] 2021-07-19 08:41:17,051 >> Saving model checkpoint to ./outputs/checkpoint-3000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 08:41:17,052 >> Configuration saved in ./outputs/checkpoint-3000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 08:41:18,441 >> Model weights saved in ./outputs/checkpoint-3000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 08:41:18,442 >> tokenizer config file saved in ./outputs/checkpoint-3000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 08:41:18,442 >> Special tokens file saved in ./outputs/checkpoint-3000/special_tokens_map.json\n", + " 67% 3500/5205 [20:10<10:42, 2.65it/s]{'loss': 0.0183, 'learning_rate': 1.6378482228626322e-05, 'epoch': 3.36}\n", + "[INFO|trainer.py:1917] 2021-07-19 08:44:09,688 >> Saving model checkpoint to ./outputs/checkpoint-3500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 08:44:09,689 >> Configuration saved in ./outputs/checkpoint-3500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 08:44:11,008 >> Model weights saved in ./outputs/checkpoint-3500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 08:44:11,009 >> tokenizer config file saved in ./outputs/checkpoint-3500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 08:44:11,009 >> Special tokens file saved in ./outputs/checkpoint-3500/special_tokens_map.json\n", + " 77% 4000/5205 [23:03<07:34, 2.65it/s]{'loss': 0.013, 'learning_rate': 1.1575408261287224e-05, 'epoch': 3.84}\n", + "[INFO|trainer.py:1917] 2021-07-19 08:47:02,389 >> Saving model 
checkpoint to ./outputs/checkpoint-4000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 08:47:02,390 >> Configuration saved in ./outputs/checkpoint-4000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 08:47:03,697 >> Model weights saved in ./outputs/checkpoint-4000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 08:47:03,698 >> tokenizer config file saved in ./outputs/checkpoint-4000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 08:47:03,699 >> Special tokens file saved in ./outputs/checkpoint-4000/special_tokens_map.json\n", + " 86% 4500/5205 [25:55<04:26, 2.64it/s]{'loss': 0.0102, 'learning_rate': 6.7723342939481265e-06, 'epoch': 4.32}\n", + "[INFO|trainer.py:1917] 2021-07-19 08:49:54,678 >> Saving model checkpoint to ./outputs/checkpoint-4500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 08:49:54,680 >> Configuration saved in ./outputs/checkpoint-4500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 08:49:56,079 >> Model weights saved in ./outputs/checkpoint-4500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 08:49:56,080 >> tokenizer config file saved in ./outputs/checkpoint-4500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 08:49:56,080 >> Special tokens file saved in ./outputs/checkpoint-4500/special_tokens_map.json\n", + " 96% 5000/5205 [28:49<01:18, 2.62it/s]{'loss': 0.0087, 'learning_rate': 1.96926032660903e-06, 'epoch': 4.8}\n", + "[INFO|trainer.py:1917] 2021-07-19 08:52:47,854 >> Saving model checkpoint to ./outputs/checkpoint-5000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 08:52:47,856 >> Configuration saved in ./outputs/checkpoint-5000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 08:52:49,283 >> Model weights saved in ./outputs/checkpoint-5000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 08:52:49,284 >> tokenizer config file saved in ./outputs/checkpoint-5000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 08:52:49,285 >> Special tokens file saved in ./outputs/checkpoint-5000/special_tokens_map.json\n", + "100% 5205/5205 [30:03<00:00, 3.34it/s][INFO|trainer.py:1358] 2021-07-19 08:54:01,973 >> \n", + "\n", + "Training completed. 
Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n", + "{'train_runtime': 1806.0427, 'train_samples_per_second': 23.045, 'train_steps_per_second': 2.882, 'train_loss': 0.03659094202186372, 'epoch': 5.0}\n", + "100% 5205/5205 [30:03<00:00, 2.89it/s]\n", + "[INFO|trainer.py:1917] 2021-07-19 08:54:01,976 >> Saving model checkpoint to ./outputs\n", + "[INFO|configuration_utils.py:379] 2021-07-19 08:54:01,978 >> Configuration saved in ./outputs/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 08:54:03,477 >> Model weights saved in ./outputs/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 08:54:03,478 >> tokenizer config file saved in ./outputs/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 08:54:03,478 >> Special tokens file saved in ./outputs/special_tokens_map.json\n", + "***** train metrics *****\n", + " epoch = 5.0\n", + " train_loss = 0.0366\n", + " train_runtime = 0:30:06.04\n", + " train_samples = 8324\n", + " train_samples_per_second = 23.045\n", + " train_steps_per_second = 2.882\n", + "07/19/2021 08:54:03 - INFO - __main__ - *** Evaluate ***\n", + "[INFO|trainer.py:522] 2021-07-19 08:54:03,633 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, pos_tags, id, tokens.\n", + "[INFO|trainer.py:2163] 2021-07-19 08:54:03,730 >> ***** Running Evaluation *****\n", + "[INFO|trainer.py:2165] 2021-07-19 08:54:03,730 >> Num examples = 1916\n", + "[INFO|trainer.py:2168] 2021-07-19 08:54:03,730 >> Batch size = 8\n", + "100% 240/240 [00:26<00:00, 9.08it/s]\n", + "***** eval metrics *****\n", + " epoch = 5.0\n", + " eval_accuracy = 0.9819\n", + " eval_f1 = 0.8764\n", + " eval_loss = 0.1065\n", + " eval_precision = 0.8699\n", + " eval_recall = 0.883\n", + " eval_runtime = 0:00:26.54\n", + " eval_samples = 1916\n", + " eval_samples_per_second = 72.184\n", + " eval_steps_per_second = 9.042\n", + "\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish, PID 238\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Program ended successfully.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find user logs for this run at: /content/wandb/run-20210719_082356-ble6jobx/logs/debug.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find internal logs for this run at: /content/wandb/run-20210719_082356-ble6jobx/logs/debug-internal.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.0087\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 5205\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime 1834\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp 1626684870\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step 11\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime 1806.0427\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second 23.045\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second 2.882\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos 1.087585926764544e+16\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss 0.03659\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 0.10645\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision 0.86985\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall 0.88304\n", + 
"\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 0.8764\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy 0.98188\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 26.5433\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 72.184\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 9.042\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss █▄▃▃▂▂▂▁▁▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate █▇▆▆▅▄▃▃▂▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step ▁▂▂▃▄▄▅▅▆▇▇█\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 1 other file(s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced \u001b[33m./outputs\u001b[0m: \u001b[34mhttps://wandb.ai/versae/bertin-eval/runs/ble6jobx\u001b[0m\n", + "2021-07-19 08:54:44.400990: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "07/19/2021 08:54:48 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", + "07/19/2021 08:54:48 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", + "_n_gpu=1,\n", + "adafactor=False,\n", + "adam_beta1=0.9,\n", + "adam_beta2=0.999,\n", + "adam_epsilon=1e-08,\n", + "dataloader_drop_last=False,\n", + "dataloader_num_workers=0,\n", + "dataloader_pin_memory=True,\n", + "ddp_find_unused_parameters=None,\n", + "debug=[],\n", + "deepspeed=None,\n", + "disable_tqdm=False,\n", + "do_eval=True,\n", + "do_predict=False,\n", + "do_train=True,\n", + "eval_accumulation_steps=None,\n", + "eval_steps=500,\n", + "evaluation_strategy=IntervalStrategy.NO,\n", + "fp16=False,\n", + "fp16_backend=auto,\n", + "fp16_full_eval=False,\n", + "fp16_opt_level=O1,\n", + "gradient_accumulation_steps=1,\n", + "greater_is_better=None,\n", + "group_by_length=False,\n", + "ignore_data_skip=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=5e-05,\n", + "length_column_name=length,\n", + "load_best_model_at_end=False,\n", + "local_rank=-1,\n", + "log_level=-1,\n", + "log_level_replica=-1,\n", + "log_on_each_node=True,\n", + "logging_dir=./outputs/runs/Jul19_08-54-48_eab5184de9ff,\n", + "logging_first_step=False,\n", + "logging_steps=500,\n", + 
"logging_strategy=IntervalStrategy.STEPS,\n", + "lr_scheduler_type=SchedulerType.LINEAR,\n", + "max_grad_norm=1.0,\n", + "max_steps=-1,\n", + "metric_for_best_model=None,\n", + "mp_parameters=,\n", + "no_cuda=False,\n", + "num_train_epochs=5.0,\n", + "output_dir=./outputs,\n", + "overwrite_output_dir=True,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=8,\n", + "per_device_train_batch_size=8,\n", + "prediction_loss_only=False,\n", + "push_to_hub=False,\n", + "push_to_hub_model_id=outputs,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=None,\n", + "remove_unused_columns=True,\n", + "report_to=['tensorboard', 'wandb'],\n", + "resume_from_checkpoint=None,\n", + "run_name=./outputs,\n", + "save_on_each_node=False,\n", + "save_steps=500,\n", + "save_strategy=IntervalStrategy.STEPS,\n", + "save_total_limit=None,\n", + "seed=42,\n", + "sharded_ddp=[],\n", + "skip_memory_metrics=True,\n", + "tpu_metrics_debug=False,\n", + "tpu_num_cores=None,\n", + "use_legacy_prediction_loop=False,\n", + "warmup_ratio=0.0,\n", + "warmup_steps=0,\n", + "weight_decay=0.0,\n", + ")\n", + "07/19/2021 08:54:50 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 08:54:50 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 08:54:50 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + "07/19/2021 08:54:50 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 08:54:50 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 08:54:50 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 08:54:50 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 08:54:50 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + 
"07/19/2021 08:54:50 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 08:54:50 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 08:54:50 - INFO - datasets.info - Loading Dataset Infos from /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 08:54:50 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", + "07/19/2021 08:54:50 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 08:54:50 - WARNING - datasets.builder - Reusing dataset conll2002 (/root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5)\n", + "07/19/2021 08:54:50 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "100% 3/3 [00:00<00:00, 27.51it/s]\n", + "[INFO|file_utils.py:1624] 2021-07-19 08:54:51,773 >> https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp3ne9bsxv\n", + "Downloading: 100% 618/618 [00:00<00:00, 466kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 08:54:52,493 >> storing https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/config.json in cache at /root/.cache/huggingface/transformers/a3b98490ab467f825ce932e6e6e7de25a6ea47beeceb6f5cd521b8ee4f61f95e.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|file_utils.py:1636] 2021-07-19 08:54:52,493 >> creating metadata file for /root/.cache/huggingface/transformers/a3b98490ab467f825ce932e6e6e7de25a6ea47beeceb6f5cd521b8ee4f61f95e.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|configuration_utils.py:545] 2021-07-19 08:54:52,493 >> loading configuration file https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/a3b98490ab467f825ce932e6e6e7de25a6ea47beeceb6f5cd521b8ee4f61f95e.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|configuration_utils.py:581] 2021-07-19 08:54:52,494 >> Model config RobertaConfig {\n", + " \"architectures\": [\n", + " \"RobertaForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"bos_token_id\": 0,\n", + " \"eos_token_id\": 2,\n", + " \"finetuning_task\": \"ner\",\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"id2label\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 
8\n", + " },\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"label2id\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8\n", + " },\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"max_position_embeddings\": 514,\n", + " \"model_type\": \"roberta\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 1,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50265\n", + "}\n", + "\n", + "[INFO|file_utils.py:1624] 2021-07-19 08:54:53,479 >> https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/tokenizer_config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp3534979z\n", + "Downloading: 100% 292/292 [00:00<00:00, 226kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 08:54:54,196 >> storing https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/tokenizer_config.json in cache at /root/.cache/huggingface/transformers/a9d7a6740959c8c347993f62fbd5620bffa2d10c35c2e579a2ecec181299c9a1.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|file_utils.py:1636] 2021-07-19 08:54:54,196 >> creating metadata file for /root/.cache/huggingface/transformers/a9d7a6740959c8c347993f62fbd5620bffa2d10c35c2e579a2ecec181299c9a1.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|file_utils.py:1624] 2021-07-19 08:54:54,922 >> https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/vocab.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp7i8zte64\n", + "Downloading: 100% 855k/855k [00:00<00:00, 1.62MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 08:54:56,171 >> storing https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/vocab.json in cache at /root/.cache/huggingface/transformers/c1ba808baa4a9c0f3062f1881d448087c30a1443644365bd41cf366491ab4063.a80f232f572026f92499b14999a8ed4e044e04cf3d01b9f2be298c98e78e8498\n", + "[INFO|file_utils.py:1636] 2021-07-19 08:54:56,172 >> creating metadata file for /root/.cache/huggingface/transformers/c1ba808baa4a9c0f3062f1881d448087c30a1443644365bd41cf366491ab4063.a80f232f572026f92499b14999a8ed4e044e04cf3d01b9f2be298c98e78e8498\n", + "[INFO|file_utils.py:1624] 2021-07-19 08:54:56,897 >> https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/merges.txt not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpjqwc23bp\n", + "Downloading: 100% 514k/514k [00:00<00:00, 983kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 08:54:58,148 >> storing https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/merges.txt in cache at /root/.cache/huggingface/transformers/17ffd9604d64364336252e5a3859c3c55be07c457328ab5fc37e4aaf39913d28.a0dfc41f9d0f03a56ba7a5401d770f6e43071045a0bd79073380d408d17a0d92\n", + "[INFO|file_utils.py:1636] 2021-07-19 08:54:58,149 >> creating metadata file for /root/.cache/huggingface/transformers/17ffd9604d64364336252e5a3859c3c55be07c457328ab5fc37e4aaf39913d28.a0dfc41f9d0f03a56ba7a5401d770f6e43071045a0bd79073380d408d17a0d92\n", + "[INFO|file_utils.py:1624] 2021-07-19 
08:54:59,147 >> https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/tokenizer.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp_zvxd0ip\n", + "Downloading: 100% 1.47M/1.47M [00:00<00:00, 2.10MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 08:55:00,578 >> storing https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/tokenizer.json in cache at /root/.cache/huggingface/transformers/59e3c1ec6ec0fe2653924dcd348a763dd43f51d8eae6ab758e2d962ec7c14d5e.2a5dc806edc00ab3a329cb22b9973596ca75b24ba0e5e4963bf1308de7237a3d\n", + "[INFO|file_utils.py:1636] 2021-07-19 08:55:00,578 >> creating metadata file for /root/.cache/huggingface/transformers/59e3c1ec6ec0fe2653924dcd348a763dd43f51d8eae6ab758e2d962ec7c14d5e.2a5dc806edc00ab3a329cb22b9973596ca75b24ba0e5e4963bf1308de7237a3d\n", + "[INFO|file_utils.py:1624] 2021-07-19 08:55:02,011 >> https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/special_tokens_map.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmptr5agv7z\n", + "Downloading: 100% 239/239 [00:00<00:00, 186kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 08:55:02,736 >> storing https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/special_tokens_map.json in cache at /root/.cache/huggingface/transformers/39ddc268aab2655adb602f93d771480d4db157c1b6fae9a5ae9fc2112c645a69.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|file_utils.py:1636] 2021-07-19 08:55:02,737 >> creating metadata file for /root/.cache/huggingface/transformers/39ddc268aab2655adb602f93d771480d4db157c1b6fae9a5ae9fc2112c645a69.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 08:55:03,457 >> loading file https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/vocab.json from cache at /root/.cache/huggingface/transformers/c1ba808baa4a9c0f3062f1881d448087c30a1443644365bd41cf366491ab4063.a80f232f572026f92499b14999a8ed4e044e04cf3d01b9f2be298c98e78e8498\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 08:55:03,457 >> loading file https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/merges.txt from cache at /root/.cache/huggingface/transformers/17ffd9604d64364336252e5a3859c3c55be07c457328ab5fc37e4aaf39913d28.a0dfc41f9d0f03a56ba7a5401d770f6e43071045a0bd79073380d408d17a0d92\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 08:55:03,458 >> loading file https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/59e3c1ec6ec0fe2653924dcd348a763dd43f51d8eae6ab758e2d962ec7c14d5e.2a5dc806edc00ab3a329cb22b9973596ca75b24ba0e5e4963bf1308de7237a3d\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 08:55:03,458 >> loading file https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 08:55:03,458 >> loading file https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/special_tokens_map.json from cache at /root/.cache/huggingface/transformers/39ddc268aab2655adb602f93d771480d4db157c1b6fae9a5ae9fc2112c645a69.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + 
"[INFO|tokenization_utils_base.py:1722] 2021-07-19 08:55:03,458 >> loading file https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/tokenizer_config.json from cache at /root/.cache/huggingface/transformers/a9d7a6740959c8c347993f62fbd5620bffa2d10c35c2e579a2ecec181299c9a1.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|file_utils.py:1624] 2021-07-19 08:55:04,247 >> https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/pytorch_model.bin not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp0wclon8v\n", + "Downloading: 100% 499M/499M [00:48<00:00, 10.3MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 08:55:54,112 >> storing https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/6dd5d03f2c36b42a305cf20636d35935ad2d998d2ab0588b28eeed0fc164db43.5b6b77533b091cc9204533514d844abfe875ebba66e044b251306c4228bd3221\n", + "[INFO|file_utils.py:1636] 2021-07-19 08:55:54,113 >> creating metadata file for /root/.cache/huggingface/transformers/6dd5d03f2c36b42a305cf20636d35935ad2d998d2ab0588b28eeed0fc164db43.5b6b77533b091cc9204533514d844abfe875ebba66e044b251306c4228bd3221\n", + "[INFO|modeling_utils.py:1271] 2021-07-19 08:55:54,113 >> loading weights file https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/6dd5d03f2c36b42a305cf20636d35935ad2d998d2ab0588b28eeed0fc164db43.5b6b77533b091cc9204533514d844abfe875ebba66e044b251306c4228bd3221\n", + "[WARNING|modeling_utils.py:1502] 2021-07-19 08:55:55,525 >> Some weights of the model checkpoint at bertin-project/bertin-base-random-exp-512seqlen were not used when initializing RobertaForTokenClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias']\n", + "- This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "[WARNING|modeling_utils.py:1513] 2021-07-19 08:55:55,525 >> Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at bertin-project/bertin-base-random-exp-512seqlen and are newly initialized: ['classifier.bias', 'classifier.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "Running tokenizer on train dataset: 0% 0/9 [00:00> The following columns in the training set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: id, pos_tags, ner_tags, tokens.\n", + "[INFO|trainer.py:1162] 2021-07-19 08:56:05,765 >> ***** Running training *****\n", + "[INFO|trainer.py:1163] 2021-07-19 08:56:05,765 >> Num examples = 8324\n", + "[INFO|trainer.py:1164] 2021-07-19 08:56:05,765 >> Num Epochs = 5\n", + "[INFO|trainer.py:1165] 2021-07-19 08:56:05,765 >> Instantaneous batch size per device = 8\n", + "[INFO|trainer.py:1166] 2021-07-19 08:56:05,765 >> Total train batch size (w. parallel, distributed & accumulation) = 8\n", + "[INFO|trainer.py:1167] 2021-07-19 08:56:05,765 >> Gradient Accumulation steps = 1\n", + "[INFO|trainer.py:1168] 2021-07-19 08:56:05,765 >> Total optimization steps = 5205\n", + "[INFO|integrations.py:417] 2021-07-19 08:56:05,779 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mversae\u001b[0m (use `wandb login --relogin` to force relogin)\n", + "2021-07-19 08:56:07.337961: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m./outputs\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval/runs/2o4ygylu\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in /content/wandb/run-20210719_085605-2o4ygylu\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run `wandb offline` to turn off syncing.\n", + "\n", + "{'loss': 0.1394, 'learning_rate': 4.5196926032660905e-05, 'epoch': 0.48}\n", + " 10% 500/5205 [02:49<30:06, 2.61it/s][INFO|trainer.py:1917] 2021-07-19 08:58:58,379 >> Saving model checkpoint to ./outputs/checkpoint-500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 08:58:58,380 >> Configuration saved in ./outputs/checkpoint-500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 08:58:59,853 >> Model weights saved in ./outputs/checkpoint-500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 08:58:59,854 >> tokenizer config file saved in ./outputs/checkpoint-500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 08:58:59,854 >> Special tokens file saved in ./outputs/checkpoint-500/special_tokens_map.json\n", + " 19% 1000/5205 [05:45<26:45, 2.62it/s]{'loss': 0.0732, 'learning_rate': 
4.039385206532181e-05, 'epoch': 0.96}\n", + "[INFO|trainer.py:1917] 2021-07-19 09:01:54,171 >> Saving model checkpoint to ./outputs/checkpoint-1000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 09:01:54,172 >> Configuration saved in ./outputs/checkpoint-1000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 09:01:55,552 >> Model weights saved in ./outputs/checkpoint-1000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 09:01:55,553 >> tokenizer config file saved in ./outputs/checkpoint-1000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 09:01:55,553 >> Special tokens file saved in ./outputs/checkpoint-1000/special_tokens_map.json\n", + " 29% 1500/5205 [08:40<23:13, 2.66it/s]{'loss': 0.0498, 'learning_rate': 3.5590778097982716e-05, 'epoch': 1.44}\n", + "[INFO|trainer.py:1917] 2021-07-19 09:04:48,666 >> Saving model checkpoint to ./outputs/checkpoint-1500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 09:04:48,667 >> Configuration saved in ./outputs/checkpoint-1500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 09:04:49,995 >> Model weights saved in ./outputs/checkpoint-1500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 09:04:49,996 >> tokenizer config file saved in ./outputs/checkpoint-1500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 09:04:49,996 >> Special tokens file saved in ./outputs/checkpoint-1500/special_tokens_map.json\n", + " 38% 2000/5205 [11:33<20:30, 2.61it/s]{'loss': 0.0471, 'learning_rate': 3.078770413064361e-05, 'epoch': 1.92}\n", + "[INFO|trainer.py:1917] 2021-07-19 09:07:42,046 >> Saving model checkpoint to ./outputs/checkpoint-2000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 09:07:42,047 >> Configuration saved in ./outputs/checkpoint-2000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 09:07:43,645 >> Model weights saved in ./outputs/checkpoint-2000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 09:07:43,646 >> tokenizer config file saved in ./outputs/checkpoint-2000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 09:07:43,646 >> Special tokens file saved in ./outputs/checkpoint-2000/special_tokens_map.json\n", + " 48% 2500/5205 [14:27<17:19, 2.60it/s][INFO|trainer.py:1917] 2021-07-19 09:10:35,810 >> Saving model checkpoint to ./outputs/checkpoint-2500\n", + "{'loss': 0.0326, 'learning_rate': 2.5984630163304517e-05, 'epoch': 2.4}\n", + "[INFO|configuration_utils.py:379] 2021-07-19 09:10:35,814 >> Configuration saved in ./outputs/checkpoint-2500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 09:10:37,154 >> Model weights saved in ./outputs/checkpoint-2500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 09:10:37,155 >> tokenizer config file saved in ./outputs/checkpoint-2500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 09:10:37,155 >> Special tokens file saved in ./outputs/checkpoint-2500/special_tokens_map.json\n", + "{'loss': 0.0286, 'learning_rate': 2.118155619596542e-05, 'epoch': 2.88}\n", + " 58% 3000/5205 [17:20<13:57, 2.63it/s][INFO|trainer.py:1917] 2021-07-19 09:13:29,401 >> Saving model checkpoint to ./outputs/checkpoint-3000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 09:13:29,403 >> Configuration saved in ./outputs/checkpoint-3000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 09:13:30,776 >> Model weights saved in 
./outputs/checkpoint-3000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 09:13:30,777 >> tokenizer config file saved in ./outputs/checkpoint-3000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 09:13:30,777 >> Special tokens file saved in ./outputs/checkpoint-3000/special_tokens_map.json\n", + "{'loss': 0.0213, 'learning_rate': 1.6378482228626322e-05, 'epoch': 3.36}\n", + " 67% 3500/5205 [20:14<11:08, 2.55it/s][INFO|trainer.py:1917] 2021-07-19 09:16:22,813 >> Saving model checkpoint to ./outputs/checkpoint-3500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 09:16:22,815 >> Configuration saved in ./outputs/checkpoint-3500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 09:16:24,401 >> Model weights saved in ./outputs/checkpoint-3500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 09:16:24,402 >> tokenizer config file saved in ./outputs/checkpoint-3500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 09:16:24,402 >> Special tokens file saved in ./outputs/checkpoint-3500/special_tokens_map.json\n", + "{'loss': 0.0179, 'learning_rate': 1.1575408261287224e-05, 'epoch': 3.84}\n", + " 77% 4000/5205 [23:07<07:34, 2.65it/s][INFO|trainer.py:1917] 2021-07-19 09:19:16,222 >> Saving model checkpoint to ./outputs/checkpoint-4000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 09:19:16,223 >> Configuration saved in ./outputs/checkpoint-4000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 09:19:17,545 >> Model weights saved in ./outputs/checkpoint-4000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 09:19:17,546 >> tokenizer config file saved in ./outputs/checkpoint-4000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 09:19:17,546 >> Special tokens file saved in ./outputs/checkpoint-4000/special_tokens_map.json\n", + "{'loss': 0.0148, 'learning_rate': 6.7723342939481265e-06, 'epoch': 4.32}\n", + " 86% 4500/5205 [26:00<04:28, 2.63it/s][INFO|trainer.py:1917] 2021-07-19 09:22:09,568 >> Saving model checkpoint to ./outputs/checkpoint-4500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 09:22:09,569 >> Configuration saved in ./outputs/checkpoint-4500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 09:22:10,963 >> Model weights saved in ./outputs/checkpoint-4500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 09:22:10,964 >> tokenizer config file saved in ./outputs/checkpoint-4500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 09:22:10,964 >> Special tokens file saved in ./outputs/checkpoint-4500/special_tokens_map.json\n", + " 96% 5000/5205 [28:54<01:17, 2.65it/s]{'loss': 0.0124, 'learning_rate': 1.96926032660903e-06, 'epoch': 4.8}\n", + "[INFO|trainer.py:1917] 2021-07-19 09:25:02,826 >> Saving model checkpoint to ./outputs/checkpoint-5000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 09:25:02,833 >> Configuration saved in ./outputs/checkpoint-5000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 09:25:04,501 >> Model weights saved in ./outputs/checkpoint-5000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 09:25:04,501 >> tokenizer config file saved in ./outputs/checkpoint-5000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 09:25:04,502 >> Special tokens file saved in ./outputs/checkpoint-5000/special_tokens_map.json\n", + "100% 5205/5205 [30:08<00:00, 
3.32it/s][INFO|trainer.py:1358] 2021-07-19 09:26:17,362 >> \n", + "\n", + "Training completed. Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n", + "100% 5205/5205 [30:08<00:00, 3.32it/s]{'train_runtime': 1811.5969, 'train_samples_per_second': 22.974, 'train_steps_per_second': 2.873, 'train_loss': 0.04241216052162544, 'epoch': 5.0}\n", + "100% 5205/5205 [30:08<00:00, 2.88it/s]\n", + "[INFO|trainer.py:1917] 2021-07-19 09:26:17,386 >> Saving model checkpoint to ./outputs\n", + "[INFO|configuration_utils.py:379] 2021-07-19 09:26:17,388 >> Configuration saved in ./outputs/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 09:26:18,952 >> Model weights saved in ./outputs/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 09:26:18,953 >> tokenizer config file saved in ./outputs/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 09:26:18,953 >> Special tokens file saved in ./outputs/special_tokens_map.json\n", + "***** train metrics *****\n", + " epoch = 5.0\n", + " train_loss = 0.0424\n", + " train_runtime = 0:30:11.59\n", + " train_samples = 8324\n", + " train_samples_per_second = 22.974\n", + " train_steps_per_second = 2.873\n", + "07/19/2021 09:26:19 - INFO - __main__ - *** Evaluate ***\n", + "[INFO|trainer.py:522] 2021-07-19 09:26:19,106 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: id, pos_tags, ner_tags, tokens.\n", + "[INFO|trainer.py:2163] 2021-07-19 09:26:19,211 >> ***** Running Evaluation *****\n", + "[INFO|trainer.py:2165] 2021-07-19 09:26:19,211 >> Num examples = 1916\n", + "[INFO|trainer.py:2168] 2021-07-19 09:26:19,211 >> Batch size = 8\n", + "100% 240/240 [00:24<00:00, 10.65it/s]***** eval metrics *****\n", + " epoch = 5.0\n", + " eval_accuracy = 0.9803\n", + "100% 240/240 [00:26<00:00, 9.17it/s]\n", + " eval_f1 = 0.8616\n", + " eval_loss = 0.1099\n", + " eval_precision = 0.8557\n", + " eval_recall = 0.8676\n", + " eval_runtime = 0:00:26.28\n", + " eval_samples = 1916\n", + " eval_samples_per_second = 72.894\n", + " eval_steps_per_second = 9.131\n", + "\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish, PID 313\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Program ended successfully.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find user logs for this run at: /content/wandb/run-20210719_085605-2o4ygylu/logs/debug.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find internal logs for this run at: /content/wandb/run-20210719_085605-2o4ygylu/logs/debug-internal.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.0124\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 5205\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime 1839\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp 1626686805\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step 11\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime 1811.5969\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second 22.974\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second 2.873\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos 1.087585926764544e+16\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss 0.04241\n", + 
"\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 0.10993\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision 0.85565\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall 0.86765\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 0.86161\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy 0.98033\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 26.2847\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 72.894\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 9.131\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss █▄▃▃▂▂▁▁▁▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate █▇▆▆▅▄▃▃▂▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step ▁▂▂▃▄▄▅▅▆▇▇█\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 1 other file(s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced \u001b[33m./outputs\u001b[0m: \u001b[34mhttps://wandb.ai/versae/bertin-eval/runs/2o4ygylu\u001b[0m\n", + "2021-07-19 09:26:58.817549: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "07/19/2021 09:27:01 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", + "07/19/2021 09:27:01 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", + "_n_gpu=1,\n", + "adafactor=False,\n", + "adam_beta1=0.9,\n", + "adam_beta2=0.999,\n", + "adam_epsilon=1e-08,\n", + "dataloader_drop_last=False,\n", + "dataloader_num_workers=0,\n", + "dataloader_pin_memory=True,\n", + "ddp_find_unused_parameters=None,\n", + "debug=[],\n", + "deepspeed=None,\n", + "disable_tqdm=False,\n", + "do_eval=True,\n", + "do_predict=False,\n", + "do_train=True,\n", + "eval_accumulation_steps=None,\n", + "eval_steps=500,\n", + "evaluation_strategy=IntervalStrategy.NO,\n", + "fp16=False,\n", + "fp16_backend=auto,\n", + "fp16_full_eval=False,\n", + "fp16_opt_level=O1,\n", + "gradient_accumulation_steps=1,\n", + "greater_is_better=None,\n", + "group_by_length=False,\n", + "ignore_data_skip=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=5e-05,\n", + "length_column_name=length,\n", + "load_best_model_at_end=False,\n", + "local_rank=-1,\n", + "log_level=-1,\n", + "log_level_replica=-1,\n", + 
"log_on_each_node=True,\n", + "logging_dir=./outputs/runs/Jul19_09-27-01_eab5184de9ff,\n", + "logging_first_step=False,\n", + "logging_steps=500,\n", + "logging_strategy=IntervalStrategy.STEPS,\n", + "lr_scheduler_type=SchedulerType.LINEAR,\n", + "max_grad_norm=1.0,\n", + "max_steps=-1,\n", + "metric_for_best_model=None,\n", + "mp_parameters=,\n", + "no_cuda=False,\n", + "num_train_epochs=5.0,\n", + "output_dir=./outputs,\n", + "overwrite_output_dir=True,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=8,\n", + "per_device_train_batch_size=8,\n", + "prediction_loss_only=False,\n", + "push_to_hub=False,\n", + "push_to_hub_model_id=outputs,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=None,\n", + "remove_unused_columns=True,\n", + "report_to=['tensorboard', 'wandb'],\n", + "resume_from_checkpoint=None,\n", + "run_name=./outputs,\n", + "save_on_each_node=False,\n", + "save_steps=500,\n", + "save_strategy=IntervalStrategy.STEPS,\n", + "save_total_limit=None,\n", + "seed=42,\n", + "sharded_ddp=[],\n", + "skip_memory_metrics=True,\n", + "tpu_metrics_debug=False,\n", + "tpu_num_cores=None,\n", + "use_legacy_prediction_loop=False,\n", + "warmup_ratio=0.0,\n", + "warmup_steps=0,\n", + "weight_decay=0.0,\n", + ")\n", + "07/19/2021 09:27:03 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 09:27:03 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 09:27:03 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + "07/19/2021 09:27:03 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 09:27:03 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 09:27:04 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 09:27:04 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 09:27:04 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to 
/root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + "07/19/2021 09:27:04 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 09:27:04 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 09:27:04 - INFO - datasets.info - Loading Dataset Infos from /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 09:27:04 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", + "07/19/2021 09:27:04 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 09:27:04 - WARNING - datasets.builder - Reusing dataset conll2002 (/root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5)\n", + "07/19/2021 09:27:04 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "100% 3/3 [00:00<00:00, 26.10it/s]\n", + "[INFO|file_utils.py:1624] 2021-07-19 09:27:05,503 >> https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpj3btr4rs\n", + "Downloading: 100% 618/618 [00:00<00:00, 445kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 09:27:06,231 >> storing https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/config.json in cache at /root/.cache/huggingface/transformers/cbd56d68ce5dcd2626aba4c4b188db63f2ba2c49a604b36e7cdc6e52578ee306.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|file_utils.py:1636] 2021-07-19 09:27:06,231 >> creating metadata file for /root/.cache/huggingface/transformers/cbd56d68ce5dcd2626aba4c4b188db63f2ba2c49a604b36e7cdc6e52578ee306.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|configuration_utils.py:545] 2021-07-19 09:27:06,232 >> loading configuration file https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/cbd56d68ce5dcd2626aba4c4b188db63f2ba2c49a604b36e7cdc6e52578ee306.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|configuration_utils.py:581] 2021-07-19 09:27:06,233 >> Model config RobertaConfig {\n", + " \"architectures\": [\n", + " \"RobertaForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"bos_token_id\": 0,\n", + " \"eos_token_id\": 2,\n", + " \"finetuning_task\": \"ner\",\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"id2label\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + 
" \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8\n", + " },\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"label2id\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8\n", + " },\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"max_position_embeddings\": 514,\n", + " \"model_type\": \"roberta\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 1,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50265\n", + "}\n", + "\n", + "[INFO|file_utils.py:1624] 2021-07-19 09:27:06,949 >> https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/tokenizer_config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmps__yinbf\n", + "Downloading: 100% 292/292 [00:00<00:00, 219kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 09:27:07,667 >> storing https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/tokenizer_config.json in cache at /root/.cache/huggingface/transformers/7a419a17bf4372869932365630632f434d402e93dd7e609e73607cf71ec1bdf7.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|file_utils.py:1636] 2021-07-19 09:27:07,667 >> creating metadata file for /root/.cache/huggingface/transformers/7a419a17bf4372869932365630632f434d402e93dd7e609e73607cf71ec1bdf7.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|file_utils.py:1624] 2021-07-19 09:27:08,391 >> https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/vocab.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp59ug3qfu\n", + "Downloading: 100% 855k/855k [00:00<00:00, 1.60MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 09:27:09,650 >> storing https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/vocab.json in cache at /root/.cache/huggingface/transformers/dae6454603d0b1d10a2446ffc1a21ccd636b0ca6a4c77a79fb9dfde03f4a51b8.a80f232f572026f92499b14999a8ed4e044e04cf3d01b9f2be298c98e78e8498\n", + "[INFO|file_utils.py:1636] 2021-07-19 09:27:09,650 >> creating metadata file for /root/.cache/huggingface/transformers/dae6454603d0b1d10a2446ffc1a21ccd636b0ca6a4c77a79fb9dfde03f4a51b8.a80f232f572026f92499b14999a8ed4e044e04cf3d01b9f2be298c98e78e8498\n", + "[INFO|file_utils.py:1624] 2021-07-19 09:27:10,373 >> https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/merges.txt not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpsmpe0ptd\n", + "Downloading: 100% 514k/514k [00:00<00:00, 980kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 09:27:11,621 >> storing https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/merges.txt in cache at /root/.cache/huggingface/transformers/2e77e6f778d3fd8875349675d408521ca20c1f1acac2fd57d60ca945d82b926e.a0dfc41f9d0f03a56ba7a5401d770f6e43071045a0bd79073380d408d17a0d92\n", + "[INFO|file_utils.py:1636] 2021-07-19 09:27:11,621 >> creating metadata file for /root/.cache/huggingface/transformers/2e77e6f778d3fd8875349675d408521ca20c1f1acac2fd57d60ca945d82b926e.a0dfc41f9d0f03a56ba7a5401d770f6e43071045a0bd79073380d408d17a0d92\n", 
+ "[INFO|file_utils.py:1624] 2021-07-19 09:27:12,348 >> https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/tokenizer.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpcvykdoxa\n", + "Downloading: 100% 1.47M/1.47M [00:00<00:00, 2.08MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 09:27:13,783 >> storing https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/tokenizer.json in cache at /root/.cache/huggingface/transformers/98a6808b3aa08b6d84e8b30dfa6892d15e9e631eebff8652b37ab29d75a0b98a.2a5dc806edc00ab3a329cb22b9973596ca75b24ba0e5e4963bf1308de7237a3d\n", + "[INFO|file_utils.py:1636] 2021-07-19 09:27:13,784 >> creating metadata file for /root/.cache/huggingface/transformers/98a6808b3aa08b6d84e8b30dfa6892d15e9e631eebff8652b37ab29d75a0b98a.2a5dc806edc00ab3a329cb22b9973596ca75b24ba0e5e4963bf1308de7237a3d\n", + "[INFO|file_utils.py:1624] 2021-07-19 09:27:15,496 >> https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/special_tokens_map.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpzlz4yibg\n", + "Downloading: 100% 239/239 [00:00<00:00, 176kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 09:27:16,213 >> storing https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/special_tokens_map.json in cache at /root/.cache/huggingface/transformers/1f5a4bde3e85f3c7b914d0e6b43b2f72d6b3b2f9ddbec7be9e4b0521a429f67f.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|file_utils.py:1636] 2021-07-19 09:27:16,213 >> creating metadata file for /root/.cache/huggingface/transformers/1f5a4bde3e85f3c7b914d0e6b43b2f72d6b3b2f9ddbec7be9e4b0521a429f67f.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 09:27:16,931 >> loading file https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/vocab.json from cache at /root/.cache/huggingface/transformers/dae6454603d0b1d10a2446ffc1a21ccd636b0ca6a4c77a79fb9dfde03f4a51b8.a80f232f572026f92499b14999a8ed4e044e04cf3d01b9f2be298c98e78e8498\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 09:27:16,931 >> loading file https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/merges.txt from cache at /root/.cache/huggingface/transformers/2e77e6f778d3fd8875349675d408521ca20c1f1acac2fd57d60ca945d82b926e.a0dfc41f9d0f03a56ba7a5401d770f6e43071045a0bd79073380d408d17a0d92\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 09:27:16,931 >> loading file https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/98a6808b3aa08b6d84e8b30dfa6892d15e9e631eebff8652b37ab29d75a0b98a.2a5dc806edc00ab3a329cb22b9973596ca75b24ba0e5e4963bf1308de7237a3d\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 09:27:16,931 >> loading file https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 09:27:16,931 >> loading file https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/special_tokens_map.json from cache at /root/.cache/huggingface/transformers/1f5a4bde3e85f3c7b914d0e6b43b2f72d6b3b2f9ddbec7be9e4b0521a429f67f.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 09:27:16,931 >> loading file 
https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/tokenizer_config.json from cache at /root/.cache/huggingface/transformers/7a419a17bf4372869932365630632f434d402e93dd7e609e73607cf71ec1bdf7.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|file_utils.py:1624] 2021-07-19 09:27:17,726 >> https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/pytorch_model.bin not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpmiziudjg\n", + "Downloading: 100% 499M/499M [00:48<00:00, 10.4MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 09:28:07,298 >> storing https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/d61c66f6163d7933bcffb5de3a666094c2c7c8d54145ec0cea640f72204427e0.50b5552cf09535e0a4b85cc39c83be233674d4cab0836dd8fedc97aa778c802c\n", + "[INFO|file_utils.py:1636] 2021-07-19 09:28:07,299 >> creating metadata file for /root/.cache/huggingface/transformers/d61c66f6163d7933bcffb5de3a666094c2c7c8d54145ec0cea640f72204427e0.50b5552cf09535e0a4b85cc39c83be233674d4cab0836dd8fedc97aa778c802c\n", + "[INFO|modeling_utils.py:1271] 2021-07-19 09:28:07,299 >> loading weights file https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/d61c66f6163d7933bcffb5de3a666094c2c7c8d54145ec0cea640f72204427e0.50b5552cf09535e0a4b85cc39c83be233674d4cab0836dd8fedc97aa778c802c\n", + "[WARNING|modeling_utils.py:1502] 2021-07-19 09:28:08,815 >> Some weights of the model checkpoint at bertin-project/bertin-base-gaussian were not used when initializing RobertaForTokenClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.dense.weight']\n", + "- This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "[WARNING|modeling_utils.py:1513] 2021-07-19 09:28:08,815 >> Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at bertin-project/bertin-base-gaussian and are newly initialized: ['classifier.bias', 'classifier.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "Running tokenizer on train dataset: 0% 0/9 [00:00> The following columns in the training set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: pos_tags, ner_tags, id, tokens.\n", + "[INFO|trainer.py:1162] 2021-07-19 09:28:19,409 >> ***** Running training *****\n", + "[INFO|trainer.py:1163] 2021-07-19 09:28:19,409 >> Num examples = 8324\n", + "[INFO|trainer.py:1164] 2021-07-19 09:28:19,409 >> Num Epochs = 5\n", + "[INFO|trainer.py:1165] 2021-07-19 09:28:19,409 >> Instantaneous batch size per device = 8\n", + "[INFO|trainer.py:1166] 2021-07-19 09:28:19,409 >> Total train batch size (w. 
parallel, distributed & accumulation) = 8\n", + "[INFO|trainer.py:1167] 2021-07-19 09:28:19,409 >> Gradient Accumulation steps = 1\n", + "[INFO|trainer.py:1168] 2021-07-19 09:28:19,409 >> Total optimization steps = 5205\n", + "[INFO|integrations.py:417] 2021-07-19 09:28:19,425 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mversae\u001b[0m (use `wandb login --relogin` to force relogin)\n", + "2021-07-19 09:28:21.087997: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m./outputs\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval/runs/1zmf4tfr\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in /content/wandb/run-20210719_092819-1zmf4tfr\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run `wandb offline` to turn off syncing.\n", + "\n", + "{'loss': 0.1263, 'learning_rate': 4.5196926032660905e-05, 'epoch': 0.48}\n", + " 10% 500/5205 [02:49<30:18, 2.59it/s][INFO|trainer.py:1917] 2021-07-19 09:31:12,177 >> Saving model checkpoint to ./outputs/checkpoint-500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 09:31:12,179 >> Configuration saved in ./outputs/checkpoint-500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 09:31:13,724 >> Model weights saved in ./outputs/checkpoint-500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 09:31:13,725 >> tokenizer config file saved in ./outputs/checkpoint-500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 09:31:13,725 >> Special tokens file saved in ./outputs/checkpoint-500/special_tokens_map.json\n", + "{'loss': 0.0641, 'learning_rate': 4.039385206532181e-05, 'epoch': 0.96}\n", + " 19% 1000/5205 [05:45<26:44, 2.62it/s][INFO|trainer.py:1917] 2021-07-19 09:34:07,849 >> Saving model checkpoint to ./outputs/checkpoint-1000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 09:34:07,850 >> Configuration saved in ./outputs/checkpoint-1000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 09:34:09,238 >> Model weights saved in ./outputs/checkpoint-1000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 09:34:09,238 >> tokenizer config file saved in ./outputs/checkpoint-1000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 09:34:09,239 >> Special tokens file saved in ./outputs/checkpoint-1000/special_tokens_map.json\n", + "{'loss': 0.041, 'learning_rate': 3.5590778097982716e-05, 'epoch': 1.44}\n", + " 29% 1500/5205 [08:40<23:26, 2.63it/s][INFO|trainer.py:1917] 2021-07-19 09:37:02,981 >> Saving model checkpoint to ./outputs/checkpoint-1500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 09:37:02,983 >> Configuration saved in ./outputs/checkpoint-1500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 09:37:04,542 >> Model weights saved in ./outputs/checkpoint-1500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 09:37:04,543 >> tokenizer config file saved in ./outputs/checkpoint-1500/tokenizer_config.json\n", + 
"[INFO|tokenization_utils_base.py:2004] 2021-07-19 09:37:04,544 >> Special tokens file saved in ./outputs/checkpoint-1500/special_tokens_map.json\n", + "{'loss': 0.0394, 'learning_rate': 3.078770413064361e-05, 'epoch': 1.92}\n", + " 38% 2000/5205 [11:35<20:16, 2.63it/s][INFO|trainer.py:1917] 2021-07-19 09:39:58,083 >> Saving model checkpoint to ./outputs/checkpoint-2000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 09:39:58,085 >> Configuration saved in ./outputs/checkpoint-2000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 09:39:59,421 >> Model weights saved in ./outputs/checkpoint-2000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 09:39:59,422 >> tokenizer config file saved in ./outputs/checkpoint-2000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 09:39:59,422 >> Special tokens file saved in ./outputs/checkpoint-2000/special_tokens_map.json\n", + " 48% 2500/5205 [14:30<17:11, 2.62it/s]{'loss': 0.0261, 'learning_rate': 2.5984630163304517e-05, 'epoch': 2.4}\n", + " 48% 2500/5205 [14:30<17:11, 2.62it/s][INFO|trainer.py:1917] 2021-07-19 09:42:52,967 >> Saving model checkpoint to ./outputs/checkpoint-2500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 09:42:52,968 >> Configuration saved in ./outputs/checkpoint-2500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 09:42:54,418 >> Model weights saved in ./outputs/checkpoint-2500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 09:42:54,419 >> tokenizer config file saved in ./outputs/checkpoint-2500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 09:42:54,420 >> Special tokens file saved in ./outputs/checkpoint-2500/special_tokens_map.json\n", + " 58% 3000/5205 [17:25<14:09, 2.60it/s]{'loss': 0.0221, 'learning_rate': 2.118155619596542e-05, 'epoch': 2.88}\n", + " 58% 3000/5205 [17:25<14:09, 2.60it/s][INFO|trainer.py:1917] 2021-07-19 09:45:48,000 >> Saving model checkpoint to ./outputs/checkpoint-3000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 09:45:48,001 >> Configuration saved in ./outputs/checkpoint-3000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 09:45:49,621 >> Model weights saved in ./outputs/checkpoint-3000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 09:45:49,622 >> tokenizer config file saved in ./outputs/checkpoint-3000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 09:45:49,622 >> Special tokens file saved in ./outputs/checkpoint-3000/special_tokens_map.json\n", + " 67% 3500/5205 [20:20<10:53, 2.61it/s]{'loss': 0.0165, 'learning_rate': 1.6378482228626322e-05, 'epoch': 3.36}\n", + "[INFO|trainer.py:1917] 2021-07-19 09:48:42,983 >> Saving model checkpoint to ./outputs/checkpoint-3500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 09:48:42,985 >> Configuration saved in ./outputs/checkpoint-3500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 09:48:44,303 >> Model weights saved in ./outputs/checkpoint-3500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 09:48:44,304 >> tokenizer config file saved in ./outputs/checkpoint-3500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 09:48:44,305 >> Special tokens file saved in ./outputs/checkpoint-3500/special_tokens_map.json\n", + "{'loss': 0.0125, 'learning_rate': 1.1575408261287224e-05, 'epoch': 3.84}\n", + " 77% 4000/5205 [23:15<07:45, 2.59it/s][INFO|trainer.py:1917] 2021-07-19 09:51:37,552 
>> Saving model checkpoint to ./outputs/checkpoint-4000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 09:51:37,553 >> Configuration saved in ./outputs/checkpoint-4000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 09:51:39,078 >> Model weights saved in ./outputs/checkpoint-4000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 09:51:39,121 >> tokenizer config file saved in ./outputs/checkpoint-4000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 09:51:39,121 >> Special tokens file saved in ./outputs/checkpoint-4000/special_tokens_map.json\n", + " 86% 4500/5205 [26:09<04:31, 2.59it/s]{'loss': 0.0095, 'learning_rate': 6.7723342939481265e-06, 'epoch': 4.32}\n", + " 86% 4500/5205 [26:09<04:31, 2.59it/s][INFO|trainer.py:1917] 2021-07-19 09:54:32,046 >> Saving model checkpoint to ./outputs/checkpoint-4500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 09:54:32,048 >> Configuration saved in ./outputs/checkpoint-4500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 09:54:33,695 >> Model weights saved in ./outputs/checkpoint-4500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 09:54:33,696 >> tokenizer config file saved in ./outputs/checkpoint-4500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 09:54:33,696 >> Special tokens file saved in ./outputs/checkpoint-4500/special_tokens_map.json\n", + " 96% 5000/5205 [29:04<01:18, 2.61it/s]{'loss': 0.0086, 'learning_rate': 1.96926032660903e-06, 'epoch': 4.8}\n", + " 96% 5000/5205 [29:04<01:18, 2.61it/s][INFO|trainer.py:1917] 2021-07-19 09:57:26,718 >> Saving model checkpoint to ./outputs/checkpoint-5000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 09:57:26,720 >> Configuration saved in ./outputs/checkpoint-5000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 09:57:28,163 >> Model weights saved in ./outputs/checkpoint-5000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 09:57:28,164 >> tokenizer config file saved in ./outputs/checkpoint-5000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 09:57:28,169 >> Special tokens file saved in ./outputs/checkpoint-5000/special_tokens_map.json\n", + "100% 5205/5205 [30:19<00:00, 3.31it/s][INFO|trainer.py:1358] 2021-07-19 09:58:41,915 >> \n", + "\n", + "Training completed. 
Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n", + " {'train_runtime': 1822.5055, 'train_samples_per_second': 22.837, 'train_steps_per_second': 2.856, 'train_loss': 0.035404586448449564, 'epoch': 5.0}\n", + "100% 5205/5205 [30:19<00:00, 2.86it/s]\n", + "[INFO|trainer.py:1917] 2021-07-19 09:58:41,922 >> Saving model checkpoint to ./outputs\n", + "[INFO|configuration_utils.py:379] 2021-07-19 09:58:41,924 >> Configuration saved in ./outputs/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 09:58:43,256 >> Model weights saved in ./outputs/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 09:58:43,257 >> tokenizer config file saved in ./outputs/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 09:58:43,257 >> Special tokens file saved in ./outputs/special_tokens_map.json\n", + "***** train metrics *****\n", + " epoch = 5.0\n", + " train_loss = 0.0354\n", + " train_runtime = 0:30:22.50\n", + " train_samples = 8324\n", + " train_samples_per_second = 22.837\n", + " train_steps_per_second = 2.856\n", + "[INFO|trainer.py:522] 2021-07-19 09:58:43,390 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: pos_tags, ner_tags, id, tokens.\n", + "07/19/2021 09:58:43 - INFO - __main__ - *** Evaluate ***\n", + "[INFO|trainer.py:2163] 2021-07-19 09:58:43,420 >> ***** Running Evaluation *****\n", + "[INFO|trainer.py:2165] 2021-07-19 09:58:43,420 >> Num examples = 1916\n", + "[INFO|trainer.py:2168] 2021-07-19 09:58:43,420 >> Batch size = 8\n", + "100% 240/240 [00:25<00:00, 9.26it/s]\n", + "***** eval metrics *****\n", + " epoch = 5.0\n", + " eval_accuracy = 0.9816\n", + " eval_f1 = 0.8792\n", + " eval_loss = 0.1098\n", + " eval_precision = 0.8731\n", + " eval_recall = 0.8853\n", + " eval_runtime = 0:00:26.04\n", + " eval_samples = 1916\n", + " eval_samples_per_second = 73.569\n", + " eval_steps_per_second = 9.215\n", + "\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish, PID 392\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Program ended successfully.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find user logs for this run at: /content/wandb/run-20210719_092819-1zmf4tfr/logs/debug.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find internal logs for this run at: /content/wandb/run-20210719_092819-1zmf4tfr/logs/debug-internal.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.0086\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 5205\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime 1850\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp 1626688749\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step 11\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime 1822.5055\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second 22.837\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second 2.856\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos 1.087585926764544e+16\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss 0.0354\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 0.10978\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision 0.8731\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall 0.88534\n", + 
"\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 0.87918\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy 0.9816\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 26.0437\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 73.569\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 9.215\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss █▄▃▃▂▂▁▁▁▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate █▇▆▆▅▄▃▃▂▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step ▁▂▂▃▄▄▅▅▆▇▇█\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 1 other file(s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced \u001b[33m./outputs\u001b[0m: \u001b[34mhttps://wandb.ai/versae/bertin-eval/runs/1zmf4tfr\u001b[0m\n", + "2021-07-19 09:59:22.866480: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "07/19/2021 09:59:25 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", + "07/19/2021 09:59:25 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", + "_n_gpu=1,\n", + "adafactor=False,\n", + "adam_beta1=0.9,\n", + "adam_beta2=0.999,\n", + "adam_epsilon=1e-08,\n", + "dataloader_drop_last=False,\n", + "dataloader_num_workers=0,\n", + "dataloader_pin_memory=True,\n", + "ddp_find_unused_parameters=None,\n", + "debug=[],\n", + "deepspeed=None,\n", + "disable_tqdm=False,\n", + "do_eval=True,\n", + "do_predict=False,\n", + "do_train=True,\n", + "eval_accumulation_steps=None,\n", + "eval_steps=500,\n", + "evaluation_strategy=IntervalStrategy.NO,\n", + "fp16=False,\n", + "fp16_backend=auto,\n", + "fp16_full_eval=False,\n", + "fp16_opt_level=O1,\n", + "gradient_accumulation_steps=1,\n", + "greater_is_better=None,\n", + "group_by_length=False,\n", + "ignore_data_skip=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=5e-05,\n", + "length_column_name=length,\n", + "load_best_model_at_end=False,\n", + "local_rank=-1,\n", + "log_level=-1,\n", + "log_level_replica=-1,\n", + "log_on_each_node=True,\n", + "logging_dir=./outputs/runs/Jul19_09-59-25_eab5184de9ff,\n", + "logging_first_step=False,\n", + "logging_steps=500,\n", + 
"logging_strategy=IntervalStrategy.STEPS,\n", + "lr_scheduler_type=SchedulerType.LINEAR,\n", + "max_grad_norm=1.0,\n", + "max_steps=-1,\n", + "metric_for_best_model=None,\n", + "mp_parameters=,\n", + "no_cuda=False,\n", + "num_train_epochs=5.0,\n", + "output_dir=./outputs,\n", + "overwrite_output_dir=True,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=8,\n", + "per_device_train_batch_size=8,\n", + "prediction_loss_only=False,\n", + "push_to_hub=False,\n", + "push_to_hub_model_id=outputs,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=None,\n", + "remove_unused_columns=True,\n", + "report_to=['tensorboard', 'wandb'],\n", + "resume_from_checkpoint=None,\n", + "run_name=./outputs,\n", + "save_on_each_node=False,\n", + "save_steps=500,\n", + "save_strategy=IntervalStrategy.STEPS,\n", + "save_total_limit=None,\n", + "seed=42,\n", + "sharded_ddp=[],\n", + "skip_memory_metrics=True,\n", + "tpu_metrics_debug=False,\n", + "tpu_num_cores=None,\n", + "use_legacy_prediction_loop=False,\n", + "warmup_ratio=0.0,\n", + "warmup_steps=0,\n", + "weight_decay=0.0,\n", + ")\n", + "07/19/2021 09:59:27 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 09:59:27 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 09:59:27 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + "07/19/2021 09:59:27 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 09:59:27 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 09:59:28 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 09:59:28 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 09:59:28 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + 
"07/19/2021 09:59:28 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 09:59:28 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 09:59:28 - INFO - datasets.info - Loading Dataset Infos from /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 09:59:28 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", + "07/19/2021 09:59:28 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 09:59:28 - WARNING - datasets.builder - Reusing dataset conll2002 (/root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5)\n", + "07/19/2021 09:59:28 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "100% 3/3 [00:00<00:00, 27.39it/s]\n", + "[INFO|file_utils.py:1624] 2021-07-19 09:59:29,157 >> https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmph_lls71d\n", + "Downloading: 100% 618/618 [00:00<00:00, 462kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 09:59:29,878 >> storing https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/config.json in cache at /root/.cache/huggingface/transformers/367bb30bd1ae06268e9d1c64ae1fb923fc9931913fa478dfa01d79a4c7086238.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|file_utils.py:1636] 2021-07-19 09:59:29,878 >> creating metadata file for /root/.cache/huggingface/transformers/367bb30bd1ae06268e9d1c64ae1fb923fc9931913fa478dfa01d79a4c7086238.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|configuration_utils.py:545] 2021-07-19 09:59:29,878 >> loading configuration file https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/367bb30bd1ae06268e9d1c64ae1fb923fc9931913fa478dfa01d79a4c7086238.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|configuration_utils.py:581] 2021-07-19 09:59:29,879 >> Model config RobertaConfig {\n", + " \"architectures\": [\n", + " \"RobertaForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"bos_token_id\": 0,\n", + " \"eos_token_id\": 2,\n", + " \"finetuning_task\": \"ner\",\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"id2label\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8\n", + " },\n", + " 
\"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"label2id\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8\n", + " },\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"max_position_embeddings\": 514,\n", + " \"model_type\": \"roberta\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 1,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50265\n", + "}\n", + "\n", + "[INFO|file_utils.py:1624] 2021-07-19 09:59:30,597 >> https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/tokenizer_config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpa6ym7rla\n", + "Downloading: 100% 292/292 [00:00<00:00, 221kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 09:59:31,316 >> storing https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/tokenizer_config.json in cache at /root/.cache/huggingface/transformers/e577044edb84fa4a576105e2202c62cf62f831634f9581da80435c97b8034fba.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|file_utils.py:1636] 2021-07-19 09:59:31,317 >> creating metadata file for /root/.cache/huggingface/transformers/e577044edb84fa4a576105e2202c62cf62f831634f9581da80435c97b8034fba.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|file_utils.py:1624] 2021-07-19 09:59:32,043 >> https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/vocab.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpqpb1qtjv\n", + "Downloading: 100% 855k/855k [00:00<00:00, 1.60MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 09:59:33,298 >> storing https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/vocab.json in cache at /root/.cache/huggingface/transformers/dc7971c78d10d920138338883fd23b96f3994bce40018345ab1ba2ba8c8f6bdd.a80f232f572026f92499b14999a8ed4e044e04cf3d01b9f2be298c98e78e8498\n", + "[INFO|file_utils.py:1636] 2021-07-19 09:59:33,298 >> creating metadata file for /root/.cache/huggingface/transformers/dc7971c78d10d920138338883fd23b96f3994bce40018345ab1ba2ba8c8f6bdd.a80f232f572026f92499b14999a8ed4e044e04cf3d01b9f2be298c98e78e8498\n", + "[INFO|file_utils.py:1624] 2021-07-19 09:59:34,020 >> https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/merges.txt not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpksv749_o\n", + "Downloading: 100% 514k/514k [00:00<00:00, 986kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 09:59:35,793 >> storing https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/merges.txt in cache at /root/.cache/huggingface/transformers/5f573076405f6fab314615142ba9deec180a84917e495fecbf81f61afb2965cb.a0dfc41f9d0f03a56ba7a5401d770f6e43071045a0bd79073380d408d17a0d92\n", + "[INFO|file_utils.py:1636] 2021-07-19 09:59:35,794 >> creating metadata file for /root/.cache/huggingface/transformers/5f573076405f6fab314615142ba9deec180a84917e495fecbf81f61afb2965cb.a0dfc41f9d0f03a56ba7a5401d770f6e43071045a0bd79073380d408d17a0d92\n", + "[INFO|file_utils.py:1624] 2021-07-19 09:59:36,527 >> 
https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/tokenizer.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpqnzcwtbi\n", + "Downloading: 100% 1.47M/1.47M [00:00<00:00, 2.09MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 09:59:38,071 >> storing https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/tokenizer.json in cache at /root/.cache/huggingface/transformers/9a541e4855ef267ea4879cc9c2277f67dd5569f68cc688d212822bed1ca8755f.2a5dc806edc00ab3a329cb22b9973596ca75b24ba0e5e4963bf1308de7237a3d\n", + "[INFO|file_utils.py:1636] 2021-07-19 09:59:38,071 >> creating metadata file for /root/.cache/huggingface/transformers/9a541e4855ef267ea4879cc9c2277f67dd5569f68cc688d212822bed1ca8755f.2a5dc806edc00ab3a329cb22b9973596ca75b24ba0e5e4963bf1308de7237a3d\n", + "[INFO|file_utils.py:1624] 2021-07-19 09:59:39,506 >> https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/special_tokens_map.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpzakjsz9m\n", + "Downloading: 100% 239/239 [00:00<00:00, 176kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 09:59:40,756 >> storing https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/special_tokens_map.json in cache at /root/.cache/huggingface/transformers/8e21c2757a0c3938b80989bb3dabd355f9221ed98847fb957a5c4e9a86209c03.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|file_utils.py:1636] 2021-07-19 09:59:40,756 >> creating metadata file for /root/.cache/huggingface/transformers/8e21c2757a0c3938b80989bb3dabd355f9221ed98847fb957a5c4e9a86209c03.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 09:59:41,473 >> loading file https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/vocab.json from cache at /root/.cache/huggingface/transformers/dc7971c78d10d920138338883fd23b96f3994bce40018345ab1ba2ba8c8f6bdd.a80f232f572026f92499b14999a8ed4e044e04cf3d01b9f2be298c98e78e8498\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 09:59:41,473 >> loading file https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/merges.txt from cache at /root/.cache/huggingface/transformers/5f573076405f6fab314615142ba9deec180a84917e495fecbf81f61afb2965cb.a0dfc41f9d0f03a56ba7a5401d770f6e43071045a0bd79073380d408d17a0d92\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 09:59:41,473 >> loading file https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/9a541e4855ef267ea4879cc9c2277f67dd5569f68cc688d212822bed1ca8755f.2a5dc806edc00ab3a329cb22b9973596ca75b24ba0e5e4963bf1308de7237a3d\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 09:59:41,473 >> loading file https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 09:59:41,473 >> loading file https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/special_tokens_map.json from cache at /root/.cache/huggingface/transformers/8e21c2757a0c3938b80989bb3dabd355f9221ed98847fb957a5c4e9a86209c03.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 09:59:41,473 >> loading file 
https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/tokenizer_config.json from cache at /root/.cache/huggingface/transformers/e577044edb84fa4a576105e2202c62cf62f831634f9581da80435c97b8034fba.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|file_utils.py:1624] 2021-07-19 09:59:42,259 >> https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/pytorch_model.bin not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmplk38ang6\n", + "Downloading: 100% 499M/499M [00:46<00:00, 10.7MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 10:00:30,156 >> storing https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/4832a73c0f4a13adaab71151bf2413717416da487cdb2a79247f10198c6421f8.aebba6b503a22a0c70b362f8b026aa0f030aae594f7580f0164a0a73fb0001af\n", + "[INFO|file_utils.py:1636] 2021-07-19 10:00:30,157 >> creating metadata file for /root/.cache/huggingface/transformers/4832a73c0f4a13adaab71151bf2413717416da487cdb2a79247f10198c6421f8.aebba6b503a22a0c70b362f8b026aa0f030aae594f7580f0164a0a73fb0001af\n", + "[INFO|modeling_utils.py:1271] 2021-07-19 10:00:30,157 >> loading weights file https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/4832a73c0f4a13adaab71151bf2413717416da487cdb2a79247f10198c6421f8.aebba6b503a22a0c70b362f8b026aa0f030aae594f7580f0164a0a73fb0001af\n", + "[WARNING|modeling_utils.py:1502] 2021-07-19 10:00:31,608 >> Some weights of the model checkpoint at bertin-project/bertin-base-stepwise were not used when initializing RobertaForTokenClassification: ['lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.bias']\n", + "- This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "[WARNING|modeling_utils.py:1513] 2021-07-19 10:00:31,608 >> Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at bertin-project/bertin-base-stepwise and are newly initialized: ['classifier.weight', 'classifier.bias']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "Running tokenizer on train dataset: 0% 0/9 [00:00> The following columns in the training set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, pos_tags, id, ner_tags.\n", + "[INFO|trainer.py:1162] 2021-07-19 10:00:42,473 >> ***** Running training *****\n", + "[INFO|trainer.py:1163] 2021-07-19 10:00:42,473 >> Num examples = 8324\n", + "[INFO|trainer.py:1164] 2021-07-19 10:00:42,473 >> Num Epochs = 5\n", + "[INFO|trainer.py:1165] 2021-07-19 10:00:42,473 >> Instantaneous batch size per device = 8\n", + "[INFO|trainer.py:1166] 2021-07-19 10:00:42,473 >> Total train batch size (w. 
parallel, distributed & accumulation) = 8\n", + "[INFO|trainer.py:1167] 2021-07-19 10:00:42,474 >> Gradient Accumulation steps = 1\n", + "[INFO|trainer.py:1168] 2021-07-19 10:00:42,474 >> Total optimization steps = 5205\n", + "[INFO|integrations.py:417] 2021-07-19 10:00:42,490 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mversae\u001b[0m (use `wandb login --relogin` to force relogin)\n", + "2021-07-19 10:00:44.095745: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m./outputs\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval/runs/3dtrffsb\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in /content/wandb/run-20210719_100042-3dtrffsb\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run `wandb offline` to turn off syncing.\n", + "\n", + "{'loss': 0.1284, 'learning_rate': 4.5196926032660905e-05, 'epoch': 0.48}\n", + " 10% 500/5205 [02:51<30:10, 2.60it/s][INFO|trainer.py:1917] 2021-07-19 10:03:36,580 >> Saving model checkpoint to ./outputs/checkpoint-500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 10:03:36,582 >> Configuration saved in ./outputs/checkpoint-500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 10:03:38,006 >> Model weights saved in ./outputs/checkpoint-500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 10:03:38,007 >> tokenizer config file saved in ./outputs/checkpoint-500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 10:03:38,007 >> Special tokens file saved in ./outputs/checkpoint-500/special_tokens_map.json\n", + "{'loss': 0.0634, 'learning_rate': 4.039385206532181e-05, 'epoch': 0.96}\n", + " 19% 1000/5205 [05:47<27:17, 2.57it/s][INFO|trainer.py:1917] 2021-07-19 10:06:32,797 >> Saving model checkpoint to ./outputs/checkpoint-1000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 10:06:32,799 >> Configuration saved in ./outputs/checkpoint-1000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 10:06:34,318 >> Model weights saved in ./outputs/checkpoint-1000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 10:06:34,318 >> tokenizer config file saved in ./outputs/checkpoint-1000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 10:06:34,319 >> Special tokens file saved in ./outputs/checkpoint-1000/special_tokens_map.json\n", + " 29% 1500/5205 [08:43<23:57, 2.58it/s]{'loss': 0.0392, 'learning_rate': 3.5590778097982716e-05, 'epoch': 1.44}\n", + "[INFO|trainer.py:1917] 2021-07-19 10:09:28,641 >> Saving model checkpoint to ./outputs/checkpoint-1500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 10:09:28,643 >> Configuration saved in ./outputs/checkpoint-1500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 10:09:30,189 >> Model weights saved in ./outputs/checkpoint-1500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 10:09:30,190 >> tokenizer config file saved in ./outputs/checkpoint-1500/tokenizer_config.json\n", + 
"[INFO|tokenization_utils_base.py:2004] 2021-07-19 10:09:30,190 >> Special tokens file saved in ./outputs/checkpoint-1500/special_tokens_map.json\n", + " 38% 2000/5205 [11:39<20:40, 2.58it/s]{'loss': 0.0386, 'learning_rate': 3.078770413064361e-05, 'epoch': 1.92}\n", + "[INFO|trainer.py:1917] 2021-07-19 10:12:24,463 >> Saving model checkpoint to ./outputs/checkpoint-2000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 10:12:24,465 >> Configuration saved in ./outputs/checkpoint-2000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 10:12:25,827 >> Model weights saved in ./outputs/checkpoint-2000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 10:12:25,828 >> tokenizer config file saved in ./outputs/checkpoint-2000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 10:12:25,828 >> Special tokens file saved in ./outputs/checkpoint-2000/special_tokens_map.json\n", + " 48% 2500/5205 [14:35<17:29, 2.58it/s]{'loss': 0.0259, 'learning_rate': 2.5984630163304517e-05, 'epoch': 2.4}\n", + " 48% 2500/5205 [14:35<17:29, 2.58it/s][INFO|trainer.py:1917] 2021-07-19 10:15:20,423 >> Saving model checkpoint to ./outputs/checkpoint-2500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 10:15:20,427 >> Configuration saved in ./outputs/checkpoint-2500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 10:15:21,822 >> Model weights saved in ./outputs/checkpoint-2500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 10:15:21,823 >> tokenizer config file saved in ./outputs/checkpoint-2500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 10:15:21,823 >> Special tokens file saved in ./outputs/checkpoint-2500/special_tokens_map.json\n", + "{'loss': 0.0206, 'learning_rate': 2.118155619596542e-05, 'epoch': 2.88}\n", + " 58% 3000/5205 [17:30<14:06, 2.60it/s][INFO|trainer.py:1917] 2021-07-19 10:18:16,113 >> Saving model checkpoint to ./outputs/checkpoint-3000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 10:18:16,115 >> Configuration saved in ./outputs/checkpoint-3000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 10:18:17,682 >> Model weights saved in ./outputs/checkpoint-3000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 10:18:17,683 >> tokenizer config file saved in ./outputs/checkpoint-3000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 10:18:17,712 >> Special tokens file saved in ./outputs/checkpoint-3000/special_tokens_map.json\n", + " 67% 3500/5205 [20:26<10:58, 2.59it/s]{'loss': 0.0167, 'learning_rate': 1.6378482228626322e-05, 'epoch': 3.36}\n", + "[INFO|trainer.py:1917] 2021-07-19 10:21:11,952 >> Saving model checkpoint to ./outputs/checkpoint-3500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 10:21:11,958 >> Configuration saved in ./outputs/checkpoint-3500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 10:21:13,451 >> Model weights saved in ./outputs/checkpoint-3500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 10:21:13,452 >> tokenizer config file saved in ./outputs/checkpoint-3500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 10:21:13,452 >> Special tokens file saved in ./outputs/checkpoint-3500/special_tokens_map.json\n", + "{'loss': 0.0122, 'learning_rate': 1.1575408261287224e-05, 'epoch': 3.84}\n", + " 77% 4000/5205 [23:23<07:46, 2.58it/s][INFO|trainer.py:1917] 2021-07-19 10:24:08,832 >> Saving model checkpoint to 
./outputs/checkpoint-4000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 10:24:08,833 >> Configuration saved in ./outputs/checkpoint-4000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 10:24:10,424 >> Model weights saved in ./outputs/checkpoint-4000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 10:24:10,425 >> tokenizer config file saved in ./outputs/checkpoint-4000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 10:24:10,425 >> Special tokens file saved in ./outputs/checkpoint-4000/special_tokens_map.json\n", + " 86% 4500/5205 [26:20<04:34, 2.57it/s]{'loss': 0.0102, 'learning_rate': 6.7723342939481265e-06, 'epoch': 4.32}\n", + "[INFO|trainer.py:1917] 2021-07-19 10:27:05,632 >> Saving model checkpoint to ./outputs/checkpoint-4500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 10:27:05,633 >> Configuration saved in ./outputs/checkpoint-4500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 10:27:07,331 >> Model weights saved in ./outputs/checkpoint-4500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 10:27:07,333 >> tokenizer config file saved in ./outputs/checkpoint-4500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 10:27:07,333 >> Special tokens file saved in ./outputs/checkpoint-4500/special_tokens_map.json\n", + "{'loss': 0.0082, 'learning_rate': 1.96926032660903e-06, 'epoch': 4.8}\n", + " 96% 5000/5205 [29:17<01:19, 2.57it/s][INFO|trainer.py:1917] 2021-07-19 10:30:02,859 >> Saving model checkpoint to ./outputs/checkpoint-5000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 10:30:02,860 >> Configuration saved in ./outputs/checkpoint-5000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 10:30:04,283 >> Model weights saved in ./outputs/checkpoint-5000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 10:30:04,284 >> tokenizer config file saved in ./outputs/checkpoint-5000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 10:30:04,285 >> Special tokens file saved in ./outputs/checkpoint-5000/special_tokens_map.json\n", + "100% 5205/5205 [30:33<00:00, 3.23it/s][INFO|trainer.py:1358] 2021-07-19 10:31:18,770 >> \n", + "\n", + "Training completed. 
Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n", + "{'train_runtime': 1836.2971, 'train_samples_per_second': 22.665, 'train_steps_per_second': 2.835, 'train_loss': 0.03514896872865821, 'epoch': 5.0}\n", + "100% 5205/5205 [30:33<00:00, 2.84it/s]\n", + "[INFO|trainer.py:1917] 2021-07-19 10:31:18,782 >> Saving model checkpoint to ./outputs\n", + "[INFO|configuration_utils.py:379] 2021-07-19 10:31:18,784 >> Configuration saved in ./outputs/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 10:31:20,398 >> Model weights saved in ./outputs/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 10:31:20,399 >> tokenizer config file saved in ./outputs/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 10:31:20,399 >> Special tokens file saved in ./outputs/special_tokens_map.json\n", + "***** train metrics *****\n", + " epoch = 5.0\n", + " train_loss = 0.0351\n", + " train_runtime = 0:30:36.29\n", + " train_samples = 8324\n", + " train_samples_per_second = 22.665\n", + " train_steps_per_second = 2.835\n", + "[INFO|trainer.py:522] 2021-07-19 10:31:20,531 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, pos_tags, id, ner_tags.\n", + "07/19/2021 10:31:20 - INFO - __main__ - *** Evaluate ***\n", + "[INFO|trainer.py:2163] 2021-07-19 10:31:20,554 >> ***** Running Evaluation *****\n", + "[INFO|trainer.py:2165] 2021-07-19 10:31:20,554 >> Num examples = 1916\n", + "[INFO|trainer.py:2168] 2021-07-19 10:31:20,555 >> Batch size = 8\n", + "100% 240/240 [00:26<00:00, 9.03it/s]\n", + "***** eval metrics *****\n", + " epoch = 5.0\n", + " eval_accuracy = 0.9809\n", + " eval_f1 = 0.8705\n", + " eval_loss = 0.1126\n", + " eval_precision = 0.8643\n", + " eval_recall = 0.8768\n", + " eval_runtime = 0:00:26.68\n", + " eval_samples = 1916\n", + " eval_samples_per_second = 71.804\n", + " eval_steps_per_second = 8.994\n", + "\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish, PID 463\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Program ended successfully.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find user logs for this run at: /content/wandb/run-20210719_100042-3dtrffsb/logs/debug.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find internal logs for this run at: /content/wandb/run-20210719_100042-3dtrffsb/logs/debug-internal.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.0082\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 5205\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime 1865\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp 1626690707\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step 11\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime 1836.2971\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second 22.665\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second 2.835\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos 1.087585926764544e+16\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss 0.03515\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 0.1126\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision 0.86433\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall 0.87684\n", + 
"\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 0.87054\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy 0.98093\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 26.6838\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 71.804\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 8.994\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss █▄▃▃▂▂▁▁▁▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate █▇▆▆▅▄▃▃▂▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step ▁▂▂▃▄▄▅▅▆▇▇█\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 1 other file(s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced \u001b[33m./outputs\u001b[0m: \u001b[34mhttps://wandb.ai/versae/bertin-eval/runs/3dtrffsb\u001b[0m\n", + "2021-07-19 10:31:59.813609: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "07/19/2021 10:32:02 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", + "07/19/2021 10:32:02 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", + "_n_gpu=1,\n", + "adafactor=False,\n", + "adam_beta1=0.9,\n", + "adam_beta2=0.999,\n", + "adam_epsilon=1e-08,\n", + "dataloader_drop_last=False,\n", + "dataloader_num_workers=0,\n", + "dataloader_pin_memory=True,\n", + "ddp_find_unused_parameters=None,\n", + "debug=[],\n", + "deepspeed=None,\n", + "disable_tqdm=False,\n", + "do_eval=True,\n", + "do_predict=False,\n", + "do_train=True,\n", + "eval_accumulation_steps=None,\n", + "eval_steps=500,\n", + "evaluation_strategy=IntervalStrategy.NO,\n", + "fp16=False,\n", + "fp16_backend=auto,\n", + "fp16_full_eval=False,\n", + "fp16_opt_level=O1,\n", + "gradient_accumulation_steps=1,\n", + "greater_is_better=None,\n", + "group_by_length=False,\n", + "ignore_data_skip=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=5e-05,\n", + "length_column_name=length,\n", + "load_best_model_at_end=False,\n", + "local_rank=-1,\n", + "log_level=-1,\n", + "log_level_replica=-1,\n", + "log_on_each_node=True,\n", + "logging_dir=./outputs/runs/Jul19_10-32-02_eab5184de9ff,\n", + "logging_first_step=False,\n", + "logging_steps=500,\n", + 
"logging_strategy=IntervalStrategy.STEPS,\n", + "lr_scheduler_type=SchedulerType.LINEAR,\n", + "max_grad_norm=1.0,\n", + "max_steps=-1,\n", + "metric_for_best_model=None,\n", + "mp_parameters=,\n", + "no_cuda=False,\n", + "num_train_epochs=5.0,\n", + "output_dir=./outputs,\n", + "overwrite_output_dir=True,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=8,\n", + "per_device_train_batch_size=8,\n", + "prediction_loss_only=False,\n", + "push_to_hub=False,\n", + "push_to_hub_model_id=outputs,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=None,\n", + "remove_unused_columns=True,\n", + "report_to=['tensorboard', 'wandb'],\n", + "resume_from_checkpoint=None,\n", + "run_name=./outputs,\n", + "save_on_each_node=False,\n", + "save_steps=500,\n", + "save_strategy=IntervalStrategy.STEPS,\n", + "save_total_limit=None,\n", + "seed=42,\n", + "sharded_ddp=[],\n", + "skip_memory_metrics=True,\n", + "tpu_metrics_debug=False,\n", + "tpu_num_cores=None,\n", + "use_legacy_prediction_loop=False,\n", + "warmup_ratio=0.0,\n", + "warmup_steps=0,\n", + "weight_decay=0.0,\n", + ")\n", + "07/19/2021 10:32:04 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 10:32:04 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 10:32:04 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + "07/19/2021 10:32:04 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 10:32:04 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 10:32:05 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 10:32:05 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 10:32:05 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + 
"07/19/2021 10:32:05 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 10:32:05 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 10:32:05 - INFO - datasets.info - Loading Dataset Infos from /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 10:32:05 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", + "07/19/2021 10:32:05 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 10:32:05 - WARNING - datasets.builder - Reusing dataset conll2002 (/root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5)\n", + "07/19/2021 10:32:05 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "100% 3/3 [00:00<00:00, 26.98it/s]\n", + "[INFO|file_utils.py:1624] 2021-07-19 10:32:06,091 >> https://huggingface.co/bertin-project/bertin-base-random/resolve/main/config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpn8ndvk_m\n", + "Downloading: 100% 618/618 [00:00<00:00, 441kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 10:32:07,076 >> storing https://huggingface.co/bertin-project/bertin-base-random/resolve/main/config.json in cache at /root/.cache/huggingface/transformers/206b1dac57f81203b68e667a852122cb8107d8f6ec61c5f61ff3911b995464bc.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|file_utils.py:1636] 2021-07-19 10:32:07,076 >> creating metadata file for /root/.cache/huggingface/transformers/206b1dac57f81203b68e667a852122cb8107d8f6ec61c5f61ff3911b995464bc.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|configuration_utils.py:545] 2021-07-19 10:32:07,077 >> loading configuration file https://huggingface.co/bertin-project/bertin-base-random/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/206b1dac57f81203b68e667a852122cb8107d8f6ec61c5f61ff3911b995464bc.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|configuration_utils.py:581] 2021-07-19 10:32:07,078 >> Model config RobertaConfig {\n", + " \"architectures\": [\n", + " \"RobertaForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"bos_token_id\": 0,\n", + " \"eos_token_id\": 2,\n", + " \"finetuning_task\": \"ner\",\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"id2label\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8\n", + " },\n", + " 
\"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"label2id\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8\n", + " },\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"max_position_embeddings\": 514,\n", + " \"model_type\": \"roberta\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 1,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50265\n", + "}\n", + "\n", + "[INFO|file_utils.py:1624] 2021-07-19 10:32:07,798 >> https://huggingface.co/bertin-project/bertin-base-random/resolve/main/tokenizer_config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpz_j0rpzc\n", + "Downloading: 100% 292/292 [00:00<00:00, 217kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 10:32:08,519 >> storing https://huggingface.co/bertin-project/bertin-base-random/resolve/main/tokenizer_config.json in cache at /root/.cache/huggingface/transformers/115b6bf4f08e65d03dda82a834c42cf46339813770aadf59679bd51d0f2ea3a5.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|file_utils.py:1636] 2021-07-19 10:32:08,519 >> creating metadata file for /root/.cache/huggingface/transformers/115b6bf4f08e65d03dda82a834c42cf46339813770aadf59679bd51d0f2ea3a5.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|file_utils.py:1624] 2021-07-19 10:32:09,243 >> https://huggingface.co/bertin-project/bertin-base-random/resolve/main/vocab.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpkr_3etqa\n", + "Downloading: 100% 855k/855k [00:00<00:00, 1.62MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 10:32:10,524 >> storing https://huggingface.co/bertin-project/bertin-base-random/resolve/main/vocab.json in cache at /root/.cache/huggingface/transformers/b4006ecc1f0c7264bbecc09c6cb89b65ec7db509c3c0604de97e88f24ea4d1f6.a80f232f572026f92499b14999a8ed4e044e04cf3d01b9f2be298c98e78e8498\n", + "[INFO|file_utils.py:1636] 2021-07-19 10:32:10,525 >> creating metadata file for /root/.cache/huggingface/transformers/b4006ecc1f0c7264bbecc09c6cb89b65ec7db509c3c0604de97e88f24ea4d1f6.a80f232f572026f92499b14999a8ed4e044e04cf3d01b9f2be298c98e78e8498\n", + "[INFO|file_utils.py:1624] 2021-07-19 10:32:11,246 >> https://huggingface.co/bertin-project/bertin-base-random/resolve/main/merges.txt not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpzyyos06z\n", + "Downloading: 100% 514k/514k [00:00<00:00, 980kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 10:32:12,765 >> storing https://huggingface.co/bertin-project/bertin-base-random/resolve/main/merges.txt in cache at /root/.cache/huggingface/transformers/edf6b87725489a8b69bfa2267d17e097798faa72e64ff4c5cc3d18672be1064c.a0dfc41f9d0f03a56ba7a5401d770f6e43071045a0bd79073380d408d17a0d92\n", + "[INFO|file_utils.py:1636] 2021-07-19 10:32:12,765 >> creating metadata file for /root/.cache/huggingface/transformers/edf6b87725489a8b69bfa2267d17e097798faa72e64ff4c5cc3d18672be1064c.a0dfc41f9d0f03a56ba7a5401d770f6e43071045a0bd79073380d408d17a0d92\n", + "[INFO|file_utils.py:1624] 2021-07-19 10:32:13,495 >> https://huggingface.co/bertin-project/bertin-base-random/resolve/main/tokenizer.json not 
found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpgdijuhio\n", + "Downloading: 100% 1.47M/1.47M [00:00<00:00, 2.09MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 10:32:14,927 >> storing https://huggingface.co/bertin-project/bertin-base-random/resolve/main/tokenizer.json in cache at /root/.cache/huggingface/transformers/a2564e0d9105fa93a9e315cbeba7570df09180efa1d54facf9770aa67f46bfe6.2a5dc806edc00ab3a329cb22b9973596ca75b24ba0e5e4963bf1308de7237a3d\n", + "[INFO|file_utils.py:1636] 2021-07-19 10:32:14,927 >> creating metadata file for /root/.cache/huggingface/transformers/a2564e0d9105fa93a9e315cbeba7570df09180efa1d54facf9770aa67f46bfe6.2a5dc806edc00ab3a329cb22b9973596ca75b24ba0e5e4963bf1308de7237a3d\n", + "[INFO|file_utils.py:1624] 2021-07-19 10:32:16,366 >> https://huggingface.co/bertin-project/bertin-base-random/resolve/main/special_tokens_map.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp0wf9iu70\n", + "Downloading: 100% 239/239 [00:00<00:00, 188kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 10:32:17,102 >> storing https://huggingface.co/bertin-project/bertin-base-random/resolve/main/special_tokens_map.json in cache at /root/.cache/huggingface/transformers/92bfdce86055cda7484e6cda39edf00079de963108ce7a53ac914029100d8a99.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|file_utils.py:1636] 2021-07-19 10:32:17,102 >> creating metadata file for /root/.cache/huggingface/transformers/92bfdce86055cda7484e6cda39edf00079de963108ce7a53ac914029100d8a99.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 10:32:17,820 >> loading file https://huggingface.co/bertin-project/bertin-base-random/resolve/main/vocab.json from cache at /root/.cache/huggingface/transformers/b4006ecc1f0c7264bbecc09c6cb89b65ec7db509c3c0604de97e88f24ea4d1f6.a80f232f572026f92499b14999a8ed4e044e04cf3d01b9f2be298c98e78e8498\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 10:32:17,820 >> loading file https://huggingface.co/bertin-project/bertin-base-random/resolve/main/merges.txt from cache at /root/.cache/huggingface/transformers/edf6b87725489a8b69bfa2267d17e097798faa72e64ff4c5cc3d18672be1064c.a0dfc41f9d0f03a56ba7a5401d770f6e43071045a0bd79073380d408d17a0d92\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 10:32:17,820 >> loading file https://huggingface.co/bertin-project/bertin-base-random/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/a2564e0d9105fa93a9e315cbeba7570df09180efa1d54facf9770aa67f46bfe6.2a5dc806edc00ab3a329cb22b9973596ca75b24ba0e5e4963bf1308de7237a3d\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 10:32:17,820 >> loading file https://huggingface.co/bertin-project/bertin-base-random/resolve/main/added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 10:32:17,820 >> loading file https://huggingface.co/bertin-project/bertin-base-random/resolve/main/special_tokens_map.json from cache at /root/.cache/huggingface/transformers/92bfdce86055cda7484e6cda39edf00079de963108ce7a53ac914029100d8a99.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 10:32:17,820 >> loading file https://huggingface.co/bertin-project/bertin-base-random/resolve/main/tokenizer_config.json from cache at 
/root/.cache/huggingface/transformers/115b6bf4f08e65d03dda82a834c42cf46339813770aadf59679bd51d0f2ea3a5.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|file_utils.py:1624] 2021-07-19 10:32:18,877 >> https://huggingface.co/bertin-project/bertin-base-random/resolve/main/pytorch_model.bin not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpnd8xa6xo\n", + "Downloading: 100% 499M/499M [00:47<00:00, 10.5MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 10:33:07,769 >> storing https://huggingface.co/bertin-project/bertin-base-random/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/5bec117e97fb2ec50c8615b1e2cde784a5851d7664417aa49e21403f91722df2.fc71d0bf8be2b57b708f6397729f6f12e8e95a3e69e8a44030293be69ee5bc0d\n", + "[INFO|file_utils.py:1636] 2021-07-19 10:33:07,769 >> creating metadata file for /root/.cache/huggingface/transformers/5bec117e97fb2ec50c8615b1e2cde784a5851d7664417aa49e21403f91722df2.fc71d0bf8be2b57b708f6397729f6f12e8e95a3e69e8a44030293be69ee5bc0d\n", + "[INFO|modeling_utils.py:1271] 2021-07-19 10:33:07,770 >> loading weights file https://huggingface.co/bertin-project/bertin-base-random/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/5bec117e97fb2ec50c8615b1e2cde784a5851d7664417aa49e21403f91722df2.fc71d0bf8be2b57b708f6397729f6f12e8e95a3e69e8a44030293be69ee5bc0d\n", + "[WARNING|modeling_utils.py:1502] 2021-07-19 10:33:09,270 >> Some weights of the model checkpoint at bertin-project/bertin-base-random were not used when initializing RobertaForTokenClassification: ['lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias']\n", + "- This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "[WARNING|modeling_utils.py:1513] 2021-07-19 10:33:09,270 >> Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at bertin-project/bertin-base-random and are newly initialized: ['classifier.bias', 'classifier.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "Running tokenizer on train dataset: 0% 0/9 [00:00> The following columns in the training set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, pos_tags, id, tokens.\n", + "[INFO|trainer.py:1162] 2021-07-19 10:33:20,589 >> ***** Running training *****\n", + "[INFO|trainer.py:1163] 2021-07-19 10:33:20,589 >> Num examples = 8324\n", + "[INFO|trainer.py:1164] 2021-07-19 10:33:20,589 >> Num Epochs = 5\n", + "[INFO|trainer.py:1165] 2021-07-19 10:33:20,589 >> Instantaneous batch size per device = 8\n", + "[INFO|trainer.py:1166] 2021-07-19 10:33:20,589 >> Total train batch size (w. 
parallel, distributed & accumulation) = 8\n", + "[INFO|trainer.py:1167] 2021-07-19 10:33:20,589 >> Gradient Accumulation steps = 1\n", + "[INFO|trainer.py:1168] 2021-07-19 10:33:20,589 >> Total optimization steps = 5205\n", + "[INFO|integrations.py:417] 2021-07-19 10:33:20,607 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mversae\u001b[0m (use `wandb login --relogin` to force relogin)\n", + "2021-07-19 10:33:22.258421: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m./outputs\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval/runs/ibzlw8va\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in /content/wandb/run-20210719_103320-ibzlw8va\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run `wandb offline` to turn off syncing.\n", + "\n", + "{'loss': 0.1287, 'learning_rate': 4.5196926032660905e-05, 'epoch': 0.48}\n", + " 10% 500/5205 [02:53<31:17, 2.51it/s][INFO|trainer.py:1917] 2021-07-19 10:36:16,688 >> Saving model checkpoint to ./outputs/checkpoint-500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 10:36:16,690 >> Configuration saved in ./outputs/checkpoint-500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 10:36:18,026 >> Model weights saved in ./outputs/checkpoint-500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 10:36:18,027 >> tokenizer config file saved in ./outputs/checkpoint-500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 10:36:18,027 >> Special tokens file saved in ./outputs/checkpoint-500/special_tokens_map.json\n", + "{'loss': 0.0639, 'learning_rate': 4.039385206532181e-05, 'epoch': 0.96}\n", + " 19% 1000/5205 [05:51<27:24, 2.56it/s][INFO|trainer.py:1917] 2021-07-19 10:39:15,501 >> Saving model checkpoint to ./outputs/checkpoint-1000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 10:39:15,503 >> Configuration saved in ./outputs/checkpoint-1000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 10:39:17,123 >> Model weights saved in ./outputs/checkpoint-1000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 10:39:17,124 >> tokenizer config file saved in ./outputs/checkpoint-1000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 10:39:17,124 >> Special tokens file saved in ./outputs/checkpoint-1000/special_tokens_map.json\n", + " 29% 1500/5205 [08:50<24:18, 2.54it/s][INFO|trainer.py:1917] 2021-07-19 10:42:13,678 >> Saving model checkpoint to ./outputs/checkpoint-1500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 10:42:13,679 >> Configuration saved in ./outputs/checkpoint-1500/config.json\n", + "{'loss': 0.0415, 'learning_rate': 3.5590778097982716e-05, 'epoch': 1.44}\n", + "[INFO|modeling_utils.py:997] 2021-07-19 10:42:15,229 >> Model weights saved in ./outputs/checkpoint-1500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 10:42:15,229 >> tokenizer config file saved in ./outputs/checkpoint-1500/tokenizer_config.json\n", + 
"[INFO|tokenization_utils_base.py:2004] 2021-07-19 10:42:15,230 >> Special tokens file saved in ./outputs/checkpoint-1500/special_tokens_map.json\n", + " 38% 2000/5205 [11:48<20:52, 2.56it/s]{'loss': 0.0391, 'learning_rate': 3.078770413064361e-05, 'epoch': 1.92}\n", + "[INFO|trainer.py:1917] 2021-07-19 10:45:12,082 >> Saving model checkpoint to ./outputs/checkpoint-2000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 10:45:12,083 >> Configuration saved in ./outputs/checkpoint-2000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 10:45:13,570 >> Model weights saved in ./outputs/checkpoint-2000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 10:45:13,571 >> tokenizer config file saved in ./outputs/checkpoint-2000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 10:45:13,572 >> Special tokens file saved in ./outputs/checkpoint-2000/special_tokens_map.json\n", + "{'loss': 0.0264, 'learning_rate': 2.5984630163304517e-05, 'epoch': 2.4}\n", + " 48% 2500/5205 [14:46<17:34, 2.56it/s][INFO|trainer.py:1917] 2021-07-19 10:48:10,298 >> Saving model checkpoint to ./outputs/checkpoint-2500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 10:48:10,299 >> Configuration saved in ./outputs/checkpoint-2500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 10:48:11,882 >> Model weights saved in ./outputs/checkpoint-2500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 10:48:11,882 >> tokenizer config file saved in ./outputs/checkpoint-2500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 10:48:11,883 >> Special tokens file saved in ./outputs/checkpoint-2500/special_tokens_map.json\n", + "{'loss': 0.0231, 'learning_rate': 2.118155619596542e-05, 'epoch': 2.88}\n", + " 58% 3000/5205 [17:44<14:23, 2.55it/s][INFO|trainer.py:1917] 2021-07-19 10:51:08,324 >> Saving model checkpoint to ./outputs/checkpoint-3000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 10:51:08,325 >> Configuration saved in ./outputs/checkpoint-3000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 10:51:09,869 >> Model weights saved in ./outputs/checkpoint-3000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 10:51:09,870 >> tokenizer config file saved in ./outputs/checkpoint-3000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 10:51:09,870 >> Special tokens file saved in ./outputs/checkpoint-3000/special_tokens_map.json\n", + " 67% 3500/5205 [20:41<11:04, 2.56it/s]{'loss': 0.0165, 'learning_rate': 1.6378482228626322e-05, 'epoch': 3.36}\n", + " 67% 3500/5205 [20:41<11:04, 2.56it/s][INFO|trainer.py:1917] 2021-07-19 10:54:05,467 >> Saving model checkpoint to ./outputs/checkpoint-3500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 10:54:05,469 >> Configuration saved in ./outputs/checkpoint-3500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 10:54:07,021 >> Model weights saved in ./outputs/checkpoint-3500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 10:54:07,058 >> tokenizer config file saved in ./outputs/checkpoint-3500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 10:54:07,059 >> Special tokens file saved in ./outputs/checkpoint-3500/special_tokens_map.json\n", + " 77% 4000/5205 [23:39<07:49, 2.57it/s]{'loss': 0.0134, 'learning_rate': 1.1575408261287224e-05, 'epoch': 3.84}\n", + " 77% 4000/5205 [23:39<07:49, 2.57it/s][INFO|trainer.py:1917] 2021-07-19 10:57:03,171 
>> Saving model checkpoint to ./outputs/checkpoint-4000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 10:57:03,175 >> Configuration saved in ./outputs/checkpoint-4000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 10:57:04,716 >> Model weights saved in ./outputs/checkpoint-4000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 10:57:04,716 >> tokenizer config file saved in ./outputs/checkpoint-4000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 10:57:04,717 >> Special tokens file saved in ./outputs/checkpoint-4000/special_tokens_map.json\n", + "{'loss': 0.0097, 'learning_rate': 6.7723342939481265e-06, 'epoch': 4.32}\n", + " 86% 4500/5205 [26:36<04:36, 2.55it/s][INFO|trainer.py:1917] 2021-07-19 11:00:00,353 >> Saving model checkpoint to ./outputs/checkpoint-4500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 11:00:00,354 >> Configuration saved in ./outputs/checkpoint-4500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 11:00:01,980 >> Model weights saved in ./outputs/checkpoint-4500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 11:00:01,981 >> tokenizer config file saved in ./outputs/checkpoint-4500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 11:00:01,982 >> Special tokens file saved in ./outputs/checkpoint-4500/special_tokens_map.json\n", + " 96% 5000/5205 [29:34<01:19, 2.57it/s]{'loss': 0.0085, 'learning_rate': 1.96926032660903e-06, 'epoch': 4.8}\n", + " 96% 5000/5205 [29:34<01:19, 2.57it/s][INFO|trainer.py:1917] 2021-07-19 11:02:57,860 >> Saving model checkpoint to ./outputs/checkpoint-5000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 11:02:57,865 >> Configuration saved in ./outputs/checkpoint-5000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 11:02:59,390 >> Model weights saved in ./outputs/checkpoint-5000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 11:02:59,391 >> tokenizer config file saved in ./outputs/checkpoint-5000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 11:02:59,392 >> Special tokens file saved in ./outputs/checkpoint-5000/special_tokens_map.json\n", + "100% 5205/5205 [30:49<00:00, 3.28it/s][INFO|trainer.py:1358] 2021-07-19 11:04:13,445 >> \n", + "\n", + "Training completed. 
Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n", + "{'train_runtime': 1852.8561, 'train_samples_per_second': 22.463, 'train_steps_per_second': 2.809, 'train_loss': 0.03592821839448927, 'epoch': 5.0}\n", + "100% 5205/5205 [30:49<00:00, 2.81it/s]\n", + "[INFO|trainer.py:1917] 2021-07-19 11:04:13,449 >> Saving model checkpoint to ./outputs\n", + "[INFO|configuration_utils.py:379] 2021-07-19 11:04:13,451 >> Configuration saved in ./outputs/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 11:04:14,892 >> Model weights saved in ./outputs/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 11:04:14,893 >> tokenizer config file saved in ./outputs/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 11:04:14,893 >> Special tokens file saved in ./outputs/special_tokens_map.json\n", + "***** train metrics *****\n", + " epoch = 5.0\n", + " train_loss = 0.0359\n", + " train_runtime = 0:30:52.85\n", + " train_samples = 8324\n", + " train_samples_per_second = 22.463\n", + " train_steps_per_second = 2.809\n", + "[INFO|trainer.py:522] 2021-07-19 11:04:15,047 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, pos_tags, id, tokens.\n", + "07/19/2021 11:04:15 - INFO - __main__ - *** Evaluate ***\n", + "[INFO|trainer.py:2163] 2021-07-19 11:04:15,145 >> ***** Running Evaluation *****\n", + "[INFO|trainer.py:2165] 2021-07-19 11:04:15,147 >> Num examples = 1916\n", + "[INFO|trainer.py:2168] 2021-07-19 11:04:15,147 >> Batch size = 8\n", + "100% 240/240 [00:26<00:00, 9.01it/s]\n", + "***** eval metrics *****\n", + " epoch = 5.0\n", + " eval_accuracy = 0.9807\n", + " eval_f1 = 0.8704\n", + " eval_loss = 0.1116\n", + " eval_precision = 0.8652\n", + " eval_recall = 0.8757\n", + " eval_runtime = 0:00:26.78\n", + " eval_samples = 1916\n", + " eval_samples_per_second = 71.544\n", + " eval_steps_per_second = 8.962\n", + "\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish, PID 534\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Program ended successfully.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find user logs for this run at: /content/wandb/run-20210719_103320-ibzlw8va/logs/debug.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find internal logs for this run at: /content/wandb/run-20210719_103320-ibzlw8va/logs/debug-internal.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.0085\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 5205\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime 1881\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp 1626692681\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step 11\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime 1852.8561\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second 22.463\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second 2.809\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos 1.087585926764544e+16\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss 0.03593\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 0.11165\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision 0.86515\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall 0.87569\n", + 
"\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 0.87039\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy 0.98067\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 26.7809\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 71.544\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 8.962\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss █▄▃▃▂▂▁▁▁▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate █▇▆▆▅▄▃▃▂▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step ▁▂▂▃▄▄▅▅▆▇▇█\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 1 other file(s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced \u001b[33m./outputs\u001b[0m: \u001b[34mhttps://wandb.ai/versae/bertin-eval/runs/ibzlw8va\u001b[0m\n", + "2021-07-19 11:04:55.088954: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "07/19/2021 11:04:57 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", + "07/19/2021 11:04:57 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", + "_n_gpu=1,\n", + "adafactor=False,\n", + "adam_beta1=0.9,\n", + "adam_beta2=0.999,\n", + "adam_epsilon=1e-08,\n", + "dataloader_drop_last=False,\n", + "dataloader_num_workers=0,\n", + "dataloader_pin_memory=True,\n", + "ddp_find_unused_parameters=None,\n", + "debug=[],\n", + "deepspeed=None,\n", + "disable_tqdm=False,\n", + "do_eval=True,\n", + "do_predict=False,\n", + "do_train=True,\n", + "eval_accumulation_steps=None,\n", + "eval_steps=500,\n", + "evaluation_strategy=IntervalStrategy.NO,\n", + "fp16=False,\n", + "fp16_backend=auto,\n", + "fp16_full_eval=False,\n", + "fp16_opt_level=O1,\n", + "gradient_accumulation_steps=1,\n", + "greater_is_better=None,\n", + "group_by_length=False,\n", + "ignore_data_skip=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=5e-05,\n", + "length_column_name=length,\n", + "load_best_model_at_end=False,\n", + "local_rank=-1,\n", + "log_level=-1,\n", + "log_level_replica=-1,\n", + "log_on_each_node=True,\n", + "logging_dir=./outputs/runs/Jul19_11-04-57_eab5184de9ff,\n", + "logging_first_step=False,\n", + "logging_steps=500,\n", + 
"logging_strategy=IntervalStrategy.STEPS,\n", + "lr_scheduler_type=SchedulerType.LINEAR,\n", + "max_grad_norm=1.0,\n", + "max_steps=-1,\n", + "metric_for_best_model=None,\n", + "mp_parameters=,\n", + "no_cuda=False,\n", + "num_train_epochs=5.0,\n", + "output_dir=./outputs,\n", + "overwrite_output_dir=True,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=8,\n", + "per_device_train_batch_size=8,\n", + "prediction_loss_only=False,\n", + "push_to_hub=False,\n", + "push_to_hub_model_id=outputs,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=None,\n", + "remove_unused_columns=True,\n", + "report_to=['tensorboard', 'wandb'],\n", + "resume_from_checkpoint=None,\n", + "run_name=./outputs,\n", + "save_on_each_node=False,\n", + "save_steps=500,\n", + "save_strategy=IntervalStrategy.STEPS,\n", + "save_total_limit=None,\n", + "seed=42,\n", + "sharded_ddp=[],\n", + "skip_memory_metrics=True,\n", + "tpu_metrics_debug=False,\n", + "tpu_num_cores=None,\n", + "use_legacy_prediction_loop=False,\n", + "warmup_ratio=0.0,\n", + "warmup_steps=0,\n", + "weight_decay=0.0,\n", + ")\n", + "07/19/2021 11:04:59 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 11:04:59 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 11:04:59 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + "07/19/2021 11:04:59 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 11:04:59 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 11:05:00 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 11:05:00 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 11:05:00 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + 
"07/19/2021 11:05:00 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 11:05:00 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 11:05:00 - INFO - datasets.info - Loading Dataset Infos from /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 11:05:00 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", + "07/19/2021 11:05:00 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 11:05:00 - WARNING - datasets.builder - Reusing dataset conll2002 (/root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5)\n", + "07/19/2021 11:05:00 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "100% 3/3 [00:00<00:00, 26.69it/s]\n", + "[INFO|file_utils.py:1624] 2021-07-19 11:05:01,612 >> https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpx_65nogc\n", + "Downloading: 100% 618/618 [00:00<00:00, 500kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 11:05:02,332 >> storing https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/config.json in cache at /root/.cache/huggingface/transformers/aa24361f0b7bed62876f6cd0a784a2b622c1959523906d89eeb1112139a4864a.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|file_utils.py:1636] 2021-07-19 11:05:02,332 >> creating metadata file for /root/.cache/huggingface/transformers/aa24361f0b7bed62876f6cd0a784a2b622c1959523906d89eeb1112139a4864a.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|configuration_utils.py:545] 2021-07-19 11:05:02,332 >> loading configuration file https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/aa24361f0b7bed62876f6cd0a784a2b622c1959523906d89eeb1112139a4864a.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|configuration_utils.py:581] 2021-07-19 11:05:02,333 >> Model config RobertaConfig {\n", + " \"architectures\": [\n", + " \"RobertaForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"bos_token_id\": 0,\n", + " \"eos_token_id\": 2,\n", + " \"finetuning_task\": \"ner\",\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"id2label\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8\n", + " 
},\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"label2id\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8\n", + " },\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"max_position_embeddings\": 514,\n", + " \"model_type\": \"roberta\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 1,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50265\n", + "}\n", + "\n", + "[INFO|file_utils.py:1624] 2021-07-19 11:05:03,052 >> https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/tokenizer_config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp9x2e0_gj\n", + "Downloading: 100% 292/292 [00:00<00:00, 214kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 11:05:03,773 >> storing https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/tokenizer_config.json in cache at /root/.cache/huggingface/transformers/72bf61c243630a112d8fa8c8d9162f1a5e01fab0602d2f2a7792cecdc0a4986f.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|file_utils.py:1636] 2021-07-19 11:05:03,773 >> creating metadata file for /root/.cache/huggingface/transformers/72bf61c243630a112d8fa8c8d9162f1a5e01fab0602d2f2a7792cecdc0a4986f.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|file_utils.py:1624] 2021-07-19 11:05:04,499 >> https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/vocab.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpzo0fedli\n", + "Downloading: 100% 846k/846k [00:00<00:00, 1.48MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 11:05:05,794 >> storing https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/vocab.json in cache at /root/.cache/huggingface/transformers/f47efb87887425ef9a4ef795bfaa907d57ac9a650d733c7ca621b9eced3235e8.af6be2925520943ae92cd64250bdaa83a8c9d6f91422efbac7ed33a20e0b6e1b\n", + "[INFO|file_utils.py:1636] 2021-07-19 11:05:05,795 >> creating metadata file for /root/.cache/huggingface/transformers/f47efb87887425ef9a4ef795bfaa907d57ac9a650d733c7ca621b9eced3235e8.af6be2925520943ae92cd64250bdaa83a8c9d6f91422efbac7ed33a20e0b6e1b\n", + "[INFO|file_utils.py:1624] 2021-07-19 11:05:06,786 >> https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/merges.txt not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpip9hh8gg\n", + "Downloading: 100% 505k/505k [00:00<00:00, 1.42MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 11:05:07,862 >> storing https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/merges.txt in cache at /root/.cache/huggingface/transformers/aba9e0895dea47dd4208a36012ffd3eb21eb4c5f7ce0be6547afb37cdd4ddef4.360a60d1c13cbafd862a8e2a85b267c2b3eb0226d5469633a889b33ba0dde234\n", + "[INFO|file_utils.py:1636] 2021-07-19 11:05:07,862 >> creating metadata file for /root/.cache/huggingface/transformers/aba9e0895dea47dd4208a36012ffd3eb21eb4c5f7ce0be6547afb37cdd4ddef4.360a60d1c13cbafd862a8e2a85b267c2b3eb0226d5469633a889b33ba0dde234\n", + "[INFO|file_utils.py:1624] 2021-07-19 11:05:08,592 >> 
https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/tokenizer.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp9zwks1tj\n", + "Downloading: 100% 1.45M/1.45M [00:00<00:00, 2.06MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 11:05:10,024 >> storing https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/tokenizer.json in cache at /root/.cache/huggingface/transformers/baad57d0f574d3e660cafb14601d0ecebe83f25071d59f3e51d225d75285b773.0803a0fadc521f273c2682a34dc285f90fe9de9e932bbe77401d11672f7fcb60\n", + "[INFO|file_utils.py:1636] 2021-07-19 11:05:10,024 >> creating metadata file for /root/.cache/huggingface/transformers/baad57d0f574d3e660cafb14601d0ecebe83f25071d59f3e51d225d75285b773.0803a0fadc521f273c2682a34dc285f90fe9de9e932bbe77401d11672f7fcb60\n", + "[INFO|file_utils.py:1624] 2021-07-19 11:05:11,551 >> https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/special_tokens_map.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpx8dpmj_p\n", + "Downloading: 100% 239/239 [00:00<00:00, 177kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 11:05:12,541 >> storing https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/special_tokens_map.json in cache at /root/.cache/huggingface/transformers/68d1fdfe72fdcac403d8f363239c379d8125162f50a954030c4476982f88d69e.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|file_utils.py:1636] 2021-07-19 11:05:12,541 >> creating metadata file for /root/.cache/huggingface/transformers/68d1fdfe72fdcac403d8f363239c379d8125162f50a954030c4476982f88d69e.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 11:05:13,258 >> loading file https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/vocab.json from cache at /root/.cache/huggingface/transformers/f47efb87887425ef9a4ef795bfaa907d57ac9a650d733c7ca621b9eced3235e8.af6be2925520943ae92cd64250bdaa83a8c9d6f91422efbac7ed33a20e0b6e1b\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 11:05:13,258 >> loading file https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/merges.txt from cache at /root/.cache/huggingface/transformers/aba9e0895dea47dd4208a36012ffd3eb21eb4c5f7ce0be6547afb37cdd4ddef4.360a60d1c13cbafd862a8e2a85b267c2b3eb0226d5469633a889b33ba0dde234\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 11:05:13,258 >> loading file https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/baad57d0f574d3e660cafb14601d0ecebe83f25071d59f3e51d225d75285b773.0803a0fadc521f273c2682a34dc285f90fe9de9e932bbe77401d11672f7fcb60\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 11:05:13,259 >> loading file https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 11:05:13,259 >> loading file https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/special_tokens_map.json from cache at /root/.cache/huggingface/transformers/68d1fdfe72fdcac403d8f363239c379d8125162f50a954030c4476982f88d69e.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 11:05:13,259 >> loading file 
https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/tokenizer_config.json from cache at /root/.cache/huggingface/transformers/72bf61c243630a112d8fa8c8d9162f1a5e01fab0602d2f2a7792cecdc0a4986f.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|file_utils.py:1624] 2021-07-19 11:05:14,052 >> https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/pytorch_model.bin not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp8b5ozkc8\n", + "Downloading: 100% 499M/499M [00:47<00:00, 10.5MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 11:06:03,353 >> storing https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/8a611c8e6ab409ea523e84173bef4b1ef257262487d732b05c68d31b674788e5.ae8a25c127f59d8b0d9d11c53667336d7c491e5b06338b3ce56369ee735acd6f\n", + "[INFO|file_utils.py:1636] 2021-07-19 11:06:03,353 >> creating metadata file for /root/.cache/huggingface/transformers/8a611c8e6ab409ea523e84173bef4b1ef257262487d732b05c68d31b674788e5.ae8a25c127f59d8b0d9d11c53667336d7c491e5b06338b3ce56369ee735acd6f\n", + "[INFO|modeling_utils.py:1271] 2021-07-19 11:06:03,353 >> loading weights file https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/8a611c8e6ab409ea523e84173bef4b1ef257262487d732b05c68d31b674788e5.ae8a25c127f59d8b0d9d11c53667336d7c491e5b06338b3ce56369ee735acd6f\n", + "[WARNING|modeling_utils.py:1502] 2021-07-19 11:06:04,877 >> Some weights of the model checkpoint at bertin-project/bertin-roberta-base-spanish were not used when initializing RobertaForTokenClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight']\n", + "- This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "[WARNING|modeling_utils.py:1513] 2021-07-19 11:06:04,877 >> Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at bertin-project/bertin-roberta-base-spanish and are newly initialized: ['classifier.weight', 'classifier.bias']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "Running tokenizer on train dataset: 0% 0/9 [00:00> The following columns in the training set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: id, pos_tags, tokens, ner_tags.\n", + "[INFO|trainer.py:1162] 2021-07-19 11:06:16,543 >> ***** Running training *****\n", + "[INFO|trainer.py:1163] 2021-07-19 11:06:16,543 >> Num examples = 8324\n", + "[INFO|trainer.py:1164] 2021-07-19 11:06:16,543 >> Num Epochs = 5\n", + "[INFO|trainer.py:1165] 2021-07-19 11:06:16,543 >> Instantaneous batch size per device = 8\n", + "[INFO|trainer.py:1166] 2021-07-19 11:06:16,543 >> Total train batch size (w. 
parallel, distributed & accumulation) = 8\n", + "[INFO|trainer.py:1167] 2021-07-19 11:06:16,543 >> Gradient Accumulation steps = 1\n", + "[INFO|trainer.py:1168] 2021-07-19 11:06:16,543 >> Total optimization steps = 5205\n", + "[INFO|integrations.py:417] 2021-07-19 11:06:16,562 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mversae\u001b[0m (use `wandb login --relogin` to force relogin)\n", + "2021-07-19 11:06:18.176951: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m./outputs\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval/runs/j1pmca9j\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in /content/wandb/run-20210719_110616-j1pmca9j\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run `wandb offline` to turn off syncing.\n", + "\n", + " 10% 500/5205 [02:53<30:54, 2.54it/s]{'loss': 0.1572, 'learning_rate': 4.5196926032660905e-05, 'epoch': 0.48}\n", + " 10% 500/5205 [02:53<30:54, 2.54it/s][INFO|trainer.py:1917] 2021-07-19 11:09:13,478 >> Saving model checkpoint to ./outputs/checkpoint-500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 11:09:13,480 >> Configuration saved in ./outputs/checkpoint-500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 11:09:14,958 >> Model weights saved in ./outputs/checkpoint-500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 11:09:14,966 >> tokenizer config file saved in ./outputs/checkpoint-500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 11:09:14,967 >> Special tokens file saved in ./outputs/checkpoint-500/special_tokens_map.json\n", + "{'loss': 0.0718, 'learning_rate': 4.039385206532181e-05, 'epoch': 0.96}\n", + " 19% 1000/5205 [05:53<27:38, 2.54it/s][INFO|trainer.py:1917] 2021-07-19 11:12:12,586 >> Saving model checkpoint to ./outputs/checkpoint-1000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 11:12:12,588 >> Configuration saved in ./outputs/checkpoint-1000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 11:12:14,117 >> Model weights saved in ./outputs/checkpoint-1000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 11:12:14,117 >> tokenizer config file saved in ./outputs/checkpoint-1000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 11:12:14,118 >> Special tokens file saved in ./outputs/checkpoint-1000/special_tokens_map.json\n", + " 29% 1500/5205 [08:51<24:16, 2.54it/s]{'loss': 0.0488, 'learning_rate': 3.5590778097982716e-05, 'epoch': 1.44}\n", + " 29% 1500/5205 [08:51<24:16, 2.54it/s][INFO|trainer.py:1917] 2021-07-19 11:15:11,464 >> Saving model checkpoint to ./outputs/checkpoint-1500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 11:15:11,466 >> Configuration saved in ./outputs/checkpoint-1500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 11:15:12,940 >> Model weights saved in ./outputs/checkpoint-1500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 11:15:12,941 >> tokenizer config file saved in 
./outputs/checkpoint-1500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 11:15:12,941 >> Special tokens file saved in ./outputs/checkpoint-1500/special_tokens_map.json\n", + " 38% 2000/5205 [11:50<20:49, 2.57it/s]{'loss': 0.0455, 'learning_rate': 3.078770413064361e-05, 'epoch': 1.92}\n", + " 38% 2000/5205 [11:50<20:49, 2.57it/s][INFO|trainer.py:1917] 2021-07-19 11:18:09,853 >> Saving model checkpoint to ./outputs/checkpoint-2000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 11:18:09,855 >> Configuration saved in ./outputs/checkpoint-2000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 11:18:11,273 >> Model weights saved in ./outputs/checkpoint-2000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 11:18:11,274 >> tokenizer config file saved in ./outputs/checkpoint-2000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 11:18:11,275 >> Special tokens file saved in ./outputs/checkpoint-2000/special_tokens_map.json\n", + " 48% 2500/5205 [14:48<17:38, 2.56it/s]{'loss': 0.0293, 'learning_rate': 2.5984630163304517e-05, 'epoch': 2.4}\n", + "[INFO|trainer.py:1917] 2021-07-19 11:21:08,207 >> Saving model checkpoint to ./outputs/checkpoint-2500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 11:21:08,208 >> Configuration saved in ./outputs/checkpoint-2500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 11:21:09,636 >> Model weights saved in ./outputs/checkpoint-2500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 11:21:09,637 >> tokenizer config file saved in ./outputs/checkpoint-2500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 11:21:09,638 >> Special tokens file saved in ./outputs/checkpoint-2500/special_tokens_map.json\n", + "{'loss': 0.0252, 'learning_rate': 2.118155619596542e-05, 'epoch': 2.88}\n", + " 58% 3000/5205 [17:47<14:28, 2.54it/s][INFO|trainer.py:1917] 2021-07-19 11:24:07,079 >> Saving model checkpoint to ./outputs/checkpoint-3000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 11:24:07,083 >> Configuration saved in ./outputs/checkpoint-3000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 11:24:08,774 >> Model weights saved in ./outputs/checkpoint-3000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 11:24:08,775 >> tokenizer config file saved in ./outputs/checkpoint-3000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 11:24:08,775 >> Special tokens file saved in ./outputs/checkpoint-3000/special_tokens_map.json\n", + " 67% 3500/5205 [20:45<11:00, 2.58it/s]{'loss': 0.0186, 'learning_rate': 1.6378482228626322e-05, 'epoch': 3.36}\n", + "[INFO|trainer.py:1917] 2021-07-19 11:27:05,160 >> Saving model checkpoint to ./outputs/checkpoint-3500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 11:27:05,166 >> Configuration saved in ./outputs/checkpoint-3500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 11:27:06,645 >> Model weights saved in ./outputs/checkpoint-3500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 11:27:06,646 >> tokenizer config file saved in ./outputs/checkpoint-3500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 11:27:06,646 >> Special tokens file saved in ./outputs/checkpoint-3500/special_tokens_map.json\n", + " 77% 4000/5205 [23:43<07:50, 2.56it/s]{'loss': 0.0151, 'learning_rate': 1.1575408261287224e-05, 'epoch': 3.84}\n", + "[INFO|trainer.py:1917] 
2021-07-19 11:30:03,364 >> Saving model checkpoint to ./outputs/checkpoint-4000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 11:30:03,365 >> Configuration saved in ./outputs/checkpoint-4000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 11:30:04,870 >> Model weights saved in ./outputs/checkpoint-4000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 11:30:04,870 >> tokenizer config file saved in ./outputs/checkpoint-4000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 11:30:04,871 >> Special tokens file saved in ./outputs/checkpoint-4000/special_tokens_map.json\n", + " 86% 4500/5205 [26:41<04:34, 2.56it/s]{'loss': 0.0115, 'learning_rate': 6.7723342939481265e-06, 'epoch': 4.32}\n", + " 86% 4500/5205 [26:41<04:34, 2.56it/s][INFO|trainer.py:1917] 2021-07-19 11:33:01,313 >> Saving model checkpoint to ./outputs/checkpoint-4500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 11:33:01,315 >> Configuration saved in ./outputs/checkpoint-4500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 11:33:02,842 >> Model weights saved in ./outputs/checkpoint-4500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 11:33:02,843 >> tokenizer config file saved in ./outputs/checkpoint-4500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 11:33:02,843 >> Special tokens file saved in ./outputs/checkpoint-4500/special_tokens_map.json\n", + " 96% 5000/5205 [29:39<01:20, 2.54it/s]{'loss': 0.0094, 'learning_rate': 1.96926032660903e-06, 'epoch': 4.8}\n", + "[INFO|trainer.py:1917] 2021-07-19 11:35:59,265 >> Saving model checkpoint to ./outputs/checkpoint-5000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 11:35:59,266 >> Configuration saved in ./outputs/checkpoint-5000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 11:36:00,820 >> Model weights saved in ./outputs/checkpoint-5000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 11:36:00,821 >> tokenizer config file saved in ./outputs/checkpoint-5000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 11:36:00,821 >> Special tokens file saved in ./outputs/checkpoint-5000/special_tokens_map.json\n", + "100% 5205/5205 [30:56<00:00, 3.25it/s][INFO|trainer.py:1358] 2021-07-19 11:37:15,632 >> \n", + "\n", + "Training completed. 
Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n", + "100% 5205/5205 [30:56<00:00, 3.25it/s]{'train_runtime': 1859.0889, 'train_samples_per_second': 22.387, 'train_steps_per_second': 2.8, 'train_loss': 0.04189618094166692, 'epoch': 5.0}\n", + "100% 5205/5205 [30:56<00:00, 2.80it/s]\n", + "[INFO|trainer.py:1917] 2021-07-19 11:37:15,636 >> Saving model checkpoint to ./outputs\n", + "[INFO|configuration_utils.py:379] 2021-07-19 11:37:15,637 >> Configuration saved in ./outputs/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 11:37:17,201 >> Model weights saved in ./outputs/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 11:37:17,202 >> tokenizer config file saved in ./outputs/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 11:37:17,203 >> Special tokens file saved in ./outputs/special_tokens_map.json\n", + "***** train metrics *****\n", + " epoch = 5.0\n", + " train_loss = 0.0419\n", + " train_runtime = 0:30:59.08\n", + " train_samples = 8324\n", + " train_samples_per_second = 22.387\n", + " train_steps_per_second = 2.8\n", + "[INFO|trainer.py:522] 2021-07-19 11:37:17,352 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: id, pos_tags, tokens, ner_tags.\n", + "07/19/2021 11:37:17 - INFO - __main__ - *** Evaluate ***\n", + "[INFO|trainer.py:2163] 2021-07-19 11:37:17,370 >> ***** Running Evaluation *****\n", + "[INFO|trainer.py:2165] 2021-07-19 11:37:17,370 >> Num examples = 1916\n", + "[INFO|trainer.py:2168] 2021-07-19 11:37:17,370 >> Batch size = 8\n", + "100% 240/240 [00:26<00:00, 8.97it/s]\n", + "***** eval metrics *****\n", + " epoch = 5.0\n", + " eval_accuracy = 0.9812\n", + " eval_f1 = 0.8725\n", + " eval_loss = 0.1023\n", + " eval_precision = 0.8638\n", + " eval_recall = 0.8814\n", + " eval_runtime = 0:00:26.87\n", + " eval_samples = 1916\n", + " eval_samples_per_second = 71.287\n", + " eval_steps_per_second = 8.929\n", + "\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish, PID 605\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Program ended successfully.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find user logs for this run at: /content/wandb/run-20210719_110616-j1pmca9j/logs/debug.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find internal logs for this run at: /content/wandb/run-20210719_110616-j1pmca9j/logs/debug-internal.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.0094\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 5205\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime 1888\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp 1626694664\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step 11\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime 1859.0889\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second 22.387\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second 2.8\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos 1.087585926764544e+16\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss 0.0419\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 0.10234\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision 0.86377\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 
eval/recall 0.88143\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 0.87251\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy 0.98122\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 26.8774\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 71.287\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 8.929\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss █▄▃▃▂▂▁▁▁▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate █▇▆▆▅▄▃▃▂▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step ▁▂▂▃▄▄▅▅▆▇▇█\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 1 other file(s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced \u001b[33m./outputs\u001b[0m: \u001b[34mhttps://wandb.ai/versae/bertin-eval/runs/j1pmca9j\u001b[0m\n", + "2021-07-19 11:37:56.763079: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "07/19/2021 11:37:59 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", + "07/19/2021 11:37:59 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", + "_n_gpu=1,\n", + "adafactor=False,\n", + "adam_beta1=0.9,\n", + "adam_beta2=0.999,\n", + "adam_epsilon=1e-08,\n", + "dataloader_drop_last=False,\n", + "dataloader_num_workers=0,\n", + "dataloader_pin_memory=True,\n", + "ddp_find_unused_parameters=None,\n", + "debug=[],\n", + "deepspeed=None,\n", + "disable_tqdm=False,\n", + "do_eval=True,\n", + "do_predict=False,\n", + "do_train=True,\n", + "eval_accumulation_steps=None,\n", + "eval_steps=500,\n", + "evaluation_strategy=IntervalStrategy.NO,\n", + "fp16=False,\n", + "fp16_backend=auto,\n", + "fp16_full_eval=False,\n", + "fp16_opt_level=O1,\n", + "gradient_accumulation_steps=1,\n", + "greater_is_better=None,\n", + "group_by_length=False,\n", + "ignore_data_skip=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=5e-05,\n", + "length_column_name=length,\n", + "load_best_model_at_end=False,\n", + "local_rank=-1,\n", + "log_level=-1,\n", + "log_level_replica=-1,\n", + "log_on_each_node=True,\n", + "logging_dir=./outputs/runs/Jul19_11-37-59_eab5184de9ff,\n", + "logging_first_step=False,\n", + "logging_steps=500,\n", + 
"logging_strategy=IntervalStrategy.STEPS,\n", + "lr_scheduler_type=SchedulerType.LINEAR,\n", + "max_grad_norm=1.0,\n", + "max_steps=-1,\n", + "metric_for_best_model=None,\n", + "mp_parameters=,\n", + "no_cuda=False,\n", + "num_train_epochs=5.0,\n", + "output_dir=./outputs,\n", + "overwrite_output_dir=True,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=8,\n", + "per_device_train_batch_size=8,\n", + "prediction_loss_only=False,\n", + "push_to_hub=False,\n", + "push_to_hub_model_id=outputs,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=None,\n", + "remove_unused_columns=True,\n", + "report_to=['tensorboard', 'wandb'],\n", + "resume_from_checkpoint=None,\n", + "run_name=./outputs,\n", + "save_on_each_node=False,\n", + "save_steps=500,\n", + "save_strategy=IntervalStrategy.STEPS,\n", + "save_total_limit=None,\n", + "seed=42,\n", + "sharded_ddp=[],\n", + "skip_memory_metrics=True,\n", + "tpu_metrics_debug=False,\n", + "tpu_num_cores=None,\n", + "use_legacy_prediction_loop=False,\n", + "warmup_ratio=0.0,\n", + "warmup_steps=0,\n", + "weight_decay=0.0,\n", + ")\n", + "07/19/2021 11:38:01 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 11:38:01 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 11:38:01 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + "07/19/2021 11:38:01 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 11:38:01 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 11:38:02 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 11:38:02 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 11:38:02 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + 
"07/19/2021 11:38:02 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 11:38:02 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 11:38:02 - INFO - datasets.info - Loading Dataset Infos from /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 11:38:02 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", + "07/19/2021 11:38:02 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 11:38:02 - WARNING - datasets.builder - Reusing dataset conll2002 (/root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5)\n", + "07/19/2021 11:38:02 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "100% 3/3 [00:00<00:00, 27.85it/s]\n", + "[INFO|file_utils.py:1624] 2021-07-19 11:38:02,918 >> https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpyi0y3esx\n", + "Downloading: 100% 618/618 [00:00<00:00, 448kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 11:38:03,899 >> storing https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/config.json in cache at /root/.cache/huggingface/transformers/d32cc9b6c8f303dbd398a937552d8d9c26d842d418f1cd79adb3fc25f169f722.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|file_utils.py:1636] 2021-07-19 11:38:03,899 >> creating metadata file for /root/.cache/huggingface/transformers/d32cc9b6c8f303dbd398a937552d8d9c26d842d418f1cd79adb3fc25f169f722.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|configuration_utils.py:545] 2021-07-19 11:38:03,900 >> loading configuration file https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/d32cc9b6c8f303dbd398a937552d8d9c26d842d418f1cd79adb3fc25f169f722.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|configuration_utils.py:581] 2021-07-19 11:38:03,900 >> Model config RobertaConfig {\n", + " \"architectures\": [\n", + " \"RobertaForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"bos_token_id\": 0,\n", + " \"eos_token_id\": 2,\n", + " \"finetuning_task\": \"ner\",\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"id2label\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8\n", + " 
},\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"label2id\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8\n", + " },\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"max_position_embeddings\": 514,\n", + " \"model_type\": \"roberta\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 1,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50265\n", + "}\n", + "\n", + "[INFO|file_utils.py:1624] 2021-07-19 11:38:04,619 >> https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/tokenizer_config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpu8t1or1_\n", + "Downloading: 100% 292/292 [00:00<00:00, 211kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 11:38:05,341 >> storing https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/tokenizer_config.json in cache at /root/.cache/huggingface/transformers/4f604c1e66964e3a55ecf1d4959f4ebb3416a50476c866ac53fedd6a93b68c2b.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|file_utils.py:1636] 2021-07-19 11:38:05,341 >> creating metadata file for /root/.cache/huggingface/transformers/4f604c1e66964e3a55ecf1d4959f4ebb3416a50476c866ac53fedd6a93b68c2b.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|file_utils.py:1624] 2021-07-19 11:38:06,071 >> https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/vocab.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpgrd08tl6\n", + "Downloading: 100% 846k/846k [00:00<00:00, 1.60MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 11:38:07,326 >> storing https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/vocab.json in cache at /root/.cache/huggingface/transformers/20af099ba4d2572dbbdf666ab59e4864bd3ec4867e50cc70d7989d1638fced71.af6be2925520943ae92cd64250bdaa83a8c9d6f91422efbac7ed33a20e0b6e1b\n", + "[INFO|file_utils.py:1636] 2021-07-19 11:38:07,327 >> creating metadata file for /root/.cache/huggingface/transformers/20af099ba4d2572dbbdf666ab59e4864bd3ec4867e50cc70d7989d1638fced71.af6be2925520943ae92cd64250bdaa83a8c9d6f91422efbac7ed33a20e0b6e1b\n", + "[INFO|file_utils.py:1624] 2021-07-19 11:38:08,048 >> https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/merges.txt not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpbl3mlyxu\n", + "Downloading: 100% 505k/505k [00:00<00:00, 964kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 11:38:09,297 >> storing https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/merges.txt in cache at /root/.cache/huggingface/transformers/45c757f0d795baeb7913a8678519d1c353de20333a2e2e181a005f8f5b4dc60c.360a60d1c13cbafd862a8e2a85b267c2b3eb0226d5469633a889b33ba0dde234\n", + "[INFO|file_utils.py:1636] 2021-07-19 11:38:09,298 >> creating metadata file for /root/.cache/huggingface/transformers/45c757f0d795baeb7913a8678519d1c353de20333a2e2e181a005f8f5b4dc60c.360a60d1c13cbafd862a8e2a85b267c2b3eb0226d5469633a889b33ba0dde234\n", + "[INFO|file_utils.py:1624] 2021-07-19 11:38:10,026 >> 
https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/tokenizer.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpv4lczfio\n", + "Downloading: 100% 1.45M/1.45M [00:00<00:00, 2.07MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 11:38:11,457 >> storing https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/tokenizer.json in cache at /root/.cache/huggingface/transformers/abcefe6cb866709f8793e0130a2d9bd883fee0cbcc25ffee734858e4d002c5b6.0803a0fadc521f273c2682a34dc285f90fe9de9e932bbe77401d11672f7fcb60\n", + "[INFO|file_utils.py:1636] 2021-07-19 11:38:11,457 >> creating metadata file for /root/.cache/huggingface/transformers/abcefe6cb866709f8793e0130a2d9bd883fee0cbcc25ffee734858e4d002c5b6.0803a0fadc521f273c2682a34dc285f90fe9de9e932bbe77401d11672f7fcb60\n", + "[INFO|file_utils.py:1624] 2021-07-19 11:38:12,885 >> https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/special_tokens_map.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpg4oq8nhp\n", + "Downloading: 100% 239/239 [00:00<00:00, 178kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 11:38:13,601 >> storing https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/special_tokens_map.json in cache at /root/.cache/huggingface/transformers/73738db8ccbbbf8b6685f923eeb9e382d4c8e36fcfa5c8124d82ed3eed582725.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|file_utils.py:1636] 2021-07-19 11:38:13,601 >> creating metadata file for /root/.cache/huggingface/transformers/73738db8ccbbbf8b6685f923eeb9e382d4c8e36fcfa5c8124d82ed3eed582725.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 11:38:14,321 >> loading file https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/vocab.json from cache at /root/.cache/huggingface/transformers/20af099ba4d2572dbbdf666ab59e4864bd3ec4867e50cc70d7989d1638fced71.af6be2925520943ae92cd64250bdaa83a8c9d6f91422efbac7ed33a20e0b6e1b\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 11:38:14,321 >> loading file https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/merges.txt from cache at /root/.cache/huggingface/transformers/45c757f0d795baeb7913a8678519d1c353de20333a2e2e181a005f8f5b4dc60c.360a60d1c13cbafd862a8e2a85b267c2b3eb0226d5469633a889b33ba0dde234\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 11:38:14,321 >> loading file https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/abcefe6cb866709f8793e0130a2d9bd883fee0cbcc25ffee734858e4d002c5b6.0803a0fadc521f273c2682a34dc285f90fe9de9e932bbe77401d11672f7fcb60\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 11:38:14,321 >> loading file https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 11:38:14,321 >> loading file https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/special_tokens_map.json from cache at /root/.cache/huggingface/transformers/73738db8ccbbbf8b6685f923eeb9e382d4c8e36fcfa5c8124d82ed3eed582725.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 11:38:14,321 >> loading file 
https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/tokenizer_config.json from cache at /root/.cache/huggingface/transformers/4f604c1e66964e3a55ecf1d4959f4ebb3416a50476c866ac53fedd6a93b68c2b.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|file_utils.py:1624] 2021-07-19 11:38:15,384 >> https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/pytorch_model.bin not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpczgj117x\n", + "Downloading: 100% 499M/499M [00:47<00:00, 10.5MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 11:39:04,638 >> storing https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/7189e822ad05437642e8c70b270aad1da7745b3f42ae88ee851bd7336559387c.8970e1b3c4890a283e6d9c30f1437e32aec6a36c0f9aae84920ff5a363d4b636\n", + "[INFO|file_utils.py:1636] 2021-07-19 11:39:04,638 >> creating metadata file for /root/.cache/huggingface/transformers/7189e822ad05437642e8c70b270aad1da7745b3f42ae88ee851bd7336559387c.8970e1b3c4890a283e6d9c30f1437e32aec6a36c0f9aae84920ff5a363d4b636\n", + "[INFO|modeling_utils.py:1271] 2021-07-19 11:39:04,639 >> loading weights file https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/7189e822ad05437642e8c70b270aad1da7745b3f42ae88ee851bd7336559387c.8970e1b3c4890a283e6d9c30f1437e32aec6a36c0f9aae84920ff5a363d4b636\n", + "[WARNING|modeling_utils.py:1502] 2021-07-19 11:39:06,140 >> Some weights of the model checkpoint at flax-community/bertin-roberta-large-spanish were not used when initializing RobertaForTokenClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.bias']\n", + "- This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "[WARNING|modeling_utils.py:1513] 2021-07-19 11:39:06,140 >> Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at flax-community/bertin-roberta-large-spanish and are newly initialized: ['classifier.bias', 'classifier.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "Running tokenizer on train dataset: 0% 0/9 [00:00> The following columns in the training set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: id, tokens, ner_tags, pos_tags.\n", + "[INFO|trainer.py:1162] 2021-07-19 11:39:17,438 >> ***** Running training *****\n", + "[INFO|trainer.py:1163] 2021-07-19 11:39:17,438 >> Num examples = 8324\n", + "[INFO|trainer.py:1164] 2021-07-19 11:39:17,438 >> Num Epochs = 5\n", + "[INFO|trainer.py:1165] 2021-07-19 11:39:17,438 >> Instantaneous batch size per device = 8\n", + "[INFO|trainer.py:1166] 2021-07-19 11:39:17,438 >> Total train batch size (w. 
parallel, distributed & accumulation) = 8\n", + "[INFO|trainer.py:1167] 2021-07-19 11:39:17,438 >> Gradient Accumulation steps = 1\n", + "[INFO|trainer.py:1168] 2021-07-19 11:39:17,438 >> Total optimization steps = 5205\n", + "[INFO|integrations.py:417] 2021-07-19 11:39:17,456 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mversae\u001b[0m (use `wandb login --relogin` to force relogin)\n", + "2021-07-19 11:39:19.075107: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m./outputs\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval/runs/fh1ara05\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in /content/wandb/run-20210719_113917-fh1ara05\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run `wandb offline` to turn off syncing.\n", + "\n", + "{'loss': 0.1506, 'learning_rate': 4.5196926032660905e-05, 'epoch': 0.48}\n", + " 10% 500/5205 [02:53<30:56, 2.53it/s][INFO|trainer.py:1917] 2021-07-19 11:42:14,041 >> Saving model checkpoint to ./outputs/checkpoint-500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 11:42:14,048 >> Configuration saved in ./outputs/checkpoint-500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 11:42:15,549 >> Model weights saved in ./outputs/checkpoint-500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 11:42:15,550 >> tokenizer config file saved in ./outputs/checkpoint-500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 11:42:15,550 >> Special tokens file saved in ./outputs/checkpoint-500/special_tokens_map.json\n", + " 19% 1000/5205 [05:52<27:31, 2.55it/s][INFO|trainer.py:1917] 2021-07-19 11:45:13,162 >> Saving model checkpoint to ./outputs/checkpoint-1000\n", + "{'loss': 0.0709, 'learning_rate': 4.039385206532181e-05, 'epoch': 0.96}\n", + "[INFO|configuration_utils.py:379] 2021-07-19 11:45:13,164 >> Configuration saved in ./outputs/checkpoint-1000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 11:45:14,726 >> Model weights saved in ./outputs/checkpoint-1000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 11:45:14,727 >> tokenizer config file saved in ./outputs/checkpoint-1000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 11:45:14,727 >> Special tokens file saved in ./outputs/checkpoint-1000/special_tokens_map.json\n", + " 29% 1500/5205 [08:51<24:13, 2.55it/s]{'loss': 0.0456, 'learning_rate': 3.5590778097982716e-05, 'epoch': 1.44}\n", + " 29% 1500/5205 [08:51<24:13, 2.55it/s][INFO|trainer.py:1917] 2021-07-19 11:48:11,899 >> Saving model checkpoint to ./outputs/checkpoint-1500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 11:48:11,900 >> Configuration saved in ./outputs/checkpoint-1500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 11:48:13,401 >> Model weights saved in ./outputs/checkpoint-1500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 11:48:13,402 >> tokenizer config file saved in 
./outputs/checkpoint-1500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 11:48:13,403 >> Special tokens file saved in ./outputs/checkpoint-1500/special_tokens_map.json\n", + " 38% 2000/5205 [11:50<20:52, 2.56it/s]{'loss': 0.0422, 'learning_rate': 3.078770413064361e-05, 'epoch': 1.92}\n", + "[INFO|trainer.py:1917] 2021-07-19 11:51:10,580 >> Saving model checkpoint to ./outputs/checkpoint-2000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 11:51:10,582 >> Configuration saved in ./outputs/checkpoint-2000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 11:51:11,981 >> Model weights saved in ./outputs/checkpoint-2000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 11:51:11,982 >> tokenizer config file saved in ./outputs/checkpoint-2000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 11:51:11,982 >> Special tokens file saved in ./outputs/checkpoint-2000/special_tokens_map.json\n", + "{'loss': 0.0298, 'learning_rate': 2.5984630163304517e-05, 'epoch': 2.4}\n", + " 48% 2500/5205 [14:48<17:42, 2.55it/s][INFO|trainer.py:1917] 2021-07-19 11:54:09,015 >> Saving model checkpoint to ./outputs/checkpoint-2500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 11:54:09,017 >> Configuration saved in ./outputs/checkpoint-2500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 11:54:10,503 >> Model weights saved in ./outputs/checkpoint-2500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 11:54:10,504 >> tokenizer config file saved in ./outputs/checkpoint-2500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 11:54:10,561 >> Special tokens file saved in ./outputs/checkpoint-2500/special_tokens_map.json\n", + "{'loss': 0.0241, 'learning_rate': 2.118155619596542e-05, 'epoch': 2.88}\n", + " 58% 3000/5205 [17:46<14:20, 2.56it/s][INFO|trainer.py:1917] 2021-07-19 11:57:07,048 >> Saving model checkpoint to ./outputs/checkpoint-3000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 11:57:07,050 >> Configuration saved in ./outputs/checkpoint-3000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 11:57:08,701 >> Model weights saved in ./outputs/checkpoint-3000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 11:57:08,710 >> tokenizer config file saved in ./outputs/checkpoint-3000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 11:57:08,710 >> Special tokens file saved in ./outputs/checkpoint-3000/special_tokens_map.json\n", + " 67% 3500/5205 [20:44<11:02, 2.57it/s]{'loss': 0.018, 'learning_rate': 1.6378482228626322e-05, 'epoch': 3.36}\n", + " 67% 3500/5205 [20:44<11:02, 2.57it/s][INFO|trainer.py:1917] 2021-07-19 12:00:04,889 >> Saving model checkpoint to ./outputs/checkpoint-3500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 12:00:04,891 >> Configuration saved in ./outputs/checkpoint-3500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 12:00:06,301 >> Model weights saved in ./outputs/checkpoint-3500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 12:00:06,302 >> tokenizer config file saved in ./outputs/checkpoint-3500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 12:00:06,303 >> Special tokens file saved in ./outputs/checkpoint-3500/special_tokens_map.json\n", + " 77% 4000/5205 [23:42<07:52, 2.55it/s]{'loss': 0.0145, 'learning_rate': 1.1575408261287224e-05, 'epoch': 3.84}\n", + "[INFO|trainer.py:1917] 
2021-07-19 12:03:02,699 >> Saving model checkpoint to ./outputs/checkpoint-4000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 12:03:02,700 >> Configuration saved in ./outputs/checkpoint-4000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 12:03:04,160 >> Model weights saved in ./outputs/checkpoint-4000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 12:03:04,229 >> tokenizer config file saved in ./outputs/checkpoint-4000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 12:03:04,229 >> Special tokens file saved in ./outputs/checkpoint-4000/special_tokens_map.json\n", + " 86% 4500/5205 [26:39<04:35, 2.56it/s]{'loss': 0.0116, 'learning_rate': 6.7723342939481265e-06, 'epoch': 4.32}\n", + "[INFO|trainer.py:1917] 2021-07-19 12:05:59,683 >> Saving model checkpoint to ./outputs/checkpoint-4500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 12:05:59,684 >> Configuration saved in ./outputs/checkpoint-4500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 12:06:01,360 >> Model weights saved in ./outputs/checkpoint-4500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 12:06:01,361 >> tokenizer config file saved in ./outputs/checkpoint-4500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 12:06:01,361 >> Special tokens file saved in ./outputs/checkpoint-4500/special_tokens_map.json\n", + "{'loss': 0.0093, 'learning_rate': 1.96926032660903e-06, 'epoch': 4.8}\n", + " 96% 5000/5205 [29:37<01:19, 2.57it/s][INFO|trainer.py:1917] 2021-07-19 12:08:57,911 >> Saving model checkpoint to ./outputs/checkpoint-5000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 12:08:57,913 >> Configuration saved in ./outputs/checkpoint-5000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 12:08:59,474 >> Model weights saved in ./outputs/checkpoint-5000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 12:08:59,475 >> tokenizer config file saved in ./outputs/checkpoint-5000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 12:08:59,475 >> Special tokens file saved in ./outputs/checkpoint-5000/special_tokens_map.json\n", + "100% 5205/5205 [30:53<00:00, 3.24it/s][INFO|trainer.py:1358] 2021-07-19 12:10:14,210 >> \n", + "\n", + "Training completed. 
Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n", + "{'train_runtime': 1856.7718, 'train_samples_per_second': 22.415, 'train_steps_per_second': 2.803, 'train_loss': 0.040356940655154064, 'epoch': 5.0}\n", + "100% 5205/5205 [30:53<00:00, 2.81it/s]\n", + "[INFO|trainer.py:1917] 2021-07-19 12:10:14,229 >> Saving model checkpoint to ./outputs\n", + "[INFO|configuration_utils.py:379] 2021-07-19 12:10:14,230 >> Configuration saved in ./outputs/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 12:10:15,786 >> Model weights saved in ./outputs/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 12:10:15,787 >> tokenizer config file saved in ./outputs/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 12:10:15,787 >> Special tokens file saved in ./outputs/special_tokens_map.json\n", + "***** train metrics *****\n", + " epoch = 5.0\n", + " train_loss = 0.0404\n", + " train_runtime = 0:30:56.77\n", + " train_samples = 8324\n", + " train_samples_per_second = 22.415\n", + " train_steps_per_second = 2.803\n", + "07/19/2021 12:10:15 - INFO - __main__ - *** Evaluate ***\n", + "[INFO|trainer.py:522] 2021-07-19 12:10:15,969 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: id, tokens, ner_tags, pos_tags.\n", + "[INFO|trainer.py:2163] 2021-07-19 12:10:15,988 >> ***** Running Evaluation *****\n", + "[INFO|trainer.py:2165] 2021-07-19 12:10:15,988 >> Num examples = 1916\n", + "[INFO|trainer.py:2168] 2021-07-19 12:10:15,988 >> Batch size = 8\n", + "100% 240/240 [00:26<00:00, 9.01it/s]\n", + "***** eval metrics *****\n", + " epoch = 5.0\n", + " eval_accuracy = 0.9806\n", + " eval_f1 = 0.8735\n", + " eval_loss = 0.1056\n", + " eval_precision = 0.8667\n", + " eval_recall = 0.8803\n", + " eval_runtime = 0:00:26.74\n", + " eval_samples = 1916\n", + " eval_samples_per_second = 71.635\n", + " eval_steps_per_second = 8.973\n", + "\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish, PID 676\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Program ended successfully.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find user logs for this run at: /content/wandb/run-20210719_113917-fh1ara05/logs/debug.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find internal logs for this run at: /content/wandb/run-20210719_113917-fh1ara05/logs/debug-internal.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.0093\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 5205\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime 1885\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp 1626696642\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step 11\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime 1856.7718\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second 22.415\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second 2.803\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos 1.087585926764544e+16\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss 0.04036\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 0.10558\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision 0.86674\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall 0.88028\n", + 
"\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 0.87346\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy 0.98063\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 26.7467\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 71.635\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 8.973\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss █▄▃▃▂▂▁▁▁▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate █▇▆▆▅▄▃▃▂▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step ▁▂▂▃▄▄▅▅▆▇▇█\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 1 other file(s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced \u001b[33m./outputs\u001b[0m: \u001b[34mhttps://wandb.ai/versae/bertin-eval/runs/fh1ara05\u001b[0m\n", + "2021-07-19 12:10:55.954685: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "07/19/2021 12:10:58 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", + "07/19/2021 12:10:58 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", + "_n_gpu=1,\n", + "adafactor=False,\n", + "adam_beta1=0.9,\n", + "adam_beta2=0.999,\n", + "adam_epsilon=1e-08,\n", + "dataloader_drop_last=False,\n", + "dataloader_num_workers=0,\n", + "dataloader_pin_memory=True,\n", + "ddp_find_unused_parameters=None,\n", + "debug=[],\n", + "deepspeed=None,\n", + "disable_tqdm=False,\n", + "do_eval=True,\n", + "do_predict=False,\n", + "do_train=True,\n", + "eval_accumulation_steps=None,\n", + "eval_steps=500,\n", + "evaluation_strategy=IntervalStrategy.NO,\n", + "fp16=False,\n", + "fp16_backend=auto,\n", + "fp16_full_eval=False,\n", + "fp16_opt_level=O1,\n", + "gradient_accumulation_steps=1,\n", + "greater_is_better=None,\n", + "group_by_length=False,\n", + "ignore_data_skip=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=5e-05,\n", + "length_column_name=length,\n", + "load_best_model_at_end=False,\n", + "local_rank=-1,\n", + "log_level=-1,\n", + "log_level_replica=-1,\n", + "log_on_each_node=True,\n", + "logging_dir=./outputs/runs/Jul19_12-10-58_eab5184de9ff,\n", + "logging_first_step=False,\n", + "logging_steps=500,\n", + 
"logging_strategy=IntervalStrategy.STEPS,\n", + "lr_scheduler_type=SchedulerType.LINEAR,\n", + "max_grad_norm=1.0,\n", + "max_steps=-1,\n", + "metric_for_best_model=None,\n", + "mp_parameters=,\n", + "no_cuda=False,\n", + "num_train_epochs=5.0,\n", + "output_dir=./outputs,\n", + "overwrite_output_dir=True,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=8,\n", + "per_device_train_batch_size=8,\n", + "prediction_loss_only=False,\n", + "push_to_hub=False,\n", + "push_to_hub_model_id=outputs,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=None,\n", + "remove_unused_columns=True,\n", + "report_to=['tensorboard', 'wandb'],\n", + "resume_from_checkpoint=None,\n", + "run_name=./outputs,\n", + "save_on_each_node=False,\n", + "save_steps=500,\n", + "save_strategy=IntervalStrategy.STEPS,\n", + "save_total_limit=None,\n", + "seed=42,\n", + "sharded_ddp=[],\n", + "skip_memory_metrics=True,\n", + "tpu_metrics_debug=False,\n", + "tpu_num_cores=None,\n", + "use_legacy_prediction_loop=False,\n", + "warmup_ratio=0.0,\n", + "warmup_steps=0,\n", + "weight_decay=0.0,\n", + ")\n", + "07/19/2021 12:11:00 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 12:11:00 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 12:11:00 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + "07/19/2021 12:11:00 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 12:11:00 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 12:11:01 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 12:11:01 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 12:11:01 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + 
"07/19/2021 12:11:01 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 12:11:01 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 12:11:01 - INFO - datasets.info - Loading Dataset Infos from /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 12:11:01 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", + "07/19/2021 12:11:01 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 12:11:01 - WARNING - datasets.builder - Reusing dataset conll2002 (/root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5)\n", + "07/19/2021 12:11:01 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "100% 3/3 [00:00<00:00, 25.94it/s]\n", + "[INFO|file_utils.py:1624] 2021-07-19 12:11:01,984 >> https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp2k5xww1q\n", + "Downloading: 100% 613/613 [00:00<00:00, 467kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 12:11:03,238 >> storing https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/config.json in cache at /root/.cache/huggingface/transformers/3b60ce79e9e99338ab683eee75e146acc49852f2f34f2c5610dfd78221a176ee.33b0b03a5bf5e640494a22a3aa4909c661effc0fa0e186b1513b17d9b058ca59\n", + "[INFO|file_utils.py:1636] 2021-07-19 12:11:03,238 >> creating metadata file for /root/.cache/huggingface/transformers/3b60ce79e9e99338ab683eee75e146acc49852f2f34f2c5610dfd78221a176ee.33b0b03a5bf5e640494a22a3aa4909c661effc0fa0e186b1513b17d9b058ca59\n", + "[INFO|configuration_utils.py:545] 2021-07-19 12:11:03,239 >> loading configuration file https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3b60ce79e9e99338ab683eee75e146acc49852f2f34f2c5610dfd78221a176ee.33b0b03a5bf5e640494a22a3aa4909c661effc0fa0e186b1513b17d9b058ca59\n", + "[INFO|configuration_utils.py:581] 2021-07-19 12:11:03,239 >> Model config RobertaConfig {\n", + " \"architectures\": [\n", + " \"RobertaForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.0,\n", + " \"bos_token_id\": 0,\n", + " \"eos_token_id\": 2,\n", + " \"finetuning_task\": \"ner\",\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.0,\n", + " \"hidden_size\": 768,\n", + " \"id2label\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8\n", + " },\n", + " \"initializer_range\": 0.02,\n", + " 
\"intermediate_size\": 3072,\n", + " \"label2id\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8\n", + " },\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"max_position_embeddings\": 514,\n", + " \"model_type\": \"roberta\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 1,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50262\n", + "}\n", + "\n", + "[INFO|file_utils.py:1624] 2021-07-19 12:11:03,966 >> https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/tokenizer_config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpl2t8n1yp\n", + "Downloading: 100% 1.46k/1.46k [00:00<00:00, 1.03MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 12:11:04,686 >> storing https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/tokenizer_config.json in cache at /root/.cache/huggingface/transformers/e025b5b045564995a040386d80efa4d55779730827b72b4e40908073db9f0630.d8a7d006294d83173a76ac51a95b5a8470bbbc87c93c63633eaf9476656ed660\n", + "[INFO|file_utils.py:1636] 2021-07-19 12:11:04,686 >> creating metadata file for /root/.cache/huggingface/transformers/e025b5b045564995a040386d80efa4d55779730827b72b4e40908073db9f0630.d8a7d006294d83173a76ac51a95b5a8470bbbc87c93c63633eaf9476656ed660\n", + "[INFO|configuration_utils.py:545] 2021-07-19 12:11:05,404 >> loading configuration file https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3b60ce79e9e99338ab683eee75e146acc49852f2f34f2c5610dfd78221a176ee.33b0b03a5bf5e640494a22a3aa4909c661effc0fa0e186b1513b17d9b058ca59\n", + "[INFO|configuration_utils.py:581] 2021-07-19 12:11:05,405 >> Model config RobertaConfig {\n", + " \"architectures\": [\n", + " \"RobertaForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.0,\n", + " \"bos_token_id\": 0,\n", + " \"eos_token_id\": 2,\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.0,\n", + " \"hidden_size\": 768,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"max_position_embeddings\": 514,\n", + " \"model_type\": \"roberta\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 1,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50262\n", + "}\n", + "\n", + "[INFO|file_utils.py:1624] 2021-07-19 12:11:06,134 >> https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/vocab.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmph8j_y_v6\n", + "Downloading: 100% 1.15M/1.15M [00:01<00:00, 1.11MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 12:11:07,912 >> storing https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/vocab.json in cache at /root/.cache/huggingface/transformers/aec224417e3c4a4bda2283292ca7898205329eeb16f3b8db7ea5a36e51d55257.26eadee3bbe78c0682ce89a698fbb1698a0eee50c36cf83be2280a0f2a7b23c1\n", + "[INFO|file_utils.py:1636] 2021-07-19 12:11:07,912 >> creating metadata file for 
/root/.cache/huggingface/transformers/aec224417e3c4a4bda2283292ca7898205329eeb16f3b8db7ea5a36e51d55257.26eadee3bbe78c0682ce89a698fbb1698a0eee50c36cf83be2280a0f2a7b23c1\n", + "[INFO|file_utils.py:1624] 2021-07-19 12:11:08,644 >> https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/merges.txt not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp75jqk_c3\n", + "Downloading: 100% 509k/509k [00:00<00:00, 1.31MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 12:11:09,753 >> storing https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/merges.txt in cache at /root/.cache/huggingface/transformers/6688b2cc67bbc01f3e369230731a072338dc286bf25b97e1513d359ab00f2ea3.0d24ae8bd5fabb1f5020f91bc602cefeb5a2938ab77e21769d28776345634b23\n", + "[INFO|file_utils.py:1636] 2021-07-19 12:11:09,753 >> creating metadata file for /root/.cache/huggingface/transformers/6688b2cc67bbc01f3e369230731a072338dc286bf25b97e1513d359ab00f2ea3.0d24ae8bd5fabb1f5020f91bc602cefeb5a2938ab77e21769d28776345634b23\n", + "[INFO|file_utils.py:1624] 2021-07-19 12:11:10,482 >> https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/tokenizer.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpr4ou2joy\n", + "Downloading: 100% 1.46M/1.46M [00:00<00:00, 2.06MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 12:11:12,187 >> storing https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/tokenizer.json in cache at /root/.cache/huggingface/transformers/2f6b692045224588b9a5a4d5639db72d5e3fd2c18cbd0accf4c6dc81a4adc413.bd775ba884c9e650b58a3a333a97e47c8d1b9d37cdbe19b22fb04b1e41beb19d\n", + "[INFO|file_utils.py:1636] 2021-07-19 12:11:12,187 >> creating metadata file for /root/.cache/huggingface/transformers/2f6b692045224588b9a5a4d5639db72d5e3fd2c18cbd0accf4c6dc81a4adc413.bd775ba884c9e650b58a3a333a97e47c8d1b9d37cdbe19b22fb04b1e41beb19d\n", + "[INFO|file_utils.py:1624] 2021-07-19 12:11:13,623 >> https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/special_tokens_map.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpxa1ze4ou\n", + "Downloading: 100% 772/772 [00:00<00:00, 572kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 12:11:14,343 >> storing https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/special_tokens_map.json in cache at /root/.cache/huggingface/transformers/0f2dbdd0b96b43180e16d660cb1b274e46fa0cf63da426f09c1c82900fb52da1.cb2244924ab24d706b02fd7fcedaea4531566537687a539ebb94db511fd122a0\n", + "[INFO|file_utils.py:1636] 2021-07-19 12:11:14,343 >> creating metadata file for /root/.cache/huggingface/transformers/0f2dbdd0b96b43180e16d660cb1b274e46fa0cf63da426f09c1c82900fb52da1.cb2244924ab24d706b02fd7fcedaea4531566537687a539ebb94db511fd122a0\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 12:11:15,065 >> loading file https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/vocab.json from cache at /root/.cache/huggingface/transformers/aec224417e3c4a4bda2283292ca7898205329eeb16f3b8db7ea5a36e51d55257.26eadee3bbe78c0682ce89a698fbb1698a0eee50c36cf83be2280a0f2a7b23c1\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 12:11:15,065 >> loading file https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/merges.txt from cache at /root/.cache/huggingface/transformers/6688b2cc67bbc01f3e369230731a072338dc286bf25b97e1513d359ab00f2ea3.0d24ae8bd5fabb1f5020f91bc602cefeb5a2938ab77e21769d28776345634b23\n", + 
"[INFO|tokenization_utils_base.py:1722] 2021-07-19 12:11:15,065 >> loading file https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/2f6b692045224588b9a5a4d5639db72d5e3fd2c18cbd0accf4c6dc81a4adc413.bd775ba884c9e650b58a3a333a97e47c8d1b9d37cdbe19b22fb04b1e41beb19d\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 12:11:15,065 >> loading file https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 12:11:15,065 >> loading file https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/special_tokens_map.json from cache at /root/.cache/huggingface/transformers/0f2dbdd0b96b43180e16d660cb1b274e46fa0cf63da426f09c1c82900fb52da1.cb2244924ab24d706b02fd7fcedaea4531566537687a539ebb94db511fd122a0\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 12:11:15,065 >> loading file https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/tokenizer_config.json from cache at /root/.cache/huggingface/transformers/e025b5b045564995a040386d80efa4d55779730827b72b4e40908073db9f0630.d8a7d006294d83173a76ac51a95b5a8470bbbc87c93c63633eaf9476656ed660\n", + "[INFO|configuration_utils.py:545] 2021-07-19 12:11:15,783 >> loading configuration file https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3b60ce79e9e99338ab683eee75e146acc49852f2f34f2c5610dfd78221a176ee.33b0b03a5bf5e640494a22a3aa4909c661effc0fa0e186b1513b17d9b058ca59\n", + "[INFO|configuration_utils.py:581] 2021-07-19 12:11:15,783 >> Model config RobertaConfig {\n", + " \"architectures\": [\n", + " \"RobertaForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.0,\n", + " \"bos_token_id\": 0,\n", + " \"eos_token_id\": 2,\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.0,\n", + " \"hidden_size\": 768,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"max_position_embeddings\": 514,\n", + " \"model_type\": \"roberta\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 1,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50262\n", + "}\n", + "\n", + "[INFO|file_utils.py:1624] 2021-07-19 12:11:16,577 >> https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/pytorch_model.bin not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp7palrwn_\n", + "Downloading: 100% 499M/499M [00:10<00:00, 47.6MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 12:11:28,168 >> storing https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/a337644e63cac89729bd4fd067c987d2eb61b4b398d17d0ce0973c31dd76d2b0.c86d60e89da68465cb73e129befe8209faa3ac57b9aa272b87db45ba1f619582\n", + "[INFO|file_utils.py:1636] 2021-07-19 12:11:28,168 >> creating metadata file for /root/.cache/huggingface/transformers/a337644e63cac89729bd4fd067c987d2eb61b4b398d17d0ce0973c31dd76d2b0.c86d60e89da68465cb73e129befe8209faa3ac57b9aa272b87db45ba1f619582\n", + "[INFO|modeling_utils.py:1271] 2021-07-19 12:11:28,168 >> loading weights file 
https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/a337644e63cac89729bd4fd067c987d2eb61b4b398d17d0ce0973c31dd76d2b0.c86d60e89da68465cb73e129befe8209faa3ac57b9aa272b87db45ba1f619582\n", + "[WARNING|modeling_utils.py:1502] 2021-07-19 12:11:29,685 >> Some weights of the model checkpoint at BSC-TeMU/roberta-base-bne were not used when initializing RobertaForTokenClassification: ['lm_head.bias', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.weight']\n", + "- This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "[WARNING|modeling_utils.py:1513] 2021-07-19 12:11:29,685 >> Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at BSC-TeMU/roberta-base-bne and are newly initialized: ['classifier.weight', 'classifier.bias']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "Running tokenizer on train dataset: 0% 0/9 [00:00> The following columns in the training set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, pos_tags, tokens, id.\n", + "[INFO|trainer.py:1162] 2021-07-19 12:11:41,218 >> ***** Running training *****\n", + "[INFO|trainer.py:1163] 2021-07-19 12:11:41,218 >> Num examples = 8324\n", + "[INFO|trainer.py:1164] 2021-07-19 12:11:41,218 >> Num Epochs = 5\n", + "[INFO|trainer.py:1165] 2021-07-19 12:11:41,218 >> Instantaneous batch size per device = 8\n", + "[INFO|trainer.py:1166] 2021-07-19 12:11:41,218 >> Total train batch size (w. 
parallel, distributed & accumulation) = 8\n", + "[INFO|trainer.py:1167] 2021-07-19 12:11:41,218 >> Gradient Accumulation steps = 1\n", + "[INFO|trainer.py:1168] 2021-07-19 12:11:41,219 >> Total optimization steps = 5205\n", + "[INFO|integrations.py:417] 2021-07-19 12:11:41,235 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mversae\u001b[0m (use `wandb login --relogin` to force relogin)\n", + "2021-07-19 12:11:42.880000: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m./outputs\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval/runs/12cfhsht\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in /content/wandb/run-20210719_121141-12cfhsht\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run `wandb offline` to turn off syncing.\n", + "\n", + "{'loss': 0.1418, 'learning_rate': 4.5196926032660905e-05, 'epoch': 0.48}\n", + " 10% 500/5205 [02:48<30:04, 2.61it/s][INFO|trainer.py:1917] 2021-07-19 12:14:32,851 >> Saving model checkpoint to ./outputs/checkpoint-500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 12:14:32,853 >> Configuration saved in ./outputs/checkpoint-500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 12:14:34,368 >> Model weights saved in ./outputs/checkpoint-500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 12:14:34,369 >> tokenizer config file saved in ./outputs/checkpoint-500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 12:14:34,370 >> Special tokens file saved in ./outputs/checkpoint-500/special_tokens_map.json\n", + " 19% 1000/5205 [05:42<26:57, 2.60it/s]{'loss': 0.0593, 'learning_rate': 4.039385206532181e-05, 'epoch': 0.96}\n", + "[INFO|trainer.py:1917] 2021-07-19 12:17:26,829 >> Saving model checkpoint to ./outputs/checkpoint-1000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 12:17:26,831 >> Configuration saved in ./outputs/checkpoint-1000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 12:17:28,225 >> Model weights saved in ./outputs/checkpoint-1000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 12:17:28,227 >> tokenizer config file saved in ./outputs/checkpoint-1000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 12:17:28,227 >> Special tokens file saved in ./outputs/checkpoint-1000/special_tokens_map.json\n", + "{'loss': 0.0343, 'learning_rate': 3.5590778097982716e-05, 'epoch': 1.44}\n", + " 29% 1500/5205 [08:36<23:33, 2.62it/s][INFO|trainer.py:1917] 2021-07-19 12:20:20,638 >> Saving model checkpoint to ./outputs/checkpoint-1500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 12:20:20,639 >> Configuration saved in ./outputs/checkpoint-1500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 12:20:22,205 >> Model weights saved in ./outputs/checkpoint-1500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 12:20:22,206 >> tokenizer config file saved in ./outputs/checkpoint-1500/tokenizer_config.json\n", + 
"[INFO|tokenization_utils_base.py:2004] 2021-07-19 12:20:22,207 >> Special tokens file saved in ./outputs/checkpoint-1500/special_tokens_map.json\n", + "{'loss': 0.0352, 'learning_rate': 3.078770413064361e-05, 'epoch': 1.92}\n", + " 38% 2000/5205 [11:30<20:28, 2.61it/s][INFO|trainer.py:1917] 2021-07-19 12:23:14,468 >> Saving model checkpoint to ./outputs/checkpoint-2000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 12:23:14,469 >> Configuration saved in ./outputs/checkpoint-2000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 12:23:15,919 >> Model weights saved in ./outputs/checkpoint-2000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 12:23:15,921 >> tokenizer config file saved in ./outputs/checkpoint-2000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 12:23:15,921 >> Special tokens file saved in ./outputs/checkpoint-2000/special_tokens_map.json\n", + " 48% 2500/5205 [14:23<17:21, 2.60it/s][INFO|trainer.py:1917] 2021-07-19 12:26:08,104 >> Saving model checkpoint to ./outputs/checkpoint-2500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 12:26:08,105 >> Configuration saved in ./outputs/checkpoint-2500/config.json\n", + "{'loss': 0.0219, 'learning_rate': 2.5984630163304517e-05, 'epoch': 2.4}\n", + "[INFO|modeling_utils.py:997] 2021-07-19 12:26:09,567 >> Model weights saved in ./outputs/checkpoint-2500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 12:26:09,568 >> tokenizer config file saved in ./outputs/checkpoint-2500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 12:26:09,569 >> Special tokens file saved in ./outputs/checkpoint-2500/special_tokens_map.json\n", + "{'loss': 0.0163, 'learning_rate': 2.118155619596542e-05, 'epoch': 2.88}\n", + " 58% 3000/5205 [17:17<14:00, 2.62it/s][INFO|trainer.py:1917] 2021-07-19 12:29:01,777 >> Saving model checkpoint to ./outputs/checkpoint-3000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 12:29:01,778 >> Configuration saved in ./outputs/checkpoint-3000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 12:29:03,453 >> Model weights saved in ./outputs/checkpoint-3000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 12:29:03,455 >> tokenizer config file saved in ./outputs/checkpoint-3000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 12:29:03,455 >> Special tokens file saved in ./outputs/checkpoint-3000/special_tokens_map.json\n", + " 67% 3500/5205 [20:11<10:43, 2.65it/s]{'loss': 0.0114, 'learning_rate': 1.6378482228626322e-05, 'epoch': 3.36}\n", + "[INFO|trainer.py:1917] 2021-07-19 12:31:55,668 >> Saving model checkpoint to ./outputs/checkpoint-3500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 12:31:55,669 >> Configuration saved in ./outputs/checkpoint-3500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 12:31:57,243 >> Model weights saved in ./outputs/checkpoint-3500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 12:31:57,244 >> tokenizer config file saved in ./outputs/checkpoint-3500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 12:31:57,245 >> Special tokens file saved in ./outputs/checkpoint-3500/special_tokens_map.json\n", + " 77% 4000/5205 [23:05<07:40, 2.62it/s]{'loss': 0.0088, 'learning_rate': 1.1575408261287224e-05, 'epoch': 3.84}\n", + "[INFO|trainer.py:1917] 2021-07-19 12:34:49,424 >> Saving model checkpoint to ./outputs/checkpoint-4000\n", + 
"[INFO|configuration_utils.py:379] 2021-07-19 12:34:49,425 >> Configuration saved in ./outputs/checkpoint-4000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 12:34:50,977 >> Model weights saved in ./outputs/checkpoint-4000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 12:34:50,978 >> tokenizer config file saved in ./outputs/checkpoint-4000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 12:34:50,978 >> Special tokens file saved in ./outputs/checkpoint-4000/special_tokens_map.json\n", + " 86% 4500/5205 [25:58<04:26, 2.65it/s]{'loss': 0.0066, 'learning_rate': 6.7723342939481265e-06, 'epoch': 4.32}\n", + "[INFO|trainer.py:1917] 2021-07-19 12:37:42,549 >> Saving model checkpoint to ./outputs/checkpoint-4500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 12:37:42,551 >> Configuration saved in ./outputs/checkpoint-4500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 12:37:44,154 >> Model weights saved in ./outputs/checkpoint-4500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 12:37:44,156 >> tokenizer config file saved in ./outputs/checkpoint-4500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 12:37:44,156 >> Special tokens file saved in ./outputs/checkpoint-4500/special_tokens_map.json\n", + "{'loss': 0.005, 'learning_rate': 1.96926032660903e-06, 'epoch': 4.8}\n", + " 96% 5000/5205 [28:51<01:17, 2.63it/s][INFO|trainer.py:1917] 2021-07-19 12:40:35,837 >> Saving model checkpoint to ./outputs/checkpoint-5000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 12:40:35,838 >> Configuration saved in ./outputs/checkpoint-5000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 12:40:37,324 >> Model weights saved in ./outputs/checkpoint-5000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 12:40:37,325 >> tokenizer config file saved in ./outputs/checkpoint-5000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 12:40:37,326 >> Special tokens file saved in ./outputs/checkpoint-5000/special_tokens_map.json\n", + "100% 5205/5205 [30:06<00:00, 3.33it/s][INFO|trainer.py:1358] 2021-07-19 12:41:50,325 >> \n", + "\n", + "Training completed. 
Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n", + "100% 5205/5205 [30:06<00:00, 3.33it/s]{'train_runtime': 1809.1062, 'train_samples_per_second': 23.006, 'train_steps_per_second': 2.877, 'train_loss': 0.03290085400902916, 'epoch': 5.0}\n", + "100% 5205/5205 [30:06<00:00, 2.88it/s]\n", + "[INFO|trainer.py:1917] 2021-07-19 12:41:50,331 >> Saving model checkpoint to ./outputs\n", + "[INFO|configuration_utils.py:379] 2021-07-19 12:41:50,333 >> Configuration saved in ./outputs/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 12:41:51,802 >> Model weights saved in ./outputs/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 12:41:51,803 >> tokenizer config file saved in ./outputs/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 12:41:51,804 >> Special tokens file saved in ./outputs/special_tokens_map.json\n", + "***** train metrics *****\n", + " epoch = 5.0\n", + " train_loss = 0.0329\n", + " train_runtime = 0:30:09.10\n", + " train_samples = 8324\n", + " train_samples_per_second = 23.006\n", + " train_steps_per_second = 2.877\n", + "07/19/2021 12:41:51 - INFO - __main__ - *** Evaluate ***\n", + "[INFO|trainer.py:522] 2021-07-19 12:41:51,935 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, pos_tags, tokens, id.\n", + "[INFO|trainer.py:2163] 2021-07-19 12:41:51,957 >> ***** Running Evaluation *****\n", + "[INFO|trainer.py:2165] 2021-07-19 12:41:51,958 >> Num examples = 1916\n", + "[INFO|trainer.py:2168] 2021-07-19 12:41:51,958 >> Batch size = 8\n", + "100% 240/240 [00:26<00:00, 8.94it/s]\n", + "***** eval metrics *****\n", + " epoch = 5.0\n", + " eval_accuracy = 0.9807\n", + " eval_f1 = 0.87\n", + " eval_loss = 0.1062\n", + " eval_precision = 0.8656\n", + " eval_recall = 0.8745\n", + " eval_runtime = 0:00:26.95\n", + " eval_samples = 1916\n", + " eval_samples_per_second = 71.076\n", + " eval_steps_per_second = 8.903\n", + "\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish, PID 747\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Program ended successfully.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find user logs for this run at: /content/wandb/run-20210719_121141-12cfhsht/logs/debug.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find internal logs for this run at: /content/wandb/run-20210719_121141-12cfhsht/logs/debug-internal.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.005\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 5205\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime 1837\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp 1626698538\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step 11\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime 1809.1062\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second 23.006\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second 2.877\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos 1.087585926764544e+16\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss 0.0329\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 0.10615\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision 0.86559\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 
eval/recall 0.87454\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 0.87004\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy 0.98065\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 26.9571\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 71.076\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 8.903\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss █▄▂▃▂▂▁▁▁▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate █▇▆▆▅▄▃▃▂▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step ▁▂▂▃▄▄▅▅▆▇▇█\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 1 other file(s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced \u001b[33m./outputs\u001b[0m: \u001b[34mhttps://wandb.ai/versae/bertin-eval/runs/12cfhsht\u001b[0m\n", + "2021-07-19 12:42:31.685010: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "07/19/2021 12:42:34 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", + "07/19/2021 12:42:34 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", + "_n_gpu=1,\n", + "adafactor=False,\n", + "adam_beta1=0.9,\n", + "adam_beta2=0.999,\n", + "adam_epsilon=1e-08,\n", + "dataloader_drop_last=False,\n", + "dataloader_num_workers=0,\n", + "dataloader_pin_memory=True,\n", + "ddp_find_unused_parameters=None,\n", + "debug=[],\n", + "deepspeed=None,\n", + "disable_tqdm=False,\n", + "do_eval=True,\n", + "do_predict=False,\n", + "do_train=True,\n", + "eval_accumulation_steps=None,\n", + "eval_steps=500,\n", + "evaluation_strategy=IntervalStrategy.NO,\n", + "fp16=False,\n", + "fp16_backend=auto,\n", + "fp16_full_eval=False,\n", + "fp16_opt_level=O1,\n", + "gradient_accumulation_steps=1,\n", + "greater_is_better=None,\n", + "group_by_length=False,\n", + "ignore_data_skip=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=5e-05,\n", + "length_column_name=length,\n", + "load_best_model_at_end=False,\n", + "local_rank=-1,\n", + "log_level=-1,\n", + "log_level_replica=-1,\n", + "log_on_each_node=True,\n", + "logging_dir=./outputs/runs/Jul19_12-42-34_eab5184de9ff,\n", + "logging_first_step=False,\n", + "logging_steps=500,\n", + 
"logging_strategy=IntervalStrategy.STEPS,\n", + "lr_scheduler_type=SchedulerType.LINEAR,\n", + "max_grad_norm=1.0,\n", + "max_steps=-1,\n", + "metric_for_best_model=None,\n", + "mp_parameters=,\n", + "no_cuda=False,\n", + "num_train_epochs=5.0,\n", + "output_dir=./outputs,\n", + "overwrite_output_dir=True,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=8,\n", + "per_device_train_batch_size=8,\n", + "prediction_loss_only=False,\n", + "push_to_hub=False,\n", + "push_to_hub_model_id=outputs,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=None,\n", + "remove_unused_columns=True,\n", + "report_to=['tensorboard', 'wandb'],\n", + "resume_from_checkpoint=None,\n", + "run_name=./outputs,\n", + "save_on_each_node=False,\n", + "save_steps=500,\n", + "save_strategy=IntervalStrategy.STEPS,\n", + "save_total_limit=None,\n", + "seed=42,\n", + "sharded_ddp=[],\n", + "skip_memory_metrics=True,\n", + "tpu_metrics_debug=False,\n", + "tpu_num_cores=None,\n", + "use_legacy_prediction_loop=False,\n", + "warmup_ratio=0.0,\n", + "warmup_steps=0,\n", + "weight_decay=0.0,\n", + ")\n", + "07/19/2021 12:42:36 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 12:42:36 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 12:42:36 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + "07/19/2021 12:42:36 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 12:42:36 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 12:42:37 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 12:42:37 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 12:42:37 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + 
"07/19/2021 12:42:37 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 12:42:37 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 12:42:37 - INFO - datasets.info - Loading Dataset Infos from /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 12:42:37 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", + "07/19/2021 12:42:37 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 12:42:37 - WARNING - datasets.builder - Reusing dataset conll2002 (/root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5)\n", + "07/19/2021 12:42:37 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "100% 3/3 [00:00<00:00, 28.00it/s]\n", + "[INFO|file_utils.py:1624] 2021-07-19 12:42:38,678 >> https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpck_oqd1g\n", + "Downloading: 100% 648/648 [00:00<00:00, 440kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 12:42:39,399 >> storing https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/config.json in cache at /root/.cache/huggingface/transformers/cb7cedb04246e225d56ba26d207f1d1809b31a9bbe9b63103371d835c6ac0502.f4e4777229bac528fa2a8d4833e2ef53624e985ebde0fd527064a5cc7c50832b\n", + "[INFO|file_utils.py:1636] 2021-07-19 12:42:39,400 >> creating metadata file for /root/.cache/huggingface/transformers/cb7cedb04246e225d56ba26d207f1d1809b31a9bbe9b63103371d835c6ac0502.f4e4777229bac528fa2a8d4833e2ef53624e985ebde0fd527064a5cc7c50832b\n", + "[INFO|configuration_utils.py:545] 2021-07-19 12:42:39,400 >> loading configuration file https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/cb7cedb04246e225d56ba26d207f1d1809b31a9bbe9b63103371d835c6ac0502.f4e4777229bac528fa2a8d4833e2ef53624e985ebde0fd527064a5cc7c50832b\n", + "[INFO|configuration_utils.py:581] 2021-07-19 12:42:39,401 >> Model config BertConfig {\n", + " \"_name_or_path\": \"dccuchile/bert-base-spanish-wwm-cased\",\n", + " \"architectures\": [\n", + " \"BertForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"finetuning_task\": \"ner\",\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"id2label\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8\n", + " },\n", + " 
\"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"label2id\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8\n", + " },\n", + " \"layer_norm_eps\": 1e-12,\n", + " \"max_position_embeddings\": 512,\n", + " \"model_type\": \"bert\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"output_past\": true,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 2,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 31002\n", + "}\n", + "\n", + "[INFO|file_utils.py:1624] 2021-07-19 12:42:40,119 >> https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/tokenizer_config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpgd0560ha\n", + "Downloading: 100% 364/364 [00:00<00:00, 267kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 12:42:40,845 >> storing https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/tokenizer_config.json in cache at /root/.cache/huggingface/transformers/ca34e6c1251888a8ed98da2a454f869d28e3438eef67c2f93aa8133459ac08a3.0e90f656d0426b15b4927d1fe8ca5ec4c2e7b0d0e878c9153c3ddc6ed9bbed3c\n", + "[INFO|file_utils.py:1636] 2021-07-19 12:42:40,845 >> creating metadata file for /root/.cache/huggingface/transformers/ca34e6c1251888a8ed98da2a454f869d28e3438eef67c2f93aa8133459ac08a3.0e90f656d0426b15b4927d1fe8ca5ec4c2e7b0d0e878c9153c3ddc6ed9bbed3c\n", + "[INFO|configuration_utils.py:545] 2021-07-19 12:42:41,566 >> loading configuration file https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/cb7cedb04246e225d56ba26d207f1d1809b31a9bbe9b63103371d835c6ac0502.f4e4777229bac528fa2a8d4833e2ef53624e985ebde0fd527064a5cc7c50832b\n", + "[INFO|configuration_utils.py:581] 2021-07-19 12:42:41,567 >> Model config BertConfig {\n", + " \"_name_or_path\": \"dccuchile/bert-base-spanish-wwm-cased\",\n", + " \"architectures\": [\n", + " \"BertForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"layer_norm_eps\": 1e-12,\n", + " \"max_position_embeddings\": 512,\n", + " \"model_type\": \"bert\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"output_past\": true,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 2,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 31002\n", + "}\n", + "\n", + "[INFO|file_utils.py:1624] 2021-07-19 12:42:42,282 >> https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/vocab.txt not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmplt5qimq9\n", + "Downloading: 100% 242k/242k [00:00<00:00, 693kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 12:42:43,361 >> storing https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/vocab.txt in cache at 
/root/.cache/huggingface/transformers/6761cd0c3d282272f598fcc1fa8c4ecfff8c18762ec8acb40f9cbb562cb0901e.6587bde86239957281af55b2f7e564df111a2b4f9dfc0ad884f13ea7106e4dfb\n", + "[INFO|file_utils.py:1636] 2021-07-19 12:42:43,361 >> creating metadata file for /root/.cache/huggingface/transformers/6761cd0c3d282272f598fcc1fa8c4ecfff8c18762ec8acb40f9cbb562cb0901e.6587bde86239957281af55b2f7e564df111a2b4f9dfc0ad884f13ea7106e4dfb\n", + "[INFO|file_utils.py:1624] 2021-07-19 12:42:44,077 >> https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/tokenizer.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpkhoeusow\n", + "Downloading: 100% 480k/480k [00:00<00:00, 1.34MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 12:42:45,157 >> storing https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/tokenizer.json in cache at /root/.cache/huggingface/transformers/44de7af89c157bf67367a71105165d92bebe0585543739a918e3870d25484c27.6a099cd4b12bf7db174fffe48b004eb919c325f108e0c36176a0fe0ad1848d31\n", + "[INFO|file_utils.py:1636] 2021-07-19 12:42:45,157 >> creating metadata file for /root/.cache/huggingface/transformers/44de7af89c157bf67367a71105165d92bebe0585543739a918e3870d25484c27.6a099cd4b12bf7db174fffe48b004eb919c325f108e0c36176a0fe0ad1848d31\n", + "[INFO|file_utils.py:1624] 2021-07-19 12:42:46,596 >> https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/special_tokens_map.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmprgindbow\n", + "Downloading: 100% 134/134 [00:00<00:00, 103kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 12:42:47,314 >> storing https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/special_tokens_map.json in cache at /root/.cache/huggingface/transformers/9848a00af462c42dfb4ec88ef438fbab5256330f7f6f50badc48d277f9367d49.f982506b52498d4adb4bd491f593dc92b2ef6be61bfdbe9d30f53f963f9f5b66\n", + "[INFO|file_utils.py:1636] 2021-07-19 12:42:47,314 >> creating metadata file for /root/.cache/huggingface/transformers/9848a00af462c42dfb4ec88ef438fbab5256330f7f6f50badc48d277f9367d49.f982506b52498d4adb4bd491f593dc92b2ef6be61bfdbe9d30f53f963f9f5b66\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 12:42:48,032 >> loading file https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/vocab.txt from cache at /root/.cache/huggingface/transformers/6761cd0c3d282272f598fcc1fa8c4ecfff8c18762ec8acb40f9cbb562cb0901e.6587bde86239957281af55b2f7e564df111a2b4f9dfc0ad884f13ea7106e4dfb\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 12:42:48,033 >> loading file https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/44de7af89c157bf67367a71105165d92bebe0585543739a918e3870d25484c27.6a099cd4b12bf7db174fffe48b004eb919c325f108e0c36176a0fe0ad1848d31\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 12:42:48,033 >> loading file https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 12:42:48,033 >> loading file https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/special_tokens_map.json from cache at /root/.cache/huggingface/transformers/9848a00af462c42dfb4ec88ef438fbab5256330f7f6f50badc48d277f9367d49.f982506b52498d4adb4bd491f593dc92b2ef6be61bfdbe9d30f53f963f9f5b66\n", + 
"[INFO|tokenization_utils_base.py:1722] 2021-07-19 12:42:48,033 >> loading file https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/tokenizer_config.json from cache at /root/.cache/huggingface/transformers/ca34e6c1251888a8ed98da2a454f869d28e3438eef67c2f93aa8133459ac08a3.0e90f656d0426b15b4927d1fe8ca5ec4c2e7b0d0e878c9153c3ddc6ed9bbed3c\n", + "[INFO|configuration_utils.py:545] 2021-07-19 12:42:48,751 >> loading configuration file https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/cb7cedb04246e225d56ba26d207f1d1809b31a9bbe9b63103371d835c6ac0502.f4e4777229bac528fa2a8d4833e2ef53624e985ebde0fd527064a5cc7c50832b\n", + "[INFO|configuration_utils.py:581] 2021-07-19 12:42:48,752 >> Model config BertConfig {\n", + " \"_name_or_path\": \"dccuchile/bert-base-spanish-wwm-cased\",\n", + " \"architectures\": [\n", + " \"BertForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"layer_norm_eps\": 1e-12,\n", + " \"max_position_embeddings\": 512,\n", + " \"model_type\": \"bert\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"output_past\": true,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 2,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 31002\n", + "}\n", + "\n", + "[INFO|file_utils.py:1624] 2021-07-19 12:42:49,495 >> https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/pytorch_model.bin not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpzy1_hik_\n", + "Downloading: 100% 440M/440M [00:09<00:00, 47.1MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 12:42:59,298 >> storing https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/52382cbe7c1587c6b588daa81eaf247c5e2ad073d42b52192a8cd4202e7429b6.a88ccd19b1f271e63b6a901510804e6c0318089355c471334fe8b71b316a30ab\n", + "[INFO|file_utils.py:1636] 2021-07-19 12:42:59,299 >> creating metadata file for /root/.cache/huggingface/transformers/52382cbe7c1587c6b588daa81eaf247c5e2ad073d42b52192a8cd4202e7429b6.a88ccd19b1f271e63b6a901510804e6c0318089355c471334fe8b71b316a30ab\n", + "[INFO|modeling_utils.py:1271] 2021-07-19 12:42:59,299 >> loading weights file https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/52382cbe7c1587c6b588daa81eaf247c5e2ad073d42b52192a8cd4202e7429b6.a88ccd19b1f271e63b6a901510804e6c0318089355c471334fe8b71b316a30ab\n", + "[WARNING|modeling_utils.py:1502] 2021-07-19 12:43:00,603 >> Some weights of the model checkpoint at dccuchile/bert-base-spanish-wwm-cased were not used when initializing BertForTokenClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight']\n", + "- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or 
with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "[WARNING|modeling_utils.py:1513] 2021-07-19 12:43:00,603 >> Some weights of BertForTokenClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['classifier.bias', 'classifier.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "Running tokenizer on train dataset: 0% 0/9 [00:00> The following columns in the training set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: id, pos_tags, tokens, ner_tags.\n", + "[INFO|trainer.py:1162] 2021-07-19 12:43:12,682 >> ***** Running training *****\n", + "[INFO|trainer.py:1163] 2021-07-19 12:43:12,682 >> Num examples = 8324\n", + "[INFO|trainer.py:1164] 2021-07-19 12:43:12,682 >> Num Epochs = 5\n", + "[INFO|trainer.py:1165] 2021-07-19 12:43:12,682 >> Instantaneous batch size per device = 8\n", + "[INFO|trainer.py:1166] 2021-07-19 12:43:12,683 >> Total train batch size (w. parallel, distributed & accumulation) = 8\n", + "[INFO|trainer.py:1167] 2021-07-19 12:43:12,683 >> Gradient Accumulation steps = 1\n", + "[INFO|trainer.py:1168] 2021-07-19 12:43:12,683 >> Total optimization steps = 5205\n", + "[INFO|integrations.py:417] 2021-07-19 12:43:12,727 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mversae\u001b[0m (use `wandb login --relogin` to force relogin)\n", + "2021-07-19 12:43:14.386397: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m./outputs\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval/runs/3afdjnxp\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in /content/wandb/run-20210719_124312-3afdjnxp\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run `wandb offline` to turn off syncing.\n", + "\n", + " 10% 500/5205 [02:54<30:58, 2.53it/s]{'loss': 0.1093, 'learning_rate': 4.5196926032660905e-05, 'epoch': 0.48}\n", + "[INFO|trainer.py:1917] 2021-07-19 12:46:10,608 >> Saving model checkpoint to ./outputs/checkpoint-500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 12:46:10,609 >> Configuration saved in ./outputs/checkpoint-500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 12:46:12,470 >> Model weights saved in ./outputs/checkpoint-500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 12:46:12,471 >> tokenizer config file saved in ./outputs/checkpoint-500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 12:46:12,471 >> Special tokens file saved in ./outputs/checkpoint-500/special_tokens_map.json\n", + " 19% 1000/5205 [05:54<27:38, 2.54it/s]{'loss': 0.0607, 
'learning_rate': 4.039385206532181e-05, 'epoch': 0.96}\n", + "[INFO|trainer.py:1917] 2021-07-19 12:49:10,192 >> Saving model checkpoint to ./outputs/checkpoint-1000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 12:49:10,195 >> Configuration saved in ./outputs/checkpoint-1000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 12:49:12,074 >> Model weights saved in ./outputs/checkpoint-1000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 12:49:12,075 >> tokenizer config file saved in ./outputs/checkpoint-1000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 12:49:12,075 >> Special tokens file saved in ./outputs/checkpoint-1000/special_tokens_map.json\n", + "{'loss': 0.0386, 'learning_rate': 3.5590778097982716e-05, 'epoch': 1.44}\n", + " 29% 1500/5205 [08:53<24:04, 2.56it/s][INFO|trainer.py:1917] 2021-07-19 12:52:09,685 >> Saving model checkpoint to ./outputs/checkpoint-1500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 12:52:09,687 >> Configuration saved in ./outputs/checkpoint-1500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 12:52:11,597 >> Model weights saved in ./outputs/checkpoint-1500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 12:52:11,598 >> tokenizer config file saved in ./outputs/checkpoint-1500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 12:52:11,598 >> Special tokens file saved in ./outputs/checkpoint-1500/special_tokens_map.json\n", + " 38% 2000/5205 [11:52<21:04, 2.53it/s]{'loss': 0.0374, 'learning_rate': 3.078770413064361e-05, 'epoch': 1.92}\n", + "[INFO|trainer.py:1917] 2021-07-19 12:55:08,667 >> Saving model checkpoint to ./outputs/checkpoint-2000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 12:55:08,669 >> Configuration saved in ./outputs/checkpoint-2000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 12:55:10,255 >> Model weights saved in ./outputs/checkpoint-2000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 12:55:10,256 >> tokenizer config file saved in ./outputs/checkpoint-2000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 12:55:10,256 >> Special tokens file saved in ./outputs/checkpoint-2000/special_tokens_map.json\n", + " 48% 2500/5205 [14:50<17:47, 2.53it/s]{'loss': 0.0205, 'learning_rate': 2.5984630163304517e-05, 'epoch': 2.4}\n", + " 48% 2500/5205 [14:50<17:47, 2.53it/s][INFO|trainer.py:1917] 2021-07-19 12:58:06,487 >> Saving model checkpoint to ./outputs/checkpoint-2500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 12:58:06,489 >> Configuration saved in ./outputs/checkpoint-2500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 12:58:08,096 >> Model weights saved in ./outputs/checkpoint-2500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 12:58:08,096 >> tokenizer config file saved in ./outputs/checkpoint-2500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 12:58:08,097 >> Special tokens file saved in ./outputs/checkpoint-2500/special_tokens_map.json\n", + " 58% 3000/5205 [17:48<14:28, 2.54it/s]{'loss': 0.018, 'learning_rate': 2.118155619596542e-05, 'epoch': 2.88}\n", + " 58% 3000/5205 [17:49<14:28, 2.54it/s][INFO|trainer.py:1917] 2021-07-19 13:01:04,733 >> Saving model checkpoint to ./outputs/checkpoint-3000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 13:01:04,734 >> Configuration saved in ./outputs/checkpoint-3000/config.json\n", + 
"[INFO|modeling_utils.py:997] 2021-07-19 13:01:06,183 >> Model weights saved in ./outputs/checkpoint-3000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 13:01:06,184 >> tokenizer config file saved in ./outputs/checkpoint-3000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 13:01:06,185 >> Special tokens file saved in ./outputs/checkpoint-3000/special_tokens_map.json\n", + " 67% 3500/5205 [20:46<11:06, 2.56it/s]{'loss': 0.0132, 'learning_rate': 1.6378482228626322e-05, 'epoch': 3.36}\n", + " 67% 3500/5205 [20:46<11:06, 2.56it/s][INFO|trainer.py:1917] 2021-07-19 13:04:02,534 >> Saving model checkpoint to ./outputs/checkpoint-3500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 13:04:02,540 >> Configuration saved in ./outputs/checkpoint-3500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 13:04:03,793 >> Model weights saved in ./outputs/checkpoint-3500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 13:04:03,798 >> tokenizer config file saved in ./outputs/checkpoint-3500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 13:04:03,799 >> Special tokens file saved in ./outputs/checkpoint-3500/special_tokens_map.json\n", + "{'loss': 0.0096, 'learning_rate': 1.1575408261287224e-05, 'epoch': 3.84}\n", + " 77% 4000/5205 [23:44<07:56, 2.53it/s][INFO|trainer.py:1917] 2021-07-19 13:07:00,237 >> Saving model checkpoint to ./outputs/checkpoint-4000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 13:07:00,238 >> Configuration saved in ./outputs/checkpoint-4000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 13:07:01,487 >> Model weights saved in ./outputs/checkpoint-4000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 13:07:01,488 >> tokenizer config file saved in ./outputs/checkpoint-4000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 13:07:01,513 >> Special tokens file saved in ./outputs/checkpoint-4000/special_tokens_map.json\n", + " 86% 4500/5205 [26:42<04:36, 2.55it/s]{'loss': 0.0072, 'learning_rate': 6.7723342939481265e-06, 'epoch': 4.32}\n", + " 86% 4500/5205 [26:42<04:36, 2.55it/s][INFO|trainer.py:1917] 2021-07-19 13:09:57,873 >> Saving model checkpoint to ./outputs/checkpoint-4500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 13:09:57,875 >> Configuration saved in ./outputs/checkpoint-4500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 13:09:59,182 >> Model weights saved in ./outputs/checkpoint-4500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 13:09:59,183 >> tokenizer config file saved in ./outputs/checkpoint-4500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 13:09:59,184 >> Special tokens file saved in ./outputs/checkpoint-4500/special_tokens_map.json\n", + "{'loss': 0.0054, 'learning_rate': 1.96926032660903e-06, 'epoch': 4.8}\n", + " 96% 5000/5205 [29:39<01:19, 2.57it/s][INFO|trainer.py:1917] 2021-07-19 13:12:55,378 >> Saving model checkpoint to ./outputs/checkpoint-5000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 13:12:55,379 >> Configuration saved in ./outputs/checkpoint-5000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 13:12:56,826 >> Model weights saved in ./outputs/checkpoint-5000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 13:12:57,047 >> tokenizer config file saved in ./outputs/checkpoint-5000/tokenizer_config.json\n", + 
"[INFO|tokenization_utils_base.py:2004] 2021-07-19 13:12:57,047 >> Special tokens file saved in ./outputs/checkpoint-5000/special_tokens_map.json\n", + "100% 5205/5205 [30:55<00:00, 3.24it/s][INFO|trainer.py:1358] 2021-07-19 13:14:10,972 >> \n", + "\n", + "Training completed. Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n", + "{'train_runtime': 1858.2897, 'train_samples_per_second': 22.397, 'train_steps_per_second': 2.801, 'train_loss': 0.030922601500116324, 'epoch': 5.0}\n", + "100% 5205/5205 [30:55<00:00, 2.81it/s]\n", + "[INFO|trainer.py:1917] 2021-07-19 13:14:10,976 >> Saving model checkpoint to ./outputs\n", + "[INFO|configuration_utils.py:379] 2021-07-19 13:14:10,978 >> Configuration saved in ./outputs/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 13:14:12,231 >> Model weights saved in ./outputs/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 13:14:12,232 >> tokenizer config file saved in ./outputs/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 13:14:12,233 >> Special tokens file saved in ./outputs/special_tokens_map.json\n", + "***** train metrics *****\n", + " epoch = 5.0\n", + " train_loss = 0.0309\n", + " train_runtime = 0:30:58.28\n", + " train_samples = 8324\n", + " train_samples_per_second = 22.397\n", + " train_steps_per_second = 2.801\n", + "07/19/2021 13:14:12 - INFO - __main__ - *** Evaluate ***\n", + "[INFO|trainer.py:522] 2021-07-19 13:14:12,284 >> The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: id, pos_tags, tokens, ner_tags.\n", + "[INFO|trainer.py:2163] 2021-07-19 13:14:12,470 >> ***** Running Evaluation *****\n", + "[INFO|trainer.py:2165] 2021-07-19 13:14:12,470 >> Num examples = 1916\n", + "[INFO|trainer.py:2168] 2021-07-19 13:14:12,470 >> Batch size = 8\n", + "100% 240/240 [00:27<00:00, 8.72it/s]\n", + "***** eval metrics *****\n", + " epoch = 5.0\n", + " eval_accuracy = 0.9783\n", + " eval_f1 = 0.8579\n", + " eval_loss = 0.136\n", + " eval_precision = 0.8544\n", + " eval_recall = 0.8614\n", + " eval_runtime = 0:00:27.66\n", + " eval_samples = 1916\n", + " eval_samples_per_second = 69.269\n", + " eval_steps_per_second = 8.677\n", + "\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish, PID 817\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Program ended successfully.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find user logs for this run at: /content/wandb/run-20210719_124312-3afdjnxp/logs/debug.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find internal logs for this run at: /content/wandb/run-20210719_124312-3afdjnxp/logs/debug-internal.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.0054\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 5205\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime 1887\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp 1626700480\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step 11\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime 1858.2897\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second 22.397\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second 2.801\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos 
1.087585926764544e+16\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss 0.03092\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 0.13604\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision 0.85438\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall 0.86144\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 0.85789\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy 0.97827\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 27.6603\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 69.269\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 8.677\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss █▅▃▃▂▂▂▁▁▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate █▇▆▆▅▄▃▃▂▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step ▁▂▂▃▄▄▅▅▆▇▇█\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 1 other file(s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced \u001b[33m./outputs\u001b[0m: \u001b[34mhttps://wandb.ai/versae/bertin-eval/runs/3afdjnxp\u001b[0m\n", + "2021-07-19 13:14:52.452814: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "07/19/2021 13:14:55 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", + "07/19/2021 13:14:55 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", + "_n_gpu=1,\n", + "adafactor=False,\n", + "adam_beta1=0.9,\n", + "adam_beta2=0.999,\n", + "adam_epsilon=1e-08,\n", + "dataloader_drop_last=False,\n", + "dataloader_num_workers=0,\n", + "dataloader_pin_memory=True,\n", + "ddp_find_unused_parameters=None,\n", + "debug=[],\n", + "deepspeed=None,\n", + "disable_tqdm=False,\n", + "do_eval=True,\n", + "do_predict=False,\n", + "do_train=True,\n", + "eval_accumulation_steps=None,\n", + "eval_steps=500,\n", + "evaluation_strategy=IntervalStrategy.NO,\n", + "fp16=False,\n", + "fp16_backend=auto,\n", + "fp16_full_eval=False,\n", + "fp16_opt_level=O1,\n", + "gradient_accumulation_steps=1,\n", + "greater_is_better=None,\n", + "group_by_length=False,\n", + "ignore_data_skip=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=5e-05,\n", + "length_column_name=length,\n", + 
"load_best_model_at_end=False,\n", + "local_rank=-1,\n", + "log_level=-1,\n", + "log_level_replica=-1,\n", + "log_on_each_node=True,\n", + "logging_dir=./outputs/runs/Jul19_13-14-55_eab5184de9ff,\n", + "logging_first_step=False,\n", + "logging_steps=500,\n", + "logging_strategy=IntervalStrategy.STEPS,\n", + "lr_scheduler_type=SchedulerType.LINEAR,\n", + "max_grad_norm=1.0,\n", + "max_steps=-1,\n", + "metric_for_best_model=None,\n", + "mp_parameters=,\n", + "no_cuda=False,\n", + "num_train_epochs=5.0,\n", + "output_dir=./outputs,\n", + "overwrite_output_dir=True,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=8,\n", + "per_device_train_batch_size=8,\n", + "prediction_loss_only=False,\n", + "push_to_hub=False,\n", + "push_to_hub_model_id=outputs,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=None,\n", + "remove_unused_columns=True,\n", + "report_to=['tensorboard', 'wandb'],\n", + "resume_from_checkpoint=None,\n", + "run_name=./outputs,\n", + "save_on_each_node=False,\n", + "save_steps=500,\n", + "save_strategy=IntervalStrategy.STEPS,\n", + "save_total_limit=None,\n", + "seed=42,\n", + "sharded_ddp=[],\n", + "skip_memory_metrics=True,\n", + "tpu_metrics_debug=False,\n", + "tpu_num_cores=None,\n", + "use_legacy_prediction_loop=False,\n", + "warmup_ratio=0.0,\n", + "warmup_steps=0,\n", + "weight_decay=0.0,\n", + ")\n", + "07/19/2021 13:14:56 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 13:14:56 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 13:14:56 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + "07/19/2021 13:14:56 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 13:14:56 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 13:14:57 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 13:14:57 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 13:14:57 - INFO - datasets.load - Found script file 
from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + "07/19/2021 13:14:57 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 13:14:57 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 13:14:57 - INFO - datasets.info - Loading Dataset Infos from /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 13:14:57 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", + "07/19/2021 13:14:57 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 13:14:57 - WARNING - datasets.builder - Reusing dataset conll2002 (/root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5)\n", + "07/19/2021 13:14:57 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "100% 3/3 [00:00<00:00, 26.94it/s]\n", + "[INFO|file_utils.py:1624] 2021-07-19 13:14:58,668 >> https://huggingface.co/bert-base-multilingual-cased/resolve/main/config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp99d369nz\n", + "Downloading: 100% 625/625 [00:00<00:00, 479kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 13:14:59,385 >> storing https://huggingface.co/bert-base-multilingual-cased/resolve/main/config.json in cache at /root/.cache/huggingface/transformers/6c4a5d81a58c9791cdf76a09bce1b5abfb9cf958aebada51200f4515403e5d08.0fe59f3f4f1335dadeb4bce8b8146199d9083512b50d07323c1c319f96df450c\n", + "[INFO|file_utils.py:1636] 2021-07-19 13:14:59,385 >> creating metadata file for /root/.cache/huggingface/transformers/6c4a5d81a58c9791cdf76a09bce1b5abfb9cf958aebada51200f4515403e5d08.0fe59f3f4f1335dadeb4bce8b8146199d9083512b50d07323c1c319f96df450c\n", + "[INFO|configuration_utils.py:545] 2021-07-19 13:14:59,386 >> loading configuration file https://huggingface.co/bert-base-multilingual-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/6c4a5d81a58c9791cdf76a09bce1b5abfb9cf958aebada51200f4515403e5d08.0fe59f3f4f1335dadeb4bce8b8146199d9083512b50d07323c1c319f96df450c\n", + "[INFO|configuration_utils.py:581] 2021-07-19 13:14:59,387 >> Model config BertConfig {\n", + " \"architectures\": [\n", + " \"BertForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"directionality\": \"bidi\",\n", + " \"finetuning_task\": \"ner\",\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " 
\"id2label\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8\n", + " },\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"label2id\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8\n", + " },\n", + " \"layer_norm_eps\": 1e-12,\n", + " \"max_position_embeddings\": 512,\n", + " \"model_type\": \"bert\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 0,\n", + " \"pooler_fc_size\": 768,\n", + " \"pooler_num_attention_heads\": 12,\n", + " \"pooler_num_fc_layers\": 3,\n", + " \"pooler_size_per_head\": 128,\n", + " \"pooler_type\": \"first_token_transform\",\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 2,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 119547\n", + "}\n", + "\n", + "[INFO|file_utils.py:1624] 2021-07-19 13:15:00,367 >> https://huggingface.co/bert-base-multilingual-cased/resolve/main/tokenizer_config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmplitezxht\n", + "Downloading: 100% 29.0/29.0 [00:00<00:00, 21.6kB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 13:15:01,085 >> storing https://huggingface.co/bert-base-multilingual-cased/resolve/main/tokenizer_config.json in cache at /root/.cache/huggingface/transformers/f55e7a2ad4f8d0fff2733b3f79777e1e99247f2e4583703e92ce74453af8c235.ec5c189f89475aac7d8cbd243960a0655cfadc3d0474da8ff2ed0bf1699c2a5f\n", + "[INFO|file_utils.py:1636] 2021-07-19 13:15:01,085 >> creating metadata file for /root/.cache/huggingface/transformers/f55e7a2ad4f8d0fff2733b3f79777e1e99247f2e4583703e92ce74453af8c235.ec5c189f89475aac7d8cbd243960a0655cfadc3d0474da8ff2ed0bf1699c2a5f\n", + "[INFO|configuration_utils.py:545] 2021-07-19 13:15:01,804 >> loading configuration file https://huggingface.co/bert-base-multilingual-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/6c4a5d81a58c9791cdf76a09bce1b5abfb9cf958aebada51200f4515403e5d08.0fe59f3f4f1335dadeb4bce8b8146199d9083512b50d07323c1c319f96df450c\n", + "[INFO|configuration_utils.py:581] 2021-07-19 13:15:01,804 >> Model config BertConfig {\n", + " \"architectures\": [\n", + " \"BertForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"directionality\": \"bidi\",\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"layer_norm_eps\": 1e-12,\n", + " \"max_position_embeddings\": 512,\n", + " \"model_type\": \"bert\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 0,\n", + " \"pooler_fc_size\": 768,\n", + " \"pooler_num_attention_heads\": 12,\n", + " \"pooler_num_fc_layers\": 3,\n", + " \"pooler_size_per_head\": 128,\n", + " \"pooler_type\": \"first_token_transform\",\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 2,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 119547\n", + "}\n", + "\n", + "[INFO|file_utils.py:1624] 2021-07-19 13:15:02,527 >> 
https://huggingface.co/bert-base-multilingual-cased/resolve/main/vocab.txt not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp3mdjv62p\n", + "Downloading: 100% 996k/996k [00:00<00:00, 1.42MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 13:15:03,952 >> storing https://huggingface.co/bert-base-multilingual-cased/resolve/main/vocab.txt in cache at /root/.cache/huggingface/transformers/eff018e45de5364a8368df1f2df3461d506e2a111e9dd50af1fae061cd460ead.6c5b6600e968f4b5e08c86d8891ea99e51537fc2bf251435fb46922e8f7a7b29\n", + "[INFO|file_utils.py:1636] 2021-07-19 13:15:03,952 >> creating metadata file for /root/.cache/huggingface/transformers/eff018e45de5364a8368df1f2df3461d506e2a111e9dd50af1fae061cd460ead.6c5b6600e968f4b5e08c86d8891ea99e51537fc2bf251435fb46922e8f7a7b29\n", + "[INFO|file_utils.py:1624] 2021-07-19 13:15:04,670 >> https://huggingface.co/bert-base-multilingual-cased/resolve/main/tokenizer.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpffm9tmvf\n", + "Downloading: 100% 1.96M/1.96M [00:00<00:00, 2.75MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 13:15:06,118 >> storing https://huggingface.co/bert-base-multilingual-cased/resolve/main/tokenizer.json in cache at /root/.cache/huggingface/transformers/46880f3b0081fda494a4e15b05787692aa4c1e21e0ff2428ba8b14d4eda0784d.b33e51591f94f17c238ee9b1fac75b96ff2678cbaed6e108feadb3449d18dc24\n", + "[INFO|file_utils.py:1636] 2021-07-19 13:15:06,119 >> creating metadata file for /root/.cache/huggingface/transformers/46880f3b0081fda494a4e15b05787692aa4c1e21e0ff2428ba8b14d4eda0784d.b33e51591f94f17c238ee9b1fac75b96ff2678cbaed6e108feadb3449d18dc24\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 13:15:08,276 >> loading file https://huggingface.co/bert-base-multilingual-cased/resolve/main/vocab.txt from cache at /root/.cache/huggingface/transformers/eff018e45de5364a8368df1f2df3461d506e2a111e9dd50af1fae061cd460ead.6c5b6600e968f4b5e08c86d8891ea99e51537fc2bf251435fb46922e8f7a7b29\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 13:15:08,276 >> loading file https://huggingface.co/bert-base-multilingual-cased/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/46880f3b0081fda494a4e15b05787692aa4c1e21e0ff2428ba8b14d4eda0784d.b33e51591f94f17c238ee9b1fac75b96ff2678cbaed6e108feadb3449d18dc24\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 13:15:08,276 >> loading file https://huggingface.co/bert-base-multilingual-cased/resolve/main/added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 13:15:08,276 >> loading file https://huggingface.co/bert-base-multilingual-cased/resolve/main/special_tokens_map.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 13:15:08,276 >> loading file https://huggingface.co/bert-base-multilingual-cased/resolve/main/tokenizer_config.json from cache at /root/.cache/huggingface/transformers/f55e7a2ad4f8d0fff2733b3f79777e1e99247f2e4583703e92ce74453af8c235.ec5c189f89475aac7d8cbd243960a0655cfadc3d0474da8ff2ed0bf1699c2a5f\n", + "[INFO|configuration_utils.py:545] 2021-07-19 13:15:08,996 >> loading configuration file https://huggingface.co/bert-base-multilingual-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/6c4a5d81a58c9791cdf76a09bce1b5abfb9cf958aebada51200f4515403e5d08.0fe59f3f4f1335dadeb4bce8b8146199d9083512b50d07323c1c319f96df450c\n", + "[INFO|configuration_utils.py:581] 
2021-07-19 13:15:08,997 >> Model config BertConfig {\n", + " \"architectures\": [\n", + " \"BertForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"directionality\": \"bidi\",\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"layer_norm_eps\": 1e-12,\n", + " \"max_position_embeddings\": 512,\n", + " \"model_type\": \"bert\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 0,\n", + " \"pooler_fc_size\": 768,\n", + " \"pooler_num_attention_heads\": 12,\n", + " \"pooler_num_fc_layers\": 3,\n", + " \"pooler_size_per_head\": 128,\n", + " \"pooler_type\": \"first_token_transform\",\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 2,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 119547\n", + "}\n", + "\n", + "[INFO|file_utils.py:1624] 2021-07-19 13:15:09,847 >> https://huggingface.co/bert-base-multilingual-cased/resolve/main/pytorch_model.bin not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpjp03zrmf\n", + "Downloading: 100% 714M/714M [00:15<00:00, 44.8MB/s]\n", + "[INFO|file_utils.py:1628] 2021-07-19 13:15:26,143 >> storing https://huggingface.co/bert-base-multilingual-cased/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/0a3fd51713dcbb4def175c7f85bddc995d5976ce1dde327f99104e4d33069f17.aa7be4c79d76f4066d9b354496ea477c9ee39c5d889156dd1efb680643c2b052\n", + "[INFO|file_utils.py:1636] 2021-07-19 13:15:26,143 >> creating metadata file for /root/.cache/huggingface/transformers/0a3fd51713dcbb4def175c7f85bddc995d5976ce1dde327f99104e4d33069f17.aa7be4c79d76f4066d9b354496ea477c9ee39c5d889156dd1efb680643c2b052\n", + "[INFO|modeling_utils.py:1271] 2021-07-19 13:15:26,144 >> loading weights file https://huggingface.co/bert-base-multilingual-cased/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/0a3fd51713dcbb4def175c7f85bddc995d5976ce1dde327f99104e4d33069f17.aa7be4c79d76f4066d9b354496ea477c9ee39c5d889156dd1efb680643c2b052\n", + "[WARNING|modeling_utils.py:1502] 2021-07-19 13:15:28,640 >> Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForTokenClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']\n", + "- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "[WARNING|modeling_utils.py:1513] 2021-07-19 13:15:28,640 >> Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "Running tokenizer on train dataset: 0% 0/9 [00:00> The following columns in the training set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: ner_tags, pos_tags, tokens, id.\n", + "[INFO|trainer.py:1162] 2021-07-19 13:15:41,264 >> ***** Running training *****\n", + "[INFO|trainer.py:1163] 2021-07-19 13:15:41,264 >> Num examples = 8324\n", + "[INFO|trainer.py:1164] 2021-07-19 13:15:41,264 >> Num Epochs = 5\n", + "[INFO|trainer.py:1165] 2021-07-19 13:15:41,264 >> Instantaneous batch size per device = 8\n", + "[INFO|trainer.py:1166] 2021-07-19 13:15:41,264 >> Total train batch size (w. parallel, distributed & accumulation) = 8\n", + "[INFO|trainer.py:1167] 2021-07-19 13:15:41,264 >> Gradient Accumulation steps = 1\n", + "[INFO|trainer.py:1168] 2021-07-19 13:15:41,264 >> Total optimization steps = 5205\n", + "[INFO|integrations.py:417] 2021-07-19 13:15:41,282 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mversae\u001b[0m (use `wandb login --relogin` to force relogin)\n", + "2021-07-19 13:15:42.989489: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m./outputs\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval/runs/5eemjhh0\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in /content/wandb/run-20210719_131541-5eemjhh0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run `wandb offline` to turn off syncing.\n", + "\n", + "{'loss': 0.1207, 'learning_rate': 4.5196926032660905e-05, 'epoch': 0.48}\n", + " 10% 500/5205 [02:59<32:18, 2.43it/s][INFO|trainer.py:1917] 2021-07-19 13:18:43,561 >> Saving model checkpoint to ./outputs/checkpoint-500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 13:18:43,562 >> Configuration saved in ./outputs/checkpoint-500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 13:18:46,697 >> Model weights saved in ./outputs/checkpoint-500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 13:18:46,698 >> tokenizer config file saved in ./outputs/checkpoint-500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 13:18:46,699 >> Special tokens file saved in ./outputs/checkpoint-500/special_tokens_map.json\n", + "{'loss': 0.0692, 'learning_rate': 4.039385206532181e-05, 'epoch': 0.96}\n", + " 19% 1000/5205 
[06:07<28:45, 2.44it/s][INFO|trainer.py:1917] 2021-07-19 13:21:51,942 >> Saving model checkpoint to ./outputs/checkpoint-1000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 13:21:51,944 >> Configuration saved in ./outputs/checkpoint-1000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 13:21:54,416 >> Model weights saved in ./outputs/checkpoint-1000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 13:21:54,417 >> tokenizer config file saved in ./outputs/checkpoint-1000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 13:21:54,417 >> Special tokens file saved in ./outputs/checkpoint-1000/special_tokens_map.json\n", + "{'loss': 0.0436, 'learning_rate': 3.5590778097982716e-05, 'epoch': 1.44}\n", + " 29% 1500/5205 [09:15<25:24, 2.43it/s][INFO|trainer.py:1917] 2021-07-19 13:24:59,297 >> Saving model checkpoint to ./outputs/checkpoint-1500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 13:24:59,302 >> Configuration saved in ./outputs/checkpoint-1500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 13:25:01,808 >> Model weights saved in ./outputs/checkpoint-1500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 13:25:01,809 >> tokenizer config file saved in ./outputs/checkpoint-1500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 13:25:01,809 >> Special tokens file saved in ./outputs/checkpoint-1500/special_tokens_map.json\n", + " 38% 2000/5205 [12:24<21:37, 2.47it/s]{'loss': 0.0476, 'learning_rate': 3.078770413064361e-05, 'epoch': 1.92}\n", + "[INFO|trainer.py:1917] 2021-07-19 13:28:09,253 >> Saving model checkpoint to ./outputs/checkpoint-2000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 13:28:09,259 >> Configuration saved in ./outputs/checkpoint-2000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 13:28:11,908 >> Model weights saved in ./outputs/checkpoint-2000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 13:28:11,909 >> tokenizer config file saved in ./outputs/checkpoint-2000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 13:28:11,909 >> Special tokens file saved in ./outputs/checkpoint-2000/special_tokens_map.json\n", + " 48% 2500/5205 [15:31<18:11, 2.48it/s]{'loss': 0.0296, 'learning_rate': 2.5984630163304517e-05, 'epoch': 2.4}\n", + "[INFO|trainer.py:1917] 2021-07-19 13:31:16,237 >> Saving model checkpoint to ./outputs/checkpoint-2500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 13:31:16,238 >> Configuration saved in ./outputs/checkpoint-2500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 13:31:18,854 >> Model weights saved in ./outputs/checkpoint-2500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 13:31:18,854 >> tokenizer config file saved in ./outputs/checkpoint-2500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 13:31:18,855 >> Special tokens file saved in ./outputs/checkpoint-2500/special_tokens_map.json\n", + " 58% 3000/5205 [18:38<14:56, 2.46it/s]{'loss': 0.0239, 'learning_rate': 2.118155619596542e-05, 'epoch': 2.88}\n", + " 58% 3000/5205 [18:38<14:56, 2.46it/s][INFO|trainer.py:1917] 2021-07-19 13:34:23,058 >> Saving model checkpoint to ./outputs/checkpoint-3000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 13:34:23,059 >> Configuration saved in ./outputs/checkpoint-3000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 13:34:25,563 >> Model weights saved in 
./outputs/checkpoint-3000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 13:34:25,563 >> tokenizer config file saved in ./outputs/checkpoint-3000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 13:34:25,564 >> Special tokens file saved in ./outputs/checkpoint-3000/special_tokens_map.json\n", + " 67% 3500/5205 [21:45<11:32, 2.46it/s]{'loss': 0.02, 'learning_rate': 1.6378482228626322e-05, 'epoch': 3.36}\n", + "[INFO|trainer.py:1917] 2021-07-19 13:37:29,626 >> Saving model checkpoint to ./outputs/checkpoint-3500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 13:37:29,628 >> Configuration saved in ./outputs/checkpoint-3500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 13:37:32,133 >> Model weights saved in ./outputs/checkpoint-3500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 13:37:32,134 >> tokenizer config file saved in ./outputs/checkpoint-3500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 13:37:32,134 >> Special tokens file saved in ./outputs/checkpoint-3500/special_tokens_map.json\n", + "{'loss': 0.014, 'learning_rate': 1.1575408261287224e-05, 'epoch': 3.84}\n", + " 77% 4000/5205 [24:51<08:10, 2.46it/s][INFO|trainer.py:1917] 2021-07-19 13:40:36,098 >> Saving model checkpoint to ./outputs/checkpoint-4000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 13:40:36,102 >> Configuration saved in ./outputs/checkpoint-4000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 13:40:38,713 >> Model weights saved in ./outputs/checkpoint-4000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 13:40:38,714 >> tokenizer config file saved in ./outputs/checkpoint-4000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 13:40:38,714 >> Special tokens file saved in ./outputs/checkpoint-4000/special_tokens_map.json\n", + " 86% 4500/5205 [27:58<04:45, 2.47it/s]{'loss': 0.0102, 'learning_rate': 6.7723342939481265e-06, 'epoch': 4.32}\n", + "[INFO|trainer.py:1917] 2021-07-19 13:43:42,728 >> Saving model checkpoint to ./outputs/checkpoint-4500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 13:43:42,730 >> Configuration saved in ./outputs/checkpoint-4500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 13:43:45,403 >> Model weights saved in ./outputs/checkpoint-4500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 13:43:45,403 >> tokenizer config file saved in ./outputs/checkpoint-4500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 13:43:45,435 >> Special tokens file saved in ./outputs/checkpoint-4500/special_tokens_map.json\n", + " 96% 5000/5205 [31:05<01:23, 2.46it/s]{'loss': 0.0091, 'learning_rate': 1.96926032660903e-06, 'epoch': 4.8}\n", + "[INFO|trainer.py:1917] 2021-07-19 13:46:49,438 >> Saving model checkpoint to ./outputs/checkpoint-5000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 13:46:49,445 >> Configuration saved in ./outputs/checkpoint-5000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 13:46:52,151 >> Model weights saved in ./outputs/checkpoint-5000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 13:46:52,152 >> tokenizer config file saved in ./outputs/checkpoint-5000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 13:46:52,153 >> Special tokens file saved in ./outputs/checkpoint-5000/special_tokens_map.json\n", + "100% 5205/5205 [32:29<00:00, 
3.12it/s][INFO|trainer.py:1358] 2021-07-19 13:48:13,361 >> \n", + "\n", + "Training completed. Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n", + "{'train_runtime': 1952.0967, 'train_samples_per_second': 21.321, 'train_steps_per_second': 2.666, 'train_loss': 0.03754559657988241, 'epoch': 5.0}\n", + "100% 5205/5205 [32:29<00:00, 2.67it/s]\n", + "[INFO|trainer.py:1917] 2021-07-19 13:48:13,365 >> Saving model checkpoint to ./outputs\n", + "[INFO|configuration_utils.py:379] 2021-07-19 13:48:13,367 >> Configuration saved in ./outputs/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 13:48:16,181 >> Model weights saved in ./outputs/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 13:48:16,182 >> tokenizer config file saved in ./outputs/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 13:48:16,183 >> Special tokens file saved in ./outputs/special_tokens_map.json\n", + "***** train metrics *****\n", + " epoch = 5.0\n", + " train_loss = 0.0375\n", + " train_runtime = 0:32:32.09\n", + " train_samples = 8324\n", + " train_samples_per_second = 21.321\n", + " train_steps_per_second = 2.666\n", + "[INFO|trainer.py:522] 2021-07-19 13:48:16,519 >> The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: ner_tags, pos_tags, tokens, id.\n", + "07/19/2021 13:48:16 - INFO - __main__ - *** Evaluate ***\n", + "[INFO|trainer.py:2163] 2021-07-19 13:48:16,542 >> ***** Running Evaluation *****\n", + "[INFO|trainer.py:2165] 2021-07-19 13:48:16,542 >> Num examples = 1916\n", + "[INFO|trainer.py:2168] 2021-07-19 13:48:16,542 >> Batch size = 8\n", + "100% 240/240 [00:27<00:00, 8.73it/s]\n", + "***** eval metrics *****\n", + " epoch = 5.0\n", + " eval_accuracy = 0.9779\n", + " eval_f1 = 0.8539\n", + " eval_loss = 0.1326\n", + " eval_precision = 0.8522\n", + " eval_recall = 0.8557\n", + " eval_runtime = 0:00:27.61\n", + " eval_samples = 1916\n", + " eval_samples_per_second = 69.373\n", + " eval_steps_per_second = 8.69\n", + "\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish, PID 886\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Program ended successfully.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find user logs for this run at: /content/wandb/run-20210719_131541-5eemjhh0/logs/debug.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find internal logs for this run at: /content/wandb/run-20210719_131541-5eemjhh0/logs/debug-internal.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.0091\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 5205\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime 1983\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp 1626702524\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step 11\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime 1952.0967\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second 21.321\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second 2.666\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos 1.087585926764544e+16\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss 0.03755\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 0.13262\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 
eval/precision 0.85217\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall 0.8557\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 0.85393\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy 0.97785\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 27.619\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 69.373\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 8.69\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss █▅▃▃▂▂▂▁▁▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate █▇▆▆▅▄▃▃▂▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step ▁▂▂▃▄▄▅▅▆▇▇█\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 1 other file(s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced \u001b[33m./outputs\u001b[0m: \u001b[34mhttps://wandb.ai/versae/bertin-eval/runs/5eemjhh0\u001b[0m\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VuCKhYfI1P06" + }, + "source": [ + "## POS" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "j-JAvrHrkdCo", + "outputId": "27535540-2a52-45b6-d01e-6c12969a203e" + }, + "source": [ + "# !wget https://raw.githubusercontent.com/huggingface/transformers/master/examples/pytorch/token-classification/run_ner.py\n", + "for model in models:\n", + " !WANDB_PROJECT=bertin-eval TOKENIZERS_PARALLELISM=false CUDA_LAUNCH_BLOCKING=1 python run_ner.py \\\n", + " --model_name_or_path $model \\\n", + " --task_name pos \\\n", + " --dataset_name conll2002 \\\n", + " --dataset_config_name es \\\n", + " --output_dir ./outputs \\\n", + " --overwrite_output_dir \\\n", + " --pad_to_max_length \\\n", + " --num_train_epochs 5 \\\n", + " --do_train \\\n", + " --do_eval" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "\u001b[1;30;43mStreaming output truncated to the last 5000 lines.\u001b[0m\n", + " \"9\": 9,\n", + " \"10\": 10,\n", + " \"11\": 11,\n", + " \"12\": 12,\n", + " \"13\": 13,\n", + " \"14\": 14,\n", + " \"15\": 15,\n", + " \"16\": 16,\n", + " \"17\": 17,\n", + " \"18\": 18,\n", + " \"19\": 19,\n", + " \"20\": 20,\n", + " \"21\": 21,\n", + " \"22\": 22,\n", + " \"23\": 23,\n", + " \"24\": 24,\n", + " \"25\": 25,\n", + " \"26\": 26,\n", + " \"27\": 
27,\n", + " \"28\": 28,\n", + " \"29\": 29,\n", + " \"30\": 30,\n", + " \"31\": 31,\n", + " \"32\": 32,\n", + " \"33\": 33,\n", + " \"34\": 34,\n", + " \"35\": 35,\n", + " \"36\": 36,\n", + " \"37\": 37,\n", + " \"38\": 38,\n", + " \"39\": 39,\n", + " \"40\": 40,\n", + " \"41\": 41,\n", + " \"42\": 42,\n", + " \"43\": 43,\n", + " \"44\": 44,\n", + " \"45\": 45,\n", + " \"46\": 46,\n", + " \"47\": 47,\n", + " \"48\": 48,\n", + " \"49\": 49,\n", + " \"50\": 50,\n", + " \"51\": 51,\n", + " \"52\": 52,\n", + " \"53\": 53,\n", + " \"54\": 54,\n", + " \"55\": 55,\n", + " \"56\": 56,\n", + " \"57\": 57,\n", + " \"58\": 58,\n", + " \"59\": 59\n", + " },\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"label2id\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8,\n", + " \"9\": 9,\n", + " \"10\": 10,\n", + " \"11\": 11,\n", + " \"12\": 12,\n", + " \"13\": 13,\n", + " \"14\": 14,\n", + " \"15\": 15,\n", + " \"16\": 16,\n", + " \"17\": 17,\n", + " \"18\": 18,\n", + " \"19\": 19,\n", + " \"20\": 20,\n", + " \"21\": 21,\n", + " \"22\": 22,\n", + " \"23\": 23,\n", + " \"24\": 24,\n", + " \"25\": 25,\n", + " \"26\": 26,\n", + " \"27\": 27,\n", + " \"28\": 28,\n", + " \"29\": 29,\n", + " \"30\": 30,\n", + " \"31\": 31,\n", + " \"32\": 32,\n", + " \"33\": 33,\n", + " \"34\": 34,\n", + " \"35\": 35,\n", + " \"36\": 36,\n", + " \"37\": 37,\n", + " \"38\": 38,\n", + " \"39\": 39,\n", + " \"40\": 40,\n", + " \"41\": 41,\n", + " \"42\": 42,\n", + " \"43\": 43,\n", + " \"44\": 44,\n", + " \"45\": 45,\n", + " \"46\": 46,\n", + " \"47\": 47,\n", + " \"48\": 48,\n", + " \"49\": 49,\n", + " \"50\": 50,\n", + " \"51\": 51,\n", + " \"52\": 52,\n", + " \"53\": 53,\n", + " \"54\": 54,\n", + " \"55\": 55,\n", + " \"56\": 56,\n", + " \"57\": 57,\n", + " \"58\": 58,\n", + " \"59\": 59\n", + " },\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"max_position_embeddings\": 514,\n", + " \"model_type\": \"roberta\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 1,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50265\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 14:21:32,133 >> loading file https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/vocab.json from cache at /root/.cache/huggingface/transformers/c1ba808baa4a9c0f3062f1881d448087c30a1443644365bd41cf366491ab4063.a80f232f572026f92499b14999a8ed4e044e04cf3d01b9f2be298c98e78e8498\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 14:21:32,134 >> loading file https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/merges.txt from cache at /root/.cache/huggingface/transformers/17ffd9604d64364336252e5a3859c3c55be07c457328ab5fc37e4aaf39913d28.a0dfc41f9d0f03a56ba7a5401d770f6e43071045a0bd79073380d408d17a0d92\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 14:21:32,134 >> loading file https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/59e3c1ec6ec0fe2653924dcd348a763dd43f51d8eae6ab758e2d962ec7c14d5e.2a5dc806edc00ab3a329cb22b9973596ca75b24ba0e5e4963bf1308de7237a3d\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 14:21:32,134 >> loading file 
https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 14:21:32,134 >> loading file https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/special_tokens_map.json from cache at /root/.cache/huggingface/transformers/39ddc268aab2655adb602f93d771480d4db157c1b6fae9a5ae9fc2112c645a69.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 14:21:32,134 >> loading file https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/tokenizer_config.json from cache at /root/.cache/huggingface/transformers/a9d7a6740959c8c347993f62fbd5620bffa2d10c35c2e579a2ecec181299c9a1.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|modeling_utils.py:1271] 2021-07-19 14:21:32,975 >> loading weights file https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/6dd5d03f2c36b42a305cf20636d35935ad2d998d2ab0588b28eeed0fc164db43.5b6b77533b091cc9204533514d844abfe875ebba66e044b251306c4228bd3221\n", + "[WARNING|modeling_utils.py:1502] 2021-07-19 14:21:40,934 >> Some weights of the model checkpoint at bertin-project/bertin-base-random-exp-512seqlen were not used when initializing RobertaForTokenClassification: ['lm_head.dense.bias', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias']\n", + "- This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "[WARNING|modeling_utils.py:1513] 2021-07-19 14:21:40,934 >> Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at bertin-project/bertin-base-random-exp-512seqlen and are newly initialized: ['classifier.bias', 'classifier.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "Running tokenizer on train dataset: 0% 0/9 [00:00> The following columns in the training set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: pos_tags, ner_tags, id, tokens.\n", + "[INFO|trainer.py:1162] 2021-07-19 14:21:52,654 >> ***** Running training *****\n", + "[INFO|trainer.py:1163] 2021-07-19 14:21:52,654 >> Num examples = 8324\n", + "[INFO|trainer.py:1164] 2021-07-19 14:21:52,654 >> Num Epochs = 5\n", + "[INFO|trainer.py:1165] 2021-07-19 14:21:52,654 >> Instantaneous batch size per device = 8\n", + "[INFO|trainer.py:1166] 2021-07-19 14:21:52,654 >> Total train batch size (w. 
parallel, distributed & accumulation) = 8\n", + "[INFO|trainer.py:1167] 2021-07-19 14:21:52,654 >> Gradient Accumulation steps = 1\n", + "[INFO|trainer.py:1168] 2021-07-19 14:21:52,654 >> Total optimization steps = 5205\n", + "[INFO|integrations.py:417] 2021-07-19 14:21:52,674 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mversae\u001b[0m (use `wandb login --relogin` to force relogin)\n", + "2021-07-19 14:21:54.344552: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m./outputs\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval/runs/2r11lsea\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in /content/wandb/run-20210719_142152-2r11lsea\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run `wandb offline` to turn off syncing.\n", + "\n", + "{'loss': 0.3573, 'learning_rate': 4.5196926032660905e-05, 'epoch': 0.48}\n", + " 10% 500/5205 [02:51<30:45, 2.55it/s][INFO|trainer.py:1917] 2021-07-19 14:24:47,008 >> Saving model checkpoint to ./outputs/checkpoint-500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 14:24:47,015 >> Configuration saved in ./outputs/checkpoint-500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 14:24:48,536 >> Model weights saved in ./outputs/checkpoint-500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 14:24:48,537 >> tokenizer config file saved in ./outputs/checkpoint-500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 14:24:48,537 >> Special tokens file saved in ./outputs/checkpoint-500/special_tokens_map.json\n", + " 19% 1000/5205 [05:48<27:28, 2.55it/s]{'loss': 0.1234, 'learning_rate': 4.039385206532181e-05, 'epoch': 0.96}\n", + " 19% 1000/5205 [05:48<27:28, 2.55it/s][INFO|trainer.py:1917] 2021-07-19 14:27:43,799 >> Saving model checkpoint to ./outputs/checkpoint-1000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 14:27:43,801 >> Configuration saved in ./outputs/checkpoint-1000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 14:27:45,270 >> Model weights saved in ./outputs/checkpoint-1000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 14:27:45,270 >> tokenizer config file saved in ./outputs/checkpoint-1000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 14:27:45,271 >> Special tokens file saved in ./outputs/checkpoint-1000/special_tokens_map.json\n", + " 29% 1500/5205 [08:43<24:06, 2.56it/s]{'loss': 0.0833, 'learning_rate': 3.5590778097982716e-05, 'epoch': 1.44}\n", + "[INFO|trainer.py:1917] 2021-07-19 14:30:39,564 >> Saving model checkpoint to ./outputs/checkpoint-1500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 14:30:39,571 >> Configuration saved in ./outputs/checkpoint-1500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 14:30:41,197 >> Model weights saved in ./outputs/checkpoint-1500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 14:30:41,198 >> tokenizer config file saved in 
./outputs/checkpoint-1500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 14:30:41,198 >> Special tokens file saved in ./outputs/checkpoint-1500/special_tokens_map.json\n", + "{'loss': 0.0773, 'learning_rate': 3.078770413064361e-05, 'epoch': 1.92}\n", + " 38% 2000/5205 [11:40<20:48, 2.57it/s][INFO|trainer.py:1917] 2021-07-19 14:33:36,165 >> Saving model checkpoint to ./outputs/checkpoint-2000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 14:33:36,172 >> Configuration saved in ./outputs/checkpoint-2000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 14:33:37,547 >> Model weights saved in ./outputs/checkpoint-2000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 14:33:37,548 >> tokenizer config file saved in ./outputs/checkpoint-2000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 14:33:37,548 >> Special tokens file saved in ./outputs/checkpoint-2000/special_tokens_map.json\n", + "{'loss': 0.0542, 'learning_rate': 2.5984630163304517e-05, 'epoch': 2.4}\n", + " 48% 2500/5205 [14:36<17:20, 2.60it/s][INFO|trainer.py:1917] 2021-07-19 14:36:31,893 >> Saving model checkpoint to ./outputs/checkpoint-2500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 14:36:31,895 >> Configuration saved in ./outputs/checkpoint-2500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 14:36:33,494 >> Model weights saved in ./outputs/checkpoint-2500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 14:36:33,494 >> tokenizer config file saved in ./outputs/checkpoint-2500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 14:36:33,495 >> Special tokens file saved in ./outputs/checkpoint-2500/special_tokens_map.json\n", + "{'loss': 0.0476, 'learning_rate': 2.118155619596542e-05, 'epoch': 2.88}\n", + " 58% 3000/5205 [17:32<14:22, 2.56it/s][INFO|trainer.py:1917] 2021-07-19 14:39:28,468 >> Saving model checkpoint to ./outputs/checkpoint-3000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 14:39:28,470 >> Configuration saved in ./outputs/checkpoint-3000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 14:39:30,159 >> Model weights saved in ./outputs/checkpoint-3000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 14:39:30,160 >> tokenizer config file saved in ./outputs/checkpoint-3000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 14:39:30,160 >> Special tokens file saved in ./outputs/checkpoint-3000/special_tokens_map.json\n", + " 67% 3500/5205 [20:29<10:57, 2.59it/s]{'loss': 0.0331, 'learning_rate': 1.6378482228626322e-05, 'epoch': 3.36}\n", + " 67% 3500/5205 [20:29<10:57, 2.59it/s][INFO|trainer.py:1917] 2021-07-19 14:42:24,890 >> Saving model checkpoint to ./outputs/checkpoint-3500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 14:42:24,892 >> Configuration saved in ./outputs/checkpoint-3500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 14:42:26,453 >> Model weights saved in ./outputs/checkpoint-3500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 14:42:26,454 >> tokenizer config file saved in ./outputs/checkpoint-3500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 14:42:26,454 >> Special tokens file saved in ./outputs/checkpoint-3500/special_tokens_map.json\n", + " 77% 4000/5205 [23:25<07:47, 2.58it/s][INFO|trainer.py:1917] 2021-07-19 14:45:20,930 >> Saving model checkpoint to ./outputs/checkpoint-4000\n", + 
"{'loss': 0.0289, 'learning_rate': 1.1575408261287224e-05, 'epoch': 3.84}\n", + "[INFO|configuration_utils.py:379] 2021-07-19 14:45:20,932 >> Configuration saved in ./outputs/checkpoint-4000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 14:45:22,454 >> Model weights saved in ./outputs/checkpoint-4000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 14:45:22,454 >> tokenizer config file saved in ./outputs/checkpoint-4000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 14:45:22,455 >> Special tokens file saved in ./outputs/checkpoint-4000/special_tokens_map.json\n", + " 86% 4500/5205 [26:20<04:32, 2.59it/s]{'loss': 0.0224, 'learning_rate': 6.7723342939481265e-06, 'epoch': 4.32}\n", + " 86% 4500/5205 [26:21<04:32, 2.59it/s][INFO|trainer.py:1917] 2021-07-19 14:48:16,636 >> Saving model checkpoint to ./outputs/checkpoint-4500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 14:48:16,638 >> Configuration saved in ./outputs/checkpoint-4500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 14:48:18,221 >> Model weights saved in ./outputs/checkpoint-4500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 14:48:18,222 >> tokenizer config file saved in ./outputs/checkpoint-4500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 14:48:18,222 >> Special tokens file saved in ./outputs/checkpoint-4500/special_tokens_map.json\n", + "{'loss': 0.0174, 'learning_rate': 1.96926032660903e-06, 'epoch': 4.8}\n", + " 96% 5000/5205 [29:16<01:19, 2.59it/s][INFO|trainer.py:1917] 2021-07-19 14:51:12,353 >> Saving model checkpoint to ./outputs/checkpoint-5000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 14:51:12,360 >> Configuration saved in ./outputs/checkpoint-5000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 14:51:13,839 >> Model weights saved in ./outputs/checkpoint-5000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 14:51:13,840 >> tokenizer config file saved in ./outputs/checkpoint-5000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 14:51:13,840 >> Special tokens file saved in ./outputs/checkpoint-5000/special_tokens_map.json\n", + "100% 5205/5205 [30:32<00:00, 3.28it/s][INFO|trainer.py:1358] 2021-07-19 14:52:27,812 >> \n", + "\n", + "Training completed. 
Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n", + "{'train_runtime': 1835.1582, 'train_samples_per_second': 22.679, 'train_steps_per_second': 2.836, 'train_loss': 0.08193410668203627, 'epoch': 5.0}\n", + "100% 5205/5205 [30:32<00:00, 2.84it/s]\n", + "[INFO|trainer.py:1917] 2021-07-19 14:52:27,819 >> Saving model checkpoint to ./outputs\n", + "[INFO|configuration_utils.py:379] 2021-07-19 14:52:27,821 >> Configuration saved in ./outputs/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 14:52:29,335 >> Model weights saved in ./outputs/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 14:52:29,374 >> tokenizer config file saved in ./outputs/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 14:52:29,375 >> Special tokens file saved in ./outputs/special_tokens_map.json\n", + "***** train metrics *****\n", + " epoch = 5.0\n", + " train_loss = 0.0819\n", + " train_runtime = 0:30:35.15\n", + " train_samples = 8324\n", + " train_samples_per_second = 22.679\n", + " train_steps_per_second = 2.836\n", + "[INFO|trainer.py:522] 2021-07-19 14:52:30,157 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: pos_tags, ner_tags, id, tokens.\n", + "07/19/2021 14:52:30 - INFO - __main__ - *** Evaluate ***\n", + "[INFO|trainer.py:2163] 2021-07-19 14:52:30,225 >> ***** Running Evaluation *****\n", + "[INFO|trainer.py:2165] 2021-07-19 14:52:30,226 >> Num examples = 1916\n", + "[INFO|trainer.py:2168] 2021-07-19 14:52:30,226 >> Batch size = 8\n", + "100% 239/240 [00:24<00:00, 9.62it/s]/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: NC seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fpa seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fpt seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fc seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Z seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: NP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fp seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fg seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DA seems not to be NE tag.\n", + " 
warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: AQ seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VAI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: SP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PR seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: RG seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: CS seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: CC seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fe seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fx seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DD seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PD seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMG seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: P0 seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE 
tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMM seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fd seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: AO seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSS seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VAS seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: RN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fat seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Faa seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DE seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMS seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fz seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PT seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + 
"/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VAN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSG seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PX seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DT seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fh seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VAP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fs seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSM seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Y seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fia seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fit seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. 
Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "100% 240/240 [00:28<00:00, 8.50it/s]\n", + "***** eval metrics *****\n", + " epoch = 5.0\n", + " eval_accuracy = 0.9707\n", + " eval_f1 = 0.966\n", + " eval_loss = 0.1262\n", + " eval_precision = 0.9667\n", + " eval_recall = 0.9653\n", + " eval_runtime = 0:00:28.35\n", + " eval_samples = 1916\n", + " eval_samples_per_second = 67.583\n", + " eval_steps_per_second = 8.465\n", + "\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish, PID 1015\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Program ended successfully.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find user logs for this run at: /content/wandb/run-20210719_142152-2r11lsea/logs/debug.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find internal logs for this run at: /content/wandb/run-20210719_142152-2r11lsea/logs/debug-internal.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.0174\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 5205\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime 1865\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp 1626706378\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step 11\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime 1835.1582\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second 22.679\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second 2.836\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos 1.08808736772096e+16\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss 0.08193\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 0.12617\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision 0.96668\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall 0.96534\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 0.96601\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy 0.97066\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 28.3505\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 67.583\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 8.465\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss █▃▂▂▂▂▁▁▁▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate █▇▆▆▅▄▃▃▂▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step ▁▂▂▃▄▄▅▅▆▇▇█\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▁\n", + 
"\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 1 other file(s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced \u001b[33m./outputs\u001b[0m: \u001b[34mhttps://wandb.ai/versae/bertin-eval/runs/2r11lsea\u001b[0m\n", + "2021-07-19 14:53:10.597067: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "07/19/2021 14:53:13 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", + "07/19/2021 14:53:13 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", + "_n_gpu=1,\n", + "adafactor=False,\n", + "adam_beta1=0.9,\n", + "adam_beta2=0.999,\n", + "adam_epsilon=1e-08,\n", + "dataloader_drop_last=False,\n", + "dataloader_num_workers=0,\n", + "dataloader_pin_memory=True,\n", + "ddp_find_unused_parameters=None,\n", + "debug=[],\n", + "deepspeed=None,\n", + "disable_tqdm=False,\n", + "do_eval=True,\n", + "do_predict=False,\n", + "do_train=True,\n", + "eval_accumulation_steps=None,\n", + "eval_steps=500,\n", + "evaluation_strategy=IntervalStrategy.NO,\n", + "fp16=False,\n", + "fp16_backend=auto,\n", + "fp16_full_eval=False,\n", + "fp16_opt_level=O1,\n", + "gradient_accumulation_steps=1,\n", + "greater_is_better=None,\n", + "group_by_length=False,\n", + "ignore_data_skip=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=5e-05,\n", + "length_column_name=length,\n", + "load_best_model_at_end=False,\n", + "local_rank=-1,\n", + "log_level=-1,\n", + "log_level_replica=-1,\n", + "log_on_each_node=True,\n", + "logging_dir=./outputs/runs/Jul19_14-53-13_eab5184de9ff,\n", + "logging_first_step=False,\n", + "logging_steps=500,\n", + "logging_strategy=IntervalStrategy.STEPS,\n", + "lr_scheduler_type=SchedulerType.LINEAR,\n", + "max_grad_norm=1.0,\n", + "max_steps=-1,\n", + "metric_for_best_model=None,\n", + "mp_parameters=,\n", + "no_cuda=False,\n", + "num_train_epochs=5.0,\n", + "output_dir=./outputs,\n", + "overwrite_output_dir=True,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=8,\n", + "per_device_train_batch_size=8,\n", + "prediction_loss_only=False,\n", + "push_to_hub=False,\n", + "push_to_hub_model_id=outputs,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=None,\n", + "remove_unused_columns=True,\n", + "report_to=['tensorboard', 'wandb'],\n", + "resume_from_checkpoint=None,\n", + "run_name=./outputs,\n", + "save_on_each_node=False,\n", + "save_steps=500,\n", + "save_strategy=IntervalStrategy.STEPS,\n", + "save_total_limit=None,\n", + "seed=42,\n", + "sharded_ddp=[],\n", + "skip_memory_metrics=True,\n", + "tpu_metrics_debug=False,\n", + "tpu_num_cores=None,\n", + "use_legacy_prediction_loop=False,\n", + "warmup_ratio=0.0,\n", + "warmup_steps=0,\n", + "weight_decay=0.0,\n", + ")\n", + "07/19/2021 14:53:15 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 14:53:15 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at 
/root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 14:53:15 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + "07/19/2021 14:53:15 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 14:53:15 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 14:53:16 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 14:53:16 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 14:53:16 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + "07/19/2021 14:53:16 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 14:53:16 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 14:53:16 - INFO - datasets.info - Loading Dataset Infos from /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 14:53:16 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", + "07/19/2021 14:53:16 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 14:53:16 - WARNING - datasets.builder - Reusing dataset conll2002 (/root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5)\n", + "07/19/2021 14:53:16 - INFO - datasets.info - Loading Dataset info from 
/root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "100% 3/3 [00:00<00:00, 27.40it/s]\n", + "[INFO|configuration_utils.py:545] 2021-07-19 14:53:17,765 >> loading configuration file https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/cbd56d68ce5dcd2626aba4c4b188db63f2ba2c49a604b36e7cdc6e52578ee306.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|configuration_utils.py:581] 2021-07-19 14:53:17,766 >> Model config RobertaConfig {\n", + " \"architectures\": [\n", + " \"RobertaForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"bos_token_id\": 0,\n", + " \"eos_token_id\": 2,\n", + " \"finetuning_task\": \"pos\",\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"id2label\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8,\n", + " \"9\": 9,\n", + " \"10\": 10,\n", + " \"11\": 11,\n", + " \"12\": 12,\n", + " \"13\": 13,\n", + " \"14\": 14,\n", + " \"15\": 15,\n", + " \"16\": 16,\n", + " \"17\": 17,\n", + " \"18\": 18,\n", + " \"19\": 19,\n", + " \"20\": 20,\n", + " \"21\": 21,\n", + " \"22\": 22,\n", + " \"23\": 23,\n", + " \"24\": 24,\n", + " \"25\": 25,\n", + " \"26\": 26,\n", + " \"27\": 27,\n", + " \"28\": 28,\n", + " \"29\": 29,\n", + " \"30\": 30,\n", + " \"31\": 31,\n", + " \"32\": 32,\n", + " \"33\": 33,\n", + " \"34\": 34,\n", + " \"35\": 35,\n", + " \"36\": 36,\n", + " \"37\": 37,\n", + " \"38\": 38,\n", + " \"39\": 39,\n", + " \"40\": 40,\n", + " \"41\": 41,\n", + " \"42\": 42,\n", + " \"43\": 43,\n", + " \"44\": 44,\n", + " \"45\": 45,\n", + " \"46\": 46,\n", + " \"47\": 47,\n", + " \"48\": 48,\n", + " \"49\": 49,\n", + " \"50\": 50,\n", + " \"51\": 51,\n", + " \"52\": 52,\n", + " \"53\": 53,\n", + " \"54\": 54,\n", + " \"55\": 55,\n", + " \"56\": 56,\n", + " \"57\": 57,\n", + " \"58\": 58,\n", + " \"59\": 59\n", + " },\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"label2id\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8,\n", + " \"9\": 9,\n", + " \"10\": 10,\n", + " \"11\": 11,\n", + " \"12\": 12,\n", + " \"13\": 13,\n", + " \"14\": 14,\n", + " \"15\": 15,\n", + " \"16\": 16,\n", + " \"17\": 17,\n", + " \"18\": 18,\n", + " \"19\": 19,\n", + " \"20\": 20,\n", + " \"21\": 21,\n", + " \"22\": 22,\n", + " \"23\": 23,\n", + " \"24\": 24,\n", + " \"25\": 25,\n", + " \"26\": 26,\n", + " \"27\": 27,\n", + " \"28\": 28,\n", + " \"29\": 29,\n", + " \"30\": 30,\n", + " \"31\": 31,\n", + " \"32\": 32,\n", + " \"33\": 33,\n", + " \"34\": 34,\n", + " \"35\": 35,\n", + " \"36\": 36,\n", + " \"37\": 37,\n", + " \"38\": 38,\n", + " \"39\": 39,\n", + " \"40\": 40,\n", + " \"41\": 41,\n", + " \"42\": 42,\n", + " \"43\": 43,\n", + " \"44\": 44,\n", + " \"45\": 45,\n", + " \"46\": 46,\n", + " \"47\": 47,\n", + " \"48\": 48,\n", + " \"49\": 49,\n", + " \"50\": 50,\n", + " \"51\": 51,\n", + " \"52\": 52,\n", + " \"53\": 53,\n", + " \"54\": 54,\n", + " \"55\": 55,\n", + " \"56\": 56,\n", + " \"57\": 57,\n", + " \"58\": 58,\n", + " \"59\": 59\n", + " },\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"max_position_embeddings\": 
514,\n", + " \"model_type\": \"roberta\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 1,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50265\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 14:53:22,817 >> loading file https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/vocab.json from cache at /root/.cache/huggingface/transformers/dae6454603d0b1d10a2446ffc1a21ccd636b0ca6a4c77a79fb9dfde03f4a51b8.a80f232f572026f92499b14999a8ed4e044e04cf3d01b9f2be298c98e78e8498\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 14:53:22,817 >> loading file https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/merges.txt from cache at /root/.cache/huggingface/transformers/2e77e6f778d3fd8875349675d408521ca20c1f1acac2fd57d60ca945d82b926e.a0dfc41f9d0f03a56ba7a5401d770f6e43071045a0bd79073380d408d17a0d92\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 14:53:22,817 >> loading file https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/98a6808b3aa08b6d84e8b30dfa6892d15e9e631eebff8652b37ab29d75a0b98a.2a5dc806edc00ab3a329cb22b9973596ca75b24ba0e5e4963bf1308de7237a3d\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 14:53:22,817 >> loading file https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 14:53:22,817 >> loading file https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/special_tokens_map.json from cache at /root/.cache/huggingface/transformers/1f5a4bde3e85f3c7b914d0e6b43b2f72d6b3b2f9ddbec7be9e4b0521a429f67f.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 14:53:22,817 >> loading file https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/tokenizer_config.json from cache at /root/.cache/huggingface/transformers/7a419a17bf4372869932365630632f434d402e93dd7e609e73607cf71ec1bdf7.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|modeling_utils.py:1271] 2021-07-19 14:53:23,622 >> loading weights file https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/d61c66f6163d7933bcffb5de3a666094c2c7c8d54145ec0cea640f72204427e0.50b5552cf09535e0a4b85cc39c83be233674d4cab0836dd8fedc97aa778c802c\n", + "[WARNING|modeling_utils.py:1502] 2021-07-19 14:53:31,610 >> Some weights of the model checkpoint at bertin-project/bertin-base-gaussian were not used when initializing RobertaForTokenClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.dense.weight']\n", + "- This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "[WARNING|modeling_utils.py:1513] 2021-07-19 14:53:31,611 >> Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at bertin-project/bertin-base-gaussian and are newly initialized: ['classifier.weight', 'classifier.bias']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "Running tokenizer on train dataset: 0% 0/9 [00:00> The following columns in the training set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, tokens, id, pos_tags.\n", + "[INFO|trainer.py:1162] 2021-07-19 14:53:43,083 >> ***** Running training *****\n", + "[INFO|trainer.py:1163] 2021-07-19 14:53:43,083 >> Num examples = 8324\n", + "[INFO|trainer.py:1164] 2021-07-19 14:53:43,083 >> Num Epochs = 5\n", + "[INFO|trainer.py:1165] 2021-07-19 14:53:43,083 >> Instantaneous batch size per device = 8\n", + "[INFO|trainer.py:1166] 2021-07-19 14:53:43,083 >> Total train batch size (w. parallel, distributed & accumulation) = 8\n", + "[INFO|trainer.py:1167] 2021-07-19 14:53:43,083 >> Gradient Accumulation steps = 1\n", + "[INFO|trainer.py:1168] 2021-07-19 14:53:43,083 >> Total optimization steps = 5205\n", + "[INFO|integrations.py:417] 2021-07-19 14:53:43,102 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mversae\u001b[0m (use `wandb login --relogin` to force relogin)\n", + "2021-07-19 14:53:44.752564: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m./outputs\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval/runs/srzdgyct\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in /content/wandb/run-20210719_145343-srzdgyct\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run `wandb offline` to turn off syncing.\n", + "\n", + " 10% 500/5205 [02:52<30:50, 2.54it/s]{'loss': 0.3213, 'learning_rate': 4.5196926032660905e-05, 'epoch': 0.48}\n", + "[INFO|trainer.py:1917] 2021-07-19 14:56:38,524 >> Saving model checkpoint to ./outputs/checkpoint-500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 14:56:38,531 >> Configuration saved in ./outputs/checkpoint-500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 14:56:40,026 >> Model weights saved in ./outputs/checkpoint-500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 14:56:40,027 >> tokenizer config file saved in ./outputs/checkpoint-500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 14:56:40,027 >> Special tokens file saved in ./outputs/checkpoint-500/special_tokens_map.json\n", + "{'loss': 0.1156, 'learning_rate': 4.039385206532181e-05, 'epoch': 0.96}\n", + " 19% 
1000/5205 [05:50<27:14, 2.57it/s][INFO|trainer.py:1917] 2021-07-19 14:59:36,570 >> Saving model checkpoint to ./outputs/checkpoint-1000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 14:59:36,572 >> Configuration saved in ./outputs/checkpoint-1000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 14:59:37,987 >> Model weights saved in ./outputs/checkpoint-1000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 14:59:37,988 >> tokenizer config file saved in ./outputs/checkpoint-1000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 14:59:37,988 >> Special tokens file saved in ./outputs/checkpoint-1000/special_tokens_map.json\n", + " 29% 1500/5205 [08:47<24:24, 2.53it/s]{'loss': 0.0763, 'learning_rate': 3.5590778097982716e-05, 'epoch': 1.44}\n", + " 29% 1500/5205 [08:47<24:24, 2.53it/s][INFO|trainer.py:1917] 2021-07-19 15:02:33,921 >> Saving model checkpoint to ./outputs/checkpoint-1500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 15:02:33,923 >> Configuration saved in ./outputs/checkpoint-1500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 15:02:35,521 >> Model weights saved in ./outputs/checkpoint-1500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 15:02:35,521 >> tokenizer config file saved in ./outputs/checkpoint-1500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 15:02:35,522 >> Special tokens file saved in ./outputs/checkpoint-1500/special_tokens_map.json\n", + "{'loss': 0.0699, 'learning_rate': 3.078770413064361e-05, 'epoch': 1.92}\n", + " 38% 2000/5205 [11:45<20:52, 2.56it/s][INFO|trainer.py:1917] 2021-07-19 15:05:31,139 >> Saving model checkpoint to ./outputs/checkpoint-2000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 15:05:31,141 >> Configuration saved in ./outputs/checkpoint-2000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 15:05:32,718 >> Model weights saved in ./outputs/checkpoint-2000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 15:05:32,719 >> tokenizer config file saved in ./outputs/checkpoint-2000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 15:05:32,719 >> Special tokens file saved in ./outputs/checkpoint-2000/special_tokens_map.json\n", + "{'loss': 0.0473, 'learning_rate': 2.5984630163304517e-05, 'epoch': 2.4}\n", + " 48% 2500/5205 [14:42<17:32, 2.57it/s][INFO|trainer.py:1917] 2021-07-19 15:08:28,636 >> Saving model checkpoint to ./outputs/checkpoint-2500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 15:08:28,638 >> Configuration saved in ./outputs/checkpoint-2500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 15:08:30,199 >> Model weights saved in ./outputs/checkpoint-2500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 15:08:30,200 >> tokenizer config file saved in ./outputs/checkpoint-2500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 15:08:30,201 >> Special tokens file saved in ./outputs/checkpoint-2500/special_tokens_map.json\n", + " 58% 3000/5205 [17:40<14:25, 2.55it/s]{'loss': 0.0432, 'learning_rate': 2.118155619596542e-05, 'epoch': 2.88}\n", + "[INFO|trainer.py:1917] 2021-07-19 15:11:26,276 >> Saving model checkpoint to ./outputs/checkpoint-3000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 15:11:26,279 >> Configuration saved in ./outputs/checkpoint-3000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 15:11:28,019 >> Model weights saved in 
./outputs/checkpoint-3000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 15:11:28,020 >> tokenizer config file saved in ./outputs/checkpoint-3000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 15:11:28,020 >> Special tokens file saved in ./outputs/checkpoint-3000/special_tokens_map.json\n", + " 67% 3500/5205 [20:37<11:04, 2.56it/s]{'loss': 0.029, 'learning_rate': 1.6378482228626322e-05, 'epoch': 3.36}\n", + " 67% 3500/5205 [20:37<11:04, 2.56it/s][INFO|trainer.py:1917] 2021-07-19 15:14:23,295 >> Saving model checkpoint to ./outputs/checkpoint-3500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 15:14:23,297 >> Configuration saved in ./outputs/checkpoint-3500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 15:14:24,881 >> Model weights saved in ./outputs/checkpoint-3500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 15:14:24,882 >> tokenizer config file saved in ./outputs/checkpoint-3500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 15:14:24,882 >> Special tokens file saved in ./outputs/checkpoint-3500/special_tokens_map.json\n", + "{'loss': 0.0258, 'learning_rate': 1.1575408261287224e-05, 'epoch': 3.84}\n", + " 77% 4000/5205 [23:34<07:50, 2.56it/s][INFO|trainer.py:1917] 2021-07-19 15:17:20,488 >> Saving model checkpoint to ./outputs/checkpoint-4000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 15:17:20,490 >> Configuration saved in ./outputs/checkpoint-4000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 15:17:22,034 >> Model weights saved in ./outputs/checkpoint-4000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 15:17:22,035 >> tokenizer config file saved in ./outputs/checkpoint-4000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 15:17:22,035 >> Special tokens file saved in ./outputs/checkpoint-4000/special_tokens_map.json\n", + "{'loss': 0.0188, 'learning_rate': 6.7723342939481265e-06, 'epoch': 4.32}\n", + " 86% 4500/5205 [26:31<04:35, 2.56it/s][INFO|trainer.py:1917] 2021-07-19 15:20:17,497 >> Saving model checkpoint to ./outputs/checkpoint-4500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 15:20:17,499 >> Configuration saved in ./outputs/checkpoint-4500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 15:20:19,183 >> Model weights saved in ./outputs/checkpoint-4500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 15:20:19,184 >> tokenizer config file saved in ./outputs/checkpoint-4500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 15:20:19,185 >> Special tokens file saved in ./outputs/checkpoint-4500/special_tokens_map.json\n", + "{'loss': 0.0146, 'learning_rate': 1.96926032660903e-06, 'epoch': 4.8}\n", + " 96% 5000/5205 [29:28<01:19, 2.57it/s][INFO|trainer.py:1917] 2021-07-19 15:23:14,540 >> Saving model checkpoint to ./outputs/checkpoint-5000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 15:23:14,542 >> Configuration saved in ./outputs/checkpoint-5000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 15:23:16,074 >> Model weights saved in ./outputs/checkpoint-5000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 15:23:16,075 >> tokenizer config file saved in ./outputs/checkpoint-5000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 15:23:16,075 >> Special tokens file saved in ./outputs/checkpoint-5000/special_tokens_map.json\n", + 
"100% 5205/5205 [30:44<00:00, 3.24it/s][INFO|trainer.py:1358] 2021-07-19 15:24:30,596 >> \n", + "\n", + "Training completed. Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n", + " {'train_runtime': 1847.5133, 'train_samples_per_second': 22.528, 'train_steps_per_second': 2.817, 'train_loss': 0.07378956023371071, 'epoch': 5.0}\n", + "100% 5205/5205 [30:44<00:00, 2.82it/s]\n", + "[INFO|trainer.py:1917] 2021-07-19 15:24:30,606 >> Saving model checkpoint to ./outputs\n", + "[INFO|configuration_utils.py:379] 2021-07-19 15:24:30,608 >> Configuration saved in ./outputs/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 15:24:32,024 >> Model weights saved in ./outputs/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 15:24:32,024 >> tokenizer config file saved in ./outputs/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 15:24:32,029 >> Special tokens file saved in ./outputs/special_tokens_map.json\n", + "***** train metrics *****\n", + " epoch = 5.0\n", + " train_loss = 0.0738\n", + " train_runtime = 0:30:47.51\n", + " train_samples = 8324\n", + " train_samples_per_second = 22.528\n", + " train_steps_per_second = 2.817\n", + "[INFO|trainer.py:522] 2021-07-19 15:24:32,180 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, tokens, id, pos_tags.\n", + "07/19/2021 15:24:32 - INFO - __main__ - *** Evaluate ***\n", + "[INFO|trainer.py:2163] 2021-07-19 15:24:32,296 >> ***** Running Evaluation *****\n", + "[INFO|trainer.py:2165] 2021-07-19 15:24:32,297 >> Num examples = 1916\n", + "[INFO|trainer.py:2168] 2021-07-19 15:24:32,297 >> Batch size = 8\n", + "100% 239/240 [00:25<00:00, 9.62it/s]/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: NC seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fpa seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fpt seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fc seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Z seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: NP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fp seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fg seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + 
"/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DA seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: AQ seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VAI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: SP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PR seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: RG seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: CS seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: CC seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fe seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fx seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DD seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PD seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMG seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + 
"/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: P0 seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMM seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fd seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: AO seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSS seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VAS seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: RN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fat seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Faa seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DE seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMS seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fz seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + 
"/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PT seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VAN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSG seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PX seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DT seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fh seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VAP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fs seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSM seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Y seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fia seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. 
Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "100% 240/240 [00:28<00:00, 8.40it/s]\n", + "***** eval metrics *****\n", + " epoch = 5.0\n", + " eval_accuracy = 0.9709\n", + " eval_f1 = 0.9662\n", + " eval_loss = 0.1285\n", + " eval_precision = 0.9665\n", + " eval_recall = 0.9659\n", + " eval_runtime = 0:00:28.69\n", + " eval_samples = 1916\n", + " eval_samples_per_second = 66.777\n", + " eval_steps_per_second = 8.365\n", + "\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish, PID 1079\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Program ended successfully.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find user logs for this run at: /content/wandb/run-20210719_145343-srzdgyct/logs/debug.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find internal logs for this run at: /content/wandb/run-20210719_145343-srzdgyct/logs/debug-internal.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.0146\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 5205\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime 1877\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp 1626708300\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step 11\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime 1847.5133\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second 22.528\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second 2.817\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos 1.08808736772096e+16\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss 0.07379\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 0.12853\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision 0.96649\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall 0.96593\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 0.96621\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy 0.97086\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 28.6925\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 66.777\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 8.365\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss █▃▂▂▂▂▁▁▁▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate █▇▆▆▅▄▃▃▂▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step ▁▂▂▃▄▄▅▅▆▇▇█\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▁\n", + 
"\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 1 other file(s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced \u001b[33m./outputs\u001b[0m: \u001b[34mhttps://wandb.ai/versae/bertin-eval/runs/srzdgyct\u001b[0m\n", + "2021-07-19 15:25:13.480929: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "07/19/2021 15:25:16 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", + "07/19/2021 15:25:16 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", + "_n_gpu=1,\n", + "adafactor=False,\n", + "adam_beta1=0.9,\n", + "adam_beta2=0.999,\n", + "adam_epsilon=1e-08,\n", + "dataloader_drop_last=False,\n", + "dataloader_num_workers=0,\n", + "dataloader_pin_memory=True,\n", + "ddp_find_unused_parameters=None,\n", + "debug=[],\n", + "deepspeed=None,\n", + "disable_tqdm=False,\n", + "do_eval=True,\n", + "do_predict=False,\n", + "do_train=True,\n", + "eval_accumulation_steps=None,\n", + "eval_steps=500,\n", + "evaluation_strategy=IntervalStrategy.NO,\n", + "fp16=False,\n", + "fp16_backend=auto,\n", + "fp16_full_eval=False,\n", + "fp16_opt_level=O1,\n", + "gradient_accumulation_steps=1,\n", + "greater_is_better=None,\n", + "group_by_length=False,\n", + "ignore_data_skip=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=5e-05,\n", + "length_column_name=length,\n", + "load_best_model_at_end=False,\n", + "local_rank=-1,\n", + "log_level=-1,\n", + "log_level_replica=-1,\n", + "log_on_each_node=True,\n", + "logging_dir=./outputs/runs/Jul19_15-25-16_eab5184de9ff,\n", + "logging_first_step=False,\n", + "logging_steps=500,\n", + "logging_strategy=IntervalStrategy.STEPS,\n", + "lr_scheduler_type=SchedulerType.LINEAR,\n", + "max_grad_norm=1.0,\n", + "max_steps=-1,\n", + "metric_for_best_model=None,\n", + "mp_parameters=,\n", + "no_cuda=False,\n", + "num_train_epochs=5.0,\n", + "output_dir=./outputs,\n", + "overwrite_output_dir=True,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=8,\n", + "per_device_train_batch_size=8,\n", + "prediction_loss_only=False,\n", + "push_to_hub=False,\n", + "push_to_hub_model_id=outputs,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=None,\n", + "remove_unused_columns=True,\n", + "report_to=['tensorboard', 'wandb'],\n", + "resume_from_checkpoint=None,\n", + "run_name=./outputs,\n", + "save_on_each_node=False,\n", + "save_steps=500,\n", + "save_strategy=IntervalStrategy.STEPS,\n", + "save_total_limit=None,\n", + "seed=42,\n", + "sharded_ddp=[],\n", + "skip_memory_metrics=True,\n", + "tpu_metrics_debug=False,\n", + "tpu_num_cores=None,\n", + "use_legacy_prediction_loop=False,\n", + "warmup_ratio=0.0,\n", + "warmup_steps=0,\n", + "weight_decay=0.0,\n", + ")\n", + "07/19/2021 15:25:17 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 15:25:17 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at 
/root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 15:25:17 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + "07/19/2021 15:25:17 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 15:25:17 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 15:25:18 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 15:25:18 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 15:25:18 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + "07/19/2021 15:25:18 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 15:25:18 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 15:25:18 - INFO - datasets.info - Loading Dataset Infos from /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 15:25:18 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", + "07/19/2021 15:25:18 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 15:25:18 - WARNING - datasets.builder - Reusing dataset conll2002 (/root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5)\n", + "07/19/2021 15:25:18 - INFO - datasets.info - Loading Dataset info from 
/root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "100% 3/3 [00:00<00:00, 26.65it/s]\n", + "[INFO|configuration_utils.py:545] 2021-07-19 15:25:19,679 >> loading configuration file https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/367bb30bd1ae06268e9d1c64ae1fb923fc9931913fa478dfa01d79a4c7086238.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|configuration_utils.py:581] 2021-07-19 15:25:19,680 >> Model config RobertaConfig {\n", + " \"architectures\": [\n", + " \"RobertaForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"bos_token_id\": 0,\n", + " \"eos_token_id\": 2,\n", + " \"finetuning_task\": \"pos\",\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"id2label\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8,\n", + " \"9\": 9,\n", + " \"10\": 10,\n", + " \"11\": 11,\n", + " \"12\": 12,\n", + " \"13\": 13,\n", + " \"14\": 14,\n", + " \"15\": 15,\n", + " \"16\": 16,\n", + " \"17\": 17,\n", + " \"18\": 18,\n", + " \"19\": 19,\n", + " \"20\": 20,\n", + " \"21\": 21,\n", + " \"22\": 22,\n", + " \"23\": 23,\n", + " \"24\": 24,\n", + " \"25\": 25,\n", + " \"26\": 26,\n", + " \"27\": 27,\n", + " \"28\": 28,\n", + " \"29\": 29,\n", + " \"30\": 30,\n", + " \"31\": 31,\n", + " \"32\": 32,\n", + " \"33\": 33,\n", + " \"34\": 34,\n", + " \"35\": 35,\n", + " \"36\": 36,\n", + " \"37\": 37,\n", + " \"38\": 38,\n", + " \"39\": 39,\n", + " \"40\": 40,\n", + " \"41\": 41,\n", + " \"42\": 42,\n", + " \"43\": 43,\n", + " \"44\": 44,\n", + " \"45\": 45,\n", + " \"46\": 46,\n", + " \"47\": 47,\n", + " \"48\": 48,\n", + " \"49\": 49,\n", + " \"50\": 50,\n", + " \"51\": 51,\n", + " \"52\": 52,\n", + " \"53\": 53,\n", + " \"54\": 54,\n", + " \"55\": 55,\n", + " \"56\": 56,\n", + " \"57\": 57,\n", + " \"58\": 58,\n", + " \"59\": 59\n", + " },\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"label2id\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8,\n", + " \"9\": 9,\n", + " \"10\": 10,\n", + " \"11\": 11,\n", + " \"12\": 12,\n", + " \"13\": 13,\n", + " \"14\": 14,\n", + " \"15\": 15,\n", + " \"16\": 16,\n", + " \"17\": 17,\n", + " \"18\": 18,\n", + " \"19\": 19,\n", + " \"20\": 20,\n", + " \"21\": 21,\n", + " \"22\": 22,\n", + " \"23\": 23,\n", + " \"24\": 24,\n", + " \"25\": 25,\n", + " \"26\": 26,\n", + " \"27\": 27,\n", + " \"28\": 28,\n", + " \"29\": 29,\n", + " \"30\": 30,\n", + " \"31\": 31,\n", + " \"32\": 32,\n", + " \"33\": 33,\n", + " \"34\": 34,\n", + " \"35\": 35,\n", + " \"36\": 36,\n", + " \"37\": 37,\n", + " \"38\": 38,\n", + " \"39\": 39,\n", + " \"40\": 40,\n", + " \"41\": 41,\n", + " \"42\": 42,\n", + " \"43\": 43,\n", + " \"44\": 44,\n", + " \"45\": 45,\n", + " \"46\": 46,\n", + " \"47\": 47,\n", + " \"48\": 48,\n", + " \"49\": 49,\n", + " \"50\": 50,\n", + " \"51\": 51,\n", + " \"52\": 52,\n", + " \"53\": 53,\n", + " \"54\": 54,\n", + " \"55\": 55,\n", + " \"56\": 56,\n", + " \"57\": 57,\n", + " \"58\": 58,\n", + " \"59\": 59\n", + " },\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"max_position_embeddings\": 
514,\n", + " \"model_type\": \"roberta\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 1,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50265\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 15:25:25,275 >> loading file https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/vocab.json from cache at /root/.cache/huggingface/transformers/dc7971c78d10d920138338883fd23b96f3994bce40018345ab1ba2ba8c8f6bdd.a80f232f572026f92499b14999a8ed4e044e04cf3d01b9f2be298c98e78e8498\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 15:25:25,275 >> loading file https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/merges.txt from cache at /root/.cache/huggingface/transformers/5f573076405f6fab314615142ba9deec180a84917e495fecbf81f61afb2965cb.a0dfc41f9d0f03a56ba7a5401d770f6e43071045a0bd79073380d408d17a0d92\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 15:25:25,275 >> loading file https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/9a541e4855ef267ea4879cc9c2277f67dd5569f68cc688d212822bed1ca8755f.2a5dc806edc00ab3a329cb22b9973596ca75b24ba0e5e4963bf1308de7237a3d\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 15:25:25,275 >> loading file https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 15:25:25,275 >> loading file https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/special_tokens_map.json from cache at /root/.cache/huggingface/transformers/8e21c2757a0c3938b80989bb3dabd355f9221ed98847fb957a5c4e9a86209c03.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 15:25:25,275 >> loading file https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/tokenizer_config.json from cache at /root/.cache/huggingface/transformers/e577044edb84fa4a576105e2202c62cf62f831634f9581da80435c97b8034fba.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|modeling_utils.py:1271] 2021-07-19 15:25:26,072 >> loading weights file https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/4832a73c0f4a13adaab71151bf2413717416da487cdb2a79247f10198c6421f8.aebba6b503a22a0c70b362f8b026aa0f030aae594f7580f0164a0a73fb0001af\n", + "[WARNING|modeling_utils.py:1502] 2021-07-19 15:25:34,070 >> Some weights of the model checkpoint at bertin-project/bertin-base-stepwise were not used when initializing RobertaForTokenClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.bias']\n", + "- This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "[WARNING|modeling_utils.py:1513] 2021-07-19 15:25:34,071 >> Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at bertin-project/bertin-base-stepwise and are newly initialized: ['classifier.bias', 'classifier.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "Running tokenizer on train dataset: 0% 0/9 [00:00> The following columns in the training set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, tokens, pos_tags, id.\n", + "[INFO|trainer.py:1162] 2021-07-19 15:25:45,883 >> ***** Running training *****\n", + "[INFO|trainer.py:1163] 2021-07-19 15:25:45,883 >> Num examples = 8324\n", + "[INFO|trainer.py:1164] 2021-07-19 15:25:45,883 >> Num Epochs = 5\n", + "[INFO|trainer.py:1165] 2021-07-19 15:25:45,883 >> Instantaneous batch size per device = 8\n", + "[INFO|trainer.py:1166] 2021-07-19 15:25:45,883 >> Total train batch size (w. parallel, distributed & accumulation) = 8\n", + "[INFO|trainer.py:1167] 2021-07-19 15:25:45,883 >> Gradient Accumulation steps = 1\n", + "[INFO|trainer.py:1168] 2021-07-19 15:25:45,883 >> Total optimization steps = 5205\n", + "[INFO|integrations.py:417] 2021-07-19 15:25:45,898 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mversae\u001b[0m (use `wandb login --relogin` to force relogin)\n", + "2021-07-19 15:25:47.566224: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m./outputs\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval/runs/2qatt290\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in /content/wandb/run-20210719_152546-2qatt290\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run `wandb offline` to turn off syncing.\n", + "\n", + " 10% 500/5205 [02:53<31:14, 2.51it/s]{'loss': 0.3355, 'learning_rate': 4.5196926032660905e-05, 'epoch': 0.48}\n", + "[INFO|trainer.py:1917] 2021-07-19 15:28:42,151 >> Saving model checkpoint to ./outputs/checkpoint-500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 15:28:42,153 >> Configuration saved in ./outputs/checkpoint-500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 15:28:43,613 >> Model weights saved in ./outputs/checkpoint-500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 15:28:43,614 >> tokenizer config file saved in ./outputs/checkpoint-500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 15:28:43,615 >> Special tokens file saved in ./outputs/checkpoint-500/special_tokens_map.json\n", + " 19% 1000/5205 [05:52<27:19, 2.56it/s]{'loss': 0.1163, 'learning_rate': 
4.039385206532181e-05, 'epoch': 0.96}\n", + " 19% 1000/5205 [05:52<27:19, 2.56it/s][INFO|trainer.py:1917] 2021-07-19 15:31:41,087 >> Saving model checkpoint to ./outputs/checkpoint-1000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 15:31:41,089 >> Configuration saved in ./outputs/checkpoint-1000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 15:31:42,481 >> Model weights saved in ./outputs/checkpoint-1000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 15:31:42,482 >> tokenizer config file saved in ./outputs/checkpoint-1000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 15:31:42,482 >> Special tokens file saved in ./outputs/checkpoint-1000/special_tokens_map.json\n", + " 29% 1500/5205 [08:50<24:24, 2.53it/s]{'loss': 0.0776, 'learning_rate': 3.5590778097982716e-05, 'epoch': 1.44}\n", + " 29% 1500/5205 [08:50<24:24, 2.53it/s][INFO|trainer.py:1917] 2021-07-19 15:34:39,102 >> Saving model checkpoint to ./outputs/checkpoint-1500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 15:34:39,104 >> Configuration saved in ./outputs/checkpoint-1500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 15:34:40,663 >> Model weights saved in ./outputs/checkpoint-1500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 15:34:40,664 >> tokenizer config file saved in ./outputs/checkpoint-1500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 15:34:40,664 >> Special tokens file saved in ./outputs/checkpoint-1500/special_tokens_map.json\n", + " 38% 2000/5205 [11:48<20:47, 2.57it/s]{'loss': 0.0723, 'learning_rate': 3.078770413064361e-05, 'epoch': 1.92}\n", + "[INFO|trainer.py:1917] 2021-07-19 15:37:37,390 >> Saving model checkpoint to ./outputs/checkpoint-2000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 15:37:37,395 >> Configuration saved in ./outputs/checkpoint-2000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 15:37:38,782 >> Model weights saved in ./outputs/checkpoint-2000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 15:37:38,783 >> tokenizer config file saved in ./outputs/checkpoint-2000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 15:37:38,783 >> Special tokens file saved in ./outputs/checkpoint-2000/special_tokens_map.json\n", + " 48% 2500/5205 [14:46<17:39, 2.55it/s]{'loss': 0.0487, 'learning_rate': 2.5984630163304517e-05, 'epoch': 2.4}\n", + " 48% 2500/5205 [14:46<17:39, 2.55it/s][INFO|trainer.py:1917] 2021-07-19 15:40:35,202 >> Saving model checkpoint to ./outputs/checkpoint-2500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 15:40:35,204 >> Configuration saved in ./outputs/checkpoint-2500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 15:40:36,692 >> Model weights saved in ./outputs/checkpoint-2500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 15:40:36,693 >> tokenizer config file saved in ./outputs/checkpoint-2500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 15:40:36,693 >> Special tokens file saved in ./outputs/checkpoint-2500/special_tokens_map.json\n", + "{'loss': 0.044, 'learning_rate': 2.118155619596542e-05, 'epoch': 2.88}\n", + " 58% 3000/5205 [17:44<14:29, 2.54it/s][INFO|trainer.py:1917] 2021-07-19 15:43:33,536 >> Saving model checkpoint to ./outputs/checkpoint-3000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 15:43:33,538 >> Configuration saved in ./outputs/checkpoint-3000/config.json\n", 
+ "[INFO|modeling_utils.py:997] 2021-07-19 15:43:35,230 >> Model weights saved in ./outputs/checkpoint-3000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 15:43:35,231 >> tokenizer config file saved in ./outputs/checkpoint-3000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 15:43:35,231 >> Special tokens file saved in ./outputs/checkpoint-3000/special_tokens_map.json\n", + "{'loss': 0.0302, 'learning_rate': 1.6378482228626322e-05, 'epoch': 3.36}\n", + " 67% 3500/5205 [20:42<11:03, 2.57it/s][INFO|trainer.py:1917] 2021-07-19 15:46:31,314 >> Saving model checkpoint to ./outputs/checkpoint-3500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 15:46:31,320 >> Configuration saved in ./outputs/checkpoint-3500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 15:46:32,767 >> Model weights saved in ./outputs/checkpoint-3500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 15:46:32,768 >> tokenizer config file saved in ./outputs/checkpoint-3500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 15:46:32,768 >> Special tokens file saved in ./outputs/checkpoint-3500/special_tokens_map.json\n", + " 77% 4000/5205 [23:40<07:54, 2.54it/s]{'loss': 0.0264, 'learning_rate': 1.1575408261287224e-05, 'epoch': 3.84}\n", + "[INFO|trainer.py:1917] 2021-07-19 15:49:29,301 >> Saving model checkpoint to ./outputs/checkpoint-4000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 15:49:29,304 >> Configuration saved in ./outputs/checkpoint-4000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 15:49:30,796 >> Model weights saved in ./outputs/checkpoint-4000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 15:49:30,863 >> tokenizer config file saved in ./outputs/checkpoint-4000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 15:49:30,864 >> Special tokens file saved in ./outputs/checkpoint-4000/special_tokens_map.json\n", + " 86% 4500/5205 [26:38<04:34, 2.56it/s]{'loss': 0.0196, 'learning_rate': 6.7723342939481265e-06, 'epoch': 4.32}\n", + " 86% 4500/5205 [26:38<04:34, 2.56it/s][INFO|trainer.py:1917] 2021-07-19 15:52:27,009 >> Saving model checkpoint to ./outputs/checkpoint-4500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 15:52:27,011 >> Configuration saved in ./outputs/checkpoint-4500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 15:52:28,527 >> Model weights saved in ./outputs/checkpoint-4500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 15:52:28,528 >> tokenizer config file saved in ./outputs/checkpoint-4500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 15:52:28,528 >> Special tokens file saved in ./outputs/checkpoint-4500/special_tokens_map.json\n", + " 96% 5000/5205 [29:35<01:20, 2.55it/s]{'loss': 0.0157, 'learning_rate': 1.96926032660903e-06, 'epoch': 4.8}\n", + "[INFO|trainer.py:1917] 2021-07-19 15:55:24,733 >> Saving model checkpoint to ./outputs/checkpoint-5000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 15:55:24,735 >> Configuration saved in ./outputs/checkpoint-5000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 15:55:26,325 >> Model weights saved in ./outputs/checkpoint-5000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 15:55:26,326 >> tokenizer config file saved in ./outputs/checkpoint-5000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 15:55:26,326 >> 
Special tokens file saved in ./outputs/checkpoint-5000/special_tokens_map.json\n", + "100% 5205/5205 [30:52<00:00, 3.25it/s][INFO|trainer.py:1358] 2021-07-19 15:56:41,099 >> \n", + "\n", + "Training completed. Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n", + "100% 5205/5205 [30:52<00:00, 3.25it/s]{'train_runtime': 1855.2157, 'train_samples_per_second': 22.434, 'train_steps_per_second': 2.806, 'train_loss': 0.07618391575662135, 'epoch': 5.0}\n", + "100% 5205/5205 [30:52<00:00, 2.81it/s]\n", + "[INFO|trainer.py:1917] 2021-07-19 15:56:41,107 >> Saving model checkpoint to ./outputs\n", + "[INFO|configuration_utils.py:379] 2021-07-19 15:56:41,111 >> Configuration saved in ./outputs/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 15:56:42,720 >> Model weights saved in ./outputs/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 15:56:42,721 >> tokenizer config file saved in ./outputs/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 15:56:42,721 >> Special tokens file saved in ./outputs/special_tokens_map.json\n", + "***** train metrics *****\n", + " epoch = 5.0\n", + " train_loss = 0.0762\n", + " train_runtime = 0:30:55.21\n", + " train_samples = 8324\n", + " train_samples_per_second = 22.434\n", + " train_steps_per_second = 2.806\n", + "[INFO|trainer.py:522] 2021-07-19 15:56:42,842 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, tokens, pos_tags, id.\n", + "07/19/2021 15:56:42 - INFO - __main__ - *** Evaluate ***\n", + "[INFO|trainer.py:2163] 2021-07-19 15:56:42,919 >> ***** Running Evaluation *****\n", + "[INFO|trainer.py:2165] 2021-07-19 15:56:42,919 >> Num examples = 1916\n", + "[INFO|trainer.py:2168] 2021-07-19 15:56:42,919 >> Batch size = 8\n", + "100% 239/240 [00:24<00:00, 9.68it/s]/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: NC seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fpa seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fpt seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fc seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Z seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: NP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fp seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + 
"/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fg seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DA seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: AQ seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VAI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: SP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PR seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: RG seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: CS seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: CC seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fe seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fx seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DD seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PD seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + 
"/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMG seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: P0 seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMM seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fd seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: AO seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSS seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VAS seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: RN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fat seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Faa seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DE seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMS seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + 
"/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fz seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PT seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VAN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSG seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PX seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DT seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fh seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VAP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fs seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSM seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Y seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fit seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fia seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. 
Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "100% 240/240 [00:28<00:00, 8.47it/s]\n", + "***** eval metrics *****\n", + " epoch = 5.0\n", + " eval_accuracy = 0.9707\n", + " eval_f1 = 0.9656\n", + " eval_loss = 0.1244\n", + " eval_precision = 0.9659\n", + " eval_recall = 0.9653\n", + " eval_runtime = 0:00:28.46\n", + " eval_samples = 1916\n", + " eval_samples_per_second = 67.321\n", + " eval_steps_per_second = 8.433\n", + "\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish, PID 1143\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Program ended successfully.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find user logs for this run at: /content/wandb/run-20210719_152546-2qatt290/logs/debug.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find internal logs for this run at: /content/wandb/run-20210719_152546-2qatt290/logs/debug-internal.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.0157\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 5205\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime 1885\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp 1626710231\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step 11\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime 1855.2157\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second 22.434\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second 2.806\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos 1.08808736772096e+16\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss 0.07618\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 0.12443\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision 0.96588\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall 0.96526\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 0.96557\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy 0.97067\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 28.4605\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 67.321\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 8.433\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss █▃▂▂▂▂▁▁▁▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate █▇▆▆▅▄▃▃▂▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step ▁▂▂▃▄▄▅▅▆▇▇█\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▁\n", + 
"\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 1 other file(s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced \u001b[33m./outputs\u001b[0m: \u001b[34mhttps://wandb.ai/versae/bertin-eval/runs/2qatt290\u001b[0m\n", + "2021-07-19 15:57:23.970722: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "07/19/2021 15:57:26 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", + "07/19/2021 15:57:26 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", + "_n_gpu=1,\n", + "adafactor=False,\n", + "adam_beta1=0.9,\n", + "adam_beta2=0.999,\n", + "adam_epsilon=1e-08,\n", + "dataloader_drop_last=False,\n", + "dataloader_num_workers=0,\n", + "dataloader_pin_memory=True,\n", + "ddp_find_unused_parameters=None,\n", + "debug=[],\n", + "deepspeed=None,\n", + "disable_tqdm=False,\n", + "do_eval=True,\n", + "do_predict=False,\n", + "do_train=True,\n", + "eval_accumulation_steps=None,\n", + "eval_steps=500,\n", + "evaluation_strategy=IntervalStrategy.NO,\n", + "fp16=False,\n", + "fp16_backend=auto,\n", + "fp16_full_eval=False,\n", + "fp16_opt_level=O1,\n", + "gradient_accumulation_steps=1,\n", + "greater_is_better=None,\n", + "group_by_length=False,\n", + "ignore_data_skip=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=5e-05,\n", + "length_column_name=length,\n", + "load_best_model_at_end=False,\n", + "local_rank=-1,\n", + "log_level=-1,\n", + "log_level_replica=-1,\n", + "log_on_each_node=True,\n", + "logging_dir=./outputs/runs/Jul19_15-57-26_eab5184de9ff,\n", + "logging_first_step=False,\n", + "logging_steps=500,\n", + "logging_strategy=IntervalStrategy.STEPS,\n", + "lr_scheduler_type=SchedulerType.LINEAR,\n", + "max_grad_norm=1.0,\n", + "max_steps=-1,\n", + "metric_for_best_model=None,\n", + "mp_parameters=,\n", + "no_cuda=False,\n", + "num_train_epochs=5.0,\n", + "output_dir=./outputs,\n", + "overwrite_output_dir=True,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=8,\n", + "per_device_train_batch_size=8,\n", + "prediction_loss_only=False,\n", + "push_to_hub=False,\n", + "push_to_hub_model_id=outputs,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=None,\n", + "remove_unused_columns=True,\n", + "report_to=['tensorboard', 'wandb'],\n", + "resume_from_checkpoint=None,\n", + "run_name=./outputs,\n", + "save_on_each_node=False,\n", + "save_steps=500,\n", + "save_strategy=IntervalStrategy.STEPS,\n", + "save_total_limit=None,\n", + "seed=42,\n", + "sharded_ddp=[],\n", + "skip_memory_metrics=True,\n", + "tpu_metrics_debug=False,\n", + "tpu_num_cores=None,\n", + "use_legacy_prediction_loop=False,\n", + "warmup_ratio=0.0,\n", + "warmup_steps=0,\n", + "weight_decay=0.0,\n", + ")\n", + "07/19/2021 15:57:28 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 15:57:28 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at 
/root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 15:57:28 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + "07/19/2021 15:57:28 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 15:57:28 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 15:57:29 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 15:57:29 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 15:57:29 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + "07/19/2021 15:57:29 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 15:57:29 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 15:57:29 - INFO - datasets.info - Loading Dataset Infos from /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 15:57:29 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", + "07/19/2021 15:57:29 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 15:57:29 - WARNING - datasets.builder - Reusing dataset conll2002 (/root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5)\n", + "07/19/2021 15:57:29 - INFO - datasets.info - Loading Dataset info from 
/root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "100% 3/3 [00:00<00:00, 26.12it/s]\n", + "[INFO|configuration_utils.py:545] 2021-07-19 15:57:30,060 >> loading configuration file https://huggingface.co/bertin-project/bertin-base-random/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/206b1dac57f81203b68e667a852122cb8107d8f6ec61c5f61ff3911b995464bc.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|configuration_utils.py:581] 2021-07-19 15:57:30,061 >> Model config RobertaConfig {\n", + " \"architectures\": [\n", + " \"RobertaForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"bos_token_id\": 0,\n", + " \"eos_token_id\": 2,\n", + " \"finetuning_task\": \"pos\",\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"id2label\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8,\n", + " \"9\": 9,\n", + " \"10\": 10,\n", + " \"11\": 11,\n", + " \"12\": 12,\n", + " \"13\": 13,\n", + " \"14\": 14,\n", + " \"15\": 15,\n", + " \"16\": 16,\n", + " \"17\": 17,\n", + " \"18\": 18,\n", + " \"19\": 19,\n", + " \"20\": 20,\n", + " \"21\": 21,\n", + " \"22\": 22,\n", + " \"23\": 23,\n", + " \"24\": 24,\n", + " \"25\": 25,\n", + " \"26\": 26,\n", + " \"27\": 27,\n", + " \"28\": 28,\n", + " \"29\": 29,\n", + " \"30\": 30,\n", + " \"31\": 31,\n", + " \"32\": 32,\n", + " \"33\": 33,\n", + " \"34\": 34,\n", + " \"35\": 35,\n", + " \"36\": 36,\n", + " \"37\": 37,\n", + " \"38\": 38,\n", + " \"39\": 39,\n", + " \"40\": 40,\n", + " \"41\": 41,\n", + " \"42\": 42,\n", + " \"43\": 43,\n", + " \"44\": 44,\n", + " \"45\": 45,\n", + " \"46\": 46,\n", + " \"47\": 47,\n", + " \"48\": 48,\n", + " \"49\": 49,\n", + " \"50\": 50,\n", + " \"51\": 51,\n", + " \"52\": 52,\n", + " \"53\": 53,\n", + " \"54\": 54,\n", + " \"55\": 55,\n", + " \"56\": 56,\n", + " \"57\": 57,\n", + " \"58\": 58,\n", + " \"59\": 59\n", + " },\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"label2id\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8,\n", + " \"9\": 9,\n", + " \"10\": 10,\n", + " \"11\": 11,\n", + " \"12\": 12,\n", + " \"13\": 13,\n", + " \"14\": 14,\n", + " \"15\": 15,\n", + " \"16\": 16,\n", + " \"17\": 17,\n", + " \"18\": 18,\n", + " \"19\": 19,\n", + " \"20\": 20,\n", + " \"21\": 21,\n", + " \"22\": 22,\n", + " \"23\": 23,\n", + " \"24\": 24,\n", + " \"25\": 25,\n", + " \"26\": 26,\n", + " \"27\": 27,\n", + " \"28\": 28,\n", + " \"29\": 29,\n", + " \"30\": 30,\n", + " \"31\": 31,\n", + " \"32\": 32,\n", + " \"33\": 33,\n", + " \"34\": 34,\n", + " \"35\": 35,\n", + " \"36\": 36,\n", + " \"37\": 37,\n", + " \"38\": 38,\n", + " \"39\": 39,\n", + " \"40\": 40,\n", + " \"41\": 41,\n", + " \"42\": 42,\n", + " \"43\": 43,\n", + " \"44\": 44,\n", + " \"45\": 45,\n", + " \"46\": 46,\n", + " \"47\": 47,\n", + " \"48\": 48,\n", + " \"49\": 49,\n", + " \"50\": 50,\n", + " \"51\": 51,\n", + " \"52\": 52,\n", + " \"53\": 53,\n", + " \"54\": 54,\n", + " \"55\": 55,\n", + " \"56\": 56,\n", + " \"57\": 57,\n", + " \"58\": 58,\n", + " \"59\": 59\n", + " },\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"max_position_embeddings\": 
514,\n", + " \"model_type\": \"roberta\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 1,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50265\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 15:57:35,215 >> loading file https://huggingface.co/bertin-project/bertin-base-random/resolve/main/vocab.json from cache at /root/.cache/huggingface/transformers/b4006ecc1f0c7264bbecc09c6cb89b65ec7db509c3c0604de97e88f24ea4d1f6.a80f232f572026f92499b14999a8ed4e044e04cf3d01b9f2be298c98e78e8498\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 15:57:35,215 >> loading file https://huggingface.co/bertin-project/bertin-base-random/resolve/main/merges.txt from cache at /root/.cache/huggingface/transformers/edf6b87725489a8b69bfa2267d17e097798faa72e64ff4c5cc3d18672be1064c.a0dfc41f9d0f03a56ba7a5401d770f6e43071045a0bd79073380d408d17a0d92\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 15:57:35,216 >> loading file https://huggingface.co/bertin-project/bertin-base-random/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/a2564e0d9105fa93a9e315cbeba7570df09180efa1d54facf9770aa67f46bfe6.2a5dc806edc00ab3a329cb22b9973596ca75b24ba0e5e4963bf1308de7237a3d\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 15:57:35,216 >> loading file https://huggingface.co/bertin-project/bertin-base-random/resolve/main/added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 15:57:35,216 >> loading file https://huggingface.co/bertin-project/bertin-base-random/resolve/main/special_tokens_map.json from cache at /root/.cache/huggingface/transformers/92bfdce86055cda7484e6cda39edf00079de963108ce7a53ac914029100d8a99.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 15:57:35,216 >> loading file https://huggingface.co/bertin-project/bertin-base-random/resolve/main/tokenizer_config.json from cache at /root/.cache/huggingface/transformers/115b6bf4f08e65d03dda82a834c42cf46339813770aadf59679bd51d0f2ea3a5.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|modeling_utils.py:1271] 2021-07-19 15:57:36,040 >> loading weights file https://huggingface.co/bertin-project/bertin-base-random/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/5bec117e97fb2ec50c8615b1e2cde784a5851d7664417aa49e21403f91722df2.fc71d0bf8be2b57b708f6397729f6f12e8e95a3e69e8a44030293be69ee5bc0d\n", + "[WARNING|modeling_utils.py:1502] 2021-07-19 15:57:44,020 >> Some weights of the model checkpoint at bertin-project/bertin-base-random were not used when initializing RobertaForTokenClassification: ['lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight']\n", + "- This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "[WARNING|modeling_utils.py:1513] 2021-07-19 15:57:44,020 >> Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at bertin-project/bertin-base-random and are newly initialized: ['classifier.weight', 'classifier.bias']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "Running tokenizer on train dataset: 0% 0/9 [00:00> The following columns in the training set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, ner_tags, id, pos_tags.\n", + "[INFO|trainer.py:1162] 2021-07-19 15:57:55,686 >> ***** Running training *****\n", + "[INFO|trainer.py:1163] 2021-07-19 15:57:55,686 >> Num examples = 8324\n", + "[INFO|trainer.py:1164] 2021-07-19 15:57:55,686 >> Num Epochs = 5\n", + "[INFO|trainer.py:1165] 2021-07-19 15:57:55,687 >> Instantaneous batch size per device = 8\n", + "[INFO|trainer.py:1166] 2021-07-19 15:57:55,687 >> Total train batch size (w. parallel, distributed & accumulation) = 8\n", + "[INFO|trainer.py:1167] 2021-07-19 15:57:55,687 >> Gradient Accumulation steps = 1\n", + "[INFO|trainer.py:1168] 2021-07-19 15:57:55,687 >> Total optimization steps = 5205\n", + "[INFO|integrations.py:417] 2021-07-19 15:57:55,705 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mversae\u001b[0m (use `wandb login --relogin` to force relogin)\n", + "2021-07-19 15:57:57.309125: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m./outputs\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval/runs/21v515t2\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in /content/wandb/run-20210719_155755-21v515t2\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run `wandb offline` to turn off syncing.\n", + "\n", + " 10% 500/5205 [02:53<30:46, 2.55it/s]{'loss': 0.3451, 'learning_rate': 4.5196926032660905e-05, 'epoch': 0.48}\n", + " 10% 500/5205 [02:53<30:46, 2.55it/s][INFO|trainer.py:1917] 2021-07-19 16:00:52,060 >> Saving model checkpoint to ./outputs/checkpoint-500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 16:00:52,062 >> Configuration saved in ./outputs/checkpoint-500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 16:00:53,570 >> Model weights saved in ./outputs/checkpoint-500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 16:00:53,571 >> tokenizer config file saved in ./outputs/checkpoint-500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 16:00:53,571 >> Special tokens file saved in ./outputs/checkpoint-500/special_tokens_map.json\n", + "{'loss': 0.1204, 'learning_rate': 
4.039385206532181e-05, 'epoch': 0.96}\n", + " 19% 1000/5205 [05:52<27:33, 2.54it/s][INFO|trainer.py:1917] 2021-07-19 16:03:50,729 >> Saving model checkpoint to ./outputs/checkpoint-1000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 16:03:50,731 >> Configuration saved in ./outputs/checkpoint-1000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 16:03:52,169 >> Model weights saved in ./outputs/checkpoint-1000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 16:03:52,170 >> tokenizer config file saved in ./outputs/checkpoint-1000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 16:03:52,171 >> Special tokens file saved in ./outputs/checkpoint-1000/special_tokens_map.json\n", + "{'loss': 0.0793, 'learning_rate': 3.5590778097982716e-05, 'epoch': 1.44}\n", + " 29% 1500/5205 [08:50<24:07, 2.56it/s][INFO|trainer.py:1917] 2021-07-19 16:06:48,737 >> Saving model checkpoint to ./outputs/checkpoint-1500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 16:06:48,738 >> Configuration saved in ./outputs/checkpoint-1500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 16:06:50,181 >> Model weights saved in ./outputs/checkpoint-1500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 16:06:50,182 >> tokenizer config file saved in ./outputs/checkpoint-1500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 16:06:50,182 >> Special tokens file saved in ./outputs/checkpoint-1500/special_tokens_map.json\n", + "{'loss': 0.0726, 'learning_rate': 3.078770413064361e-05, 'epoch': 1.92}\n", + " 38% 2000/5205 [11:48<20:46, 2.57it/s][INFO|trainer.py:1917] 2021-07-19 16:09:46,782 >> Saving model checkpoint to ./outputs/checkpoint-2000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 16:09:46,784 >> Configuration saved in ./outputs/checkpoint-2000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 16:09:48,219 >> Model weights saved in ./outputs/checkpoint-2000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 16:09:48,220 >> tokenizer config file saved in ./outputs/checkpoint-2000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 16:09:48,220 >> Special tokens file saved in ./outputs/checkpoint-2000/special_tokens_map.json\n", + " 48% 2500/5205 [14:46<17:47, 2.54it/s][INFO|trainer.py:1917] 2021-07-19 16:12:44,588 >> Saving model checkpoint to ./outputs/checkpoint-2500\n", + "{'loss': 0.0491, 'learning_rate': 2.5984630163304517e-05, 'epoch': 2.4}\n", + "[INFO|configuration_utils.py:379] 2021-07-19 16:12:44,590 >> Configuration saved in ./outputs/checkpoint-2500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 16:12:46,083 >> Model weights saved in ./outputs/checkpoint-2500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 16:12:46,084 >> tokenizer config file saved in ./outputs/checkpoint-2500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 16:12:46,085 >> Special tokens file saved in ./outputs/checkpoint-2500/special_tokens_map.json\n", + " 58% 3000/5205 [17:43<14:24, 2.55it/s]{'loss': 0.0442, 'learning_rate': 2.118155619596542e-05, 'epoch': 2.88}\n", + " 58% 3000/5205 [17:43<14:24, 2.55it/s][INFO|trainer.py:1917] 2021-07-19 16:15:42,327 >> Saving model checkpoint to ./outputs/checkpoint-3000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 16:15:42,329 >> Configuration saved in ./outputs/checkpoint-3000/config.json\n", + "[INFO|modeling_utils.py:997] 
2021-07-19 16:15:44,005 >> Model weights saved in ./outputs/checkpoint-3000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 16:15:44,005 >> tokenizer config file saved in ./outputs/checkpoint-3000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 16:15:44,006 >> Special tokens file saved in ./outputs/checkpoint-3000/special_tokens_map.json\n", + " 67% 3500/5205 [20:40<11:11, 2.54it/s]{'loss': 0.0297, 'learning_rate': 1.6378482228626322e-05, 'epoch': 3.36}\n", + " 67% 3500/5205 [20:40<11:11, 2.54it/s][INFO|trainer.py:1917] 2021-07-19 16:18:39,550 >> Saving model checkpoint to ./outputs/checkpoint-3500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 16:18:39,552 >> Configuration saved in ./outputs/checkpoint-3500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 16:18:40,968 >> Model weights saved in ./outputs/checkpoint-3500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 16:18:40,969 >> tokenizer config file saved in ./outputs/checkpoint-3500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 16:18:40,969 >> Special tokens file saved in ./outputs/checkpoint-3500/special_tokens_map.json\n", + "{'loss': 0.0248, 'learning_rate': 1.1575408261287224e-05, 'epoch': 3.84}\n", + " 77% 4000/5205 [23:38<07:51, 2.56it/s][INFO|trainer.py:1917] 2021-07-19 16:21:37,282 >> Saving model checkpoint to ./outputs/checkpoint-4000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 16:21:37,284 >> Configuration saved in ./outputs/checkpoint-4000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 16:21:38,766 >> Model weights saved in ./outputs/checkpoint-4000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 16:21:38,767 >> tokenizer config file saved in ./outputs/checkpoint-4000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 16:21:38,768 >> Special tokens file saved in ./outputs/checkpoint-4000/special_tokens_map.json\n", + " 86% 4500/5205 [26:35<04:32, 2.58it/s]{'loss': 0.0199, 'learning_rate': 6.7723342939481265e-06, 'epoch': 4.32}\n", + " 86% 4500/5205 [26:35<04:32, 2.58it/s][INFO|trainer.py:1917] 2021-07-19 16:24:34,468 >> Saving model checkpoint to ./outputs/checkpoint-4500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 16:24:34,470 >> Configuration saved in ./outputs/checkpoint-4500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 16:24:36,130 >> Model weights saved in ./outputs/checkpoint-4500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 16:24:36,131 >> tokenizer config file saved in ./outputs/checkpoint-4500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 16:24:36,132 >> Special tokens file saved in ./outputs/checkpoint-4500/special_tokens_map.json\n", + " 96% 5000/5205 [29:33<01:20, 2.54it/s]{'loss': 0.0159, 'learning_rate': 1.96926032660903e-06, 'epoch': 4.8}\n", + "[INFO|trainer.py:1917] 2021-07-19 16:27:32,111 >> Saving model checkpoint to ./outputs/checkpoint-5000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 16:27:32,117 >> Configuration saved in ./outputs/checkpoint-5000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 16:27:33,589 >> Model weights saved in ./outputs/checkpoint-5000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 16:27:33,590 >> tokenizer config file saved in ./outputs/checkpoint-5000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 16:27:33,591 
>> Special tokens file saved in ./outputs/checkpoint-5000/special_tokens_map.json\n", + "100% 5205/5205 [30:49<00:00, 3.24it/s][INFO|trainer.py:1358] 2021-07-19 16:28:48,288 >> \n", + "\n", + "Training completed. Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n", + "100% 5205/5205 [30:49<00:00, 3.24it/s]{'train_runtime': 1852.6013, 'train_samples_per_second': 22.466, 'train_steps_per_second': 2.81, 'train_loss': 0.07757037586025271, 'epoch': 5.0}\n", + "100% 5205/5205 [30:49<00:00, 2.81it/s]\n", + "[INFO|trainer.py:1917] 2021-07-19 16:28:48,294 >> Saving model checkpoint to ./outputs\n", + "[INFO|configuration_utils.py:379] 2021-07-19 16:28:48,299 >> Configuration saved in ./outputs/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 16:28:49,924 >> Model weights saved in ./outputs/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 16:28:49,925 >> tokenizer config file saved in ./outputs/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 16:28:49,925 >> Special tokens file saved in ./outputs/special_tokens_map.json\n", + "***** train metrics *****\n", + " epoch = 5.0\n", + " train_loss = 0.0776\n", + " train_runtime = 0:30:52.60\n", + " train_samples = 8324\n", + " train_samples_per_second = 22.466\n", + " train_steps_per_second = 2.81\n", + "07/19/2021 16:28:50 - INFO - __main__ - *** Evaluate ***\n", + "[INFO|trainer.py:522] 2021-07-19 16:28:50,053 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, ner_tags, id, pos_tags.\n", + "[INFO|trainer.py:2163] 2021-07-19 16:28:50,189 >> ***** Running Evaluation *****\n", + "[INFO|trainer.py:2165] 2021-07-19 16:28:50,189 >> Num examples = 1916\n", + "[INFO|trainer.py:2168] 2021-07-19 16:28:50,189 >> Batch size = 8\n", + "100% 239/240 [00:24<00:00, 9.66it/s]/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: NC seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fpa seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fpt seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fc seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Z seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: NP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fp seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + 
"/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fg seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DA seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: AQ seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VAI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: SP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PR seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: RG seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: CS seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: CC seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fe seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fx seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DD seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PD seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + 
"/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMG seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: P0 seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMM seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fd seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: AO seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSS seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VAS seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: RN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fat seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Faa seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DE seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMS seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + 
"/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fz seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PT seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VAN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSG seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PX seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DT seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fh seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VAP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fs seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSM seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Y seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fia seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fit seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. 
Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "100% 240/240 [00:28<00:00, 8.48it/s]\n", + "***** eval metrics *****\n", + " epoch = 5.0\n", + " eval_accuracy = 0.9704\n", + " eval_f1 = 0.9656\n", + " eval_loss = 0.1342\n", + " eval_precision = 0.9662\n", + " eval_recall = 0.9651\n", + " eval_runtime = 0:00:28.41\n", + " eval_samples = 1916\n", + " eval_samples_per_second = 67.437\n", + " eval_steps_per_second = 8.447\n", + "\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish, PID 1207\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Program ended successfully.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find user logs for this run at: /content/wandb/run-20210719_155755-21v515t2/logs/debug.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find internal logs for this run at: /content/wandb/run-20210719_155755-21v515t2/logs/debug-internal.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.0159\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 5205\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime 1882\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp 1626712158\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step 11\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime 1852.6013\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second 22.466\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second 2.81\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos 1.08808736772096e+16\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss 0.07757\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 0.13416\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision 0.96616\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall 0.96507\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 0.96561\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy 0.97043\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 28.4118\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 67.437\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 8.447\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss █▃▂▂▂▂▁▁▁▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate █▇▆▆▅▄▃▃▂▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step ▁▂▂▃▄▄▅▅▆▇▇█\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▁\n", + 
"\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 1 other file(s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced \u001b[33m./outputs\u001b[0m: \u001b[34mhttps://wandb.ai/versae/bertin-eval/runs/21v515t2\u001b[0m\n", + "2021-07-19 16:29:31.414800: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "07/19/2021 16:29:34 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", + "07/19/2021 16:29:34 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", + "_n_gpu=1,\n", + "adafactor=False,\n", + "adam_beta1=0.9,\n", + "adam_beta2=0.999,\n", + "adam_epsilon=1e-08,\n", + "dataloader_drop_last=False,\n", + "dataloader_num_workers=0,\n", + "dataloader_pin_memory=True,\n", + "ddp_find_unused_parameters=None,\n", + "debug=[],\n", + "deepspeed=None,\n", + "disable_tqdm=False,\n", + "do_eval=True,\n", + "do_predict=False,\n", + "do_train=True,\n", + "eval_accumulation_steps=None,\n", + "eval_steps=500,\n", + "evaluation_strategy=IntervalStrategy.NO,\n", + "fp16=False,\n", + "fp16_backend=auto,\n", + "fp16_full_eval=False,\n", + "fp16_opt_level=O1,\n", + "gradient_accumulation_steps=1,\n", + "greater_is_better=None,\n", + "group_by_length=False,\n", + "ignore_data_skip=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=5e-05,\n", + "length_column_name=length,\n", + "load_best_model_at_end=False,\n", + "local_rank=-1,\n", + "log_level=-1,\n", + "log_level_replica=-1,\n", + "log_on_each_node=True,\n", + "logging_dir=./outputs/runs/Jul19_16-29-34_eab5184de9ff,\n", + "logging_first_step=False,\n", + "logging_steps=500,\n", + "logging_strategy=IntervalStrategy.STEPS,\n", + "lr_scheduler_type=SchedulerType.LINEAR,\n", + "max_grad_norm=1.0,\n", + "max_steps=-1,\n", + "metric_for_best_model=None,\n", + "mp_parameters=,\n", + "no_cuda=False,\n", + "num_train_epochs=5.0,\n", + "output_dir=./outputs,\n", + "overwrite_output_dir=True,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=8,\n", + "per_device_train_batch_size=8,\n", + "prediction_loss_only=False,\n", + "push_to_hub=False,\n", + "push_to_hub_model_id=outputs,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=None,\n", + "remove_unused_columns=True,\n", + "report_to=['tensorboard', 'wandb'],\n", + "resume_from_checkpoint=None,\n", + "run_name=./outputs,\n", + "save_on_each_node=False,\n", + "save_steps=500,\n", + "save_strategy=IntervalStrategy.STEPS,\n", + "save_total_limit=None,\n", + "seed=42,\n", + "sharded_ddp=[],\n", + "skip_memory_metrics=True,\n", + "tpu_metrics_debug=False,\n", + "tpu_num_cores=None,\n", + "use_legacy_prediction_loop=False,\n", + "warmup_ratio=0.0,\n", + "warmup_steps=0,\n", + "weight_decay=0.0,\n", + ")\n", + "07/19/2021 16:29:35 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 16:29:35 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at 
/root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 16:29:35 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + "07/19/2021 16:29:35 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 16:29:35 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 16:29:36 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 16:29:36 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 16:29:36 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + "07/19/2021 16:29:36 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 16:29:36 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 16:29:36 - INFO - datasets.info - Loading Dataset Infos from /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 16:29:36 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", + "07/19/2021 16:29:36 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 16:29:36 - WARNING - datasets.builder - Reusing dataset conll2002 (/root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5)\n", + "07/19/2021 16:29:36 - INFO - datasets.info - Loading Dataset info from 
/root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "100% 3/3 [00:00<00:00, 27.90it/s]\n", + "[INFO|configuration_utils.py:545] 2021-07-19 16:29:37,792 >> loading configuration file https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/aa24361f0b7bed62876f6cd0a784a2b622c1959523906d89eeb1112139a4864a.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|configuration_utils.py:581] 2021-07-19 16:29:37,793 >> Model config RobertaConfig {\n", + " \"architectures\": [\n", + " \"RobertaForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"bos_token_id\": 0,\n", + " \"eos_token_id\": 2,\n", + " \"finetuning_task\": \"pos\",\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"id2label\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8,\n", + " \"9\": 9,\n", + " \"10\": 10,\n", + " \"11\": 11,\n", + " \"12\": 12,\n", + " \"13\": 13,\n", + " \"14\": 14,\n", + " \"15\": 15,\n", + " \"16\": 16,\n", + " \"17\": 17,\n", + " \"18\": 18,\n", + " \"19\": 19,\n", + " \"20\": 20,\n", + " \"21\": 21,\n", + " \"22\": 22,\n", + " \"23\": 23,\n", + " \"24\": 24,\n", + " \"25\": 25,\n", + " \"26\": 26,\n", + " \"27\": 27,\n", + " \"28\": 28,\n", + " \"29\": 29,\n", + " \"30\": 30,\n", + " \"31\": 31,\n", + " \"32\": 32,\n", + " \"33\": 33,\n", + " \"34\": 34,\n", + " \"35\": 35,\n", + " \"36\": 36,\n", + " \"37\": 37,\n", + " \"38\": 38,\n", + " \"39\": 39,\n", + " \"40\": 40,\n", + " \"41\": 41,\n", + " \"42\": 42,\n", + " \"43\": 43,\n", + " \"44\": 44,\n", + " \"45\": 45,\n", + " \"46\": 46,\n", + " \"47\": 47,\n", + " \"48\": 48,\n", + " \"49\": 49,\n", + " \"50\": 50,\n", + " \"51\": 51,\n", + " \"52\": 52,\n", + " \"53\": 53,\n", + " \"54\": 54,\n", + " \"55\": 55,\n", + " \"56\": 56,\n", + " \"57\": 57,\n", + " \"58\": 58,\n", + " \"59\": 59\n", + " },\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"label2id\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8,\n", + " \"9\": 9,\n", + " \"10\": 10,\n", + " \"11\": 11,\n", + " \"12\": 12,\n", + " \"13\": 13,\n", + " \"14\": 14,\n", + " \"15\": 15,\n", + " \"16\": 16,\n", + " \"17\": 17,\n", + " \"18\": 18,\n", + " \"19\": 19,\n", + " \"20\": 20,\n", + " \"21\": 21,\n", + " \"22\": 22,\n", + " \"23\": 23,\n", + " \"24\": 24,\n", + " \"25\": 25,\n", + " \"26\": 26,\n", + " \"27\": 27,\n", + " \"28\": 28,\n", + " \"29\": 29,\n", + " \"30\": 30,\n", + " \"31\": 31,\n", + " \"32\": 32,\n", + " \"33\": 33,\n", + " \"34\": 34,\n", + " \"35\": 35,\n", + " \"36\": 36,\n", + " \"37\": 37,\n", + " \"38\": 38,\n", + " \"39\": 39,\n", + " \"40\": 40,\n", + " \"41\": 41,\n", + " \"42\": 42,\n", + " \"43\": 43,\n", + " \"44\": 44,\n", + " \"45\": 45,\n", + " \"46\": 46,\n", + " \"47\": 47,\n", + " \"48\": 48,\n", + " \"49\": 49,\n", + " \"50\": 50,\n", + " \"51\": 51,\n", + " \"52\": 52,\n", + " \"53\": 53,\n", + " \"54\": 54,\n", + " \"55\": 55,\n", + " \"56\": 56,\n", + " \"57\": 57,\n", + " \"58\": 58,\n", + " \"59\": 59\n", + " },\n", + " \"layer_norm_eps\": 1e-05,\n", + " 
\"max_position_embeddings\": 514,\n", + " \"model_type\": \"roberta\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 1,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50265\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 16:29:42,916 >> loading file https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/vocab.json from cache at /root/.cache/huggingface/transformers/f47efb87887425ef9a4ef795bfaa907d57ac9a650d733c7ca621b9eced3235e8.af6be2925520943ae92cd64250bdaa83a8c9d6f91422efbac7ed33a20e0b6e1b\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 16:29:42,916 >> loading file https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/merges.txt from cache at /root/.cache/huggingface/transformers/aba9e0895dea47dd4208a36012ffd3eb21eb4c5f7ce0be6547afb37cdd4ddef4.360a60d1c13cbafd862a8e2a85b267c2b3eb0226d5469633a889b33ba0dde234\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 16:29:42,916 >> loading file https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/baad57d0f574d3e660cafb14601d0ecebe83f25071d59f3e51d225d75285b773.0803a0fadc521f273c2682a34dc285f90fe9de9e932bbe77401d11672f7fcb60\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 16:29:42,916 >> loading file https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 16:29:42,916 >> loading file https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/special_tokens_map.json from cache at /root/.cache/huggingface/transformers/68d1fdfe72fdcac403d8f363239c379d8125162f50a954030c4476982f88d69e.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 16:29:42,916 >> loading file https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/tokenizer_config.json from cache at /root/.cache/huggingface/transformers/72bf61c243630a112d8fa8c8d9162f1a5e01fab0602d2f2a7792cecdc0a4986f.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|modeling_utils.py:1271] 2021-07-19 16:29:43,748 >> loading weights file https://huggingface.co/bertin-project/bertin-roberta-base-spanish/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/8a611c8e6ab409ea523e84173bef4b1ef257262487d732b05c68d31b674788e5.ae8a25c127f59d8b0d9d11c53667336d7c491e5b06338b3ce56369ee735acd6f\n", + "[WARNING|modeling_utils.py:1502] 2021-07-19 16:29:51,687 >> Some weights of the model checkpoint at bertin-project/bertin-roberta-base-spanish were not used when initializing RobertaForTokenClassification: ['lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.weight']\n", + "- This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "[WARNING|modeling_utils.py:1513] 2021-07-19 16:29:51,687 >> Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at bertin-project/bertin-roberta-base-spanish and are newly initialized: ['classifier.bias', 'classifier.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "Running tokenizer on train dataset: 0% 0/9 [00:00> The following columns in the training set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: pos_tags, id, tokens, ner_tags.\n", + "[INFO|trainer.py:1162] 2021-07-19 16:30:03,256 >> ***** Running training *****\n", + "[INFO|trainer.py:1163] 2021-07-19 16:30:03,256 >> Num examples = 8324\n", + "[INFO|trainer.py:1164] 2021-07-19 16:30:03,256 >> Num Epochs = 5\n", + "[INFO|trainer.py:1165] 2021-07-19 16:30:03,256 >> Instantaneous batch size per device = 8\n", + "[INFO|trainer.py:1166] 2021-07-19 16:30:03,256 >> Total train batch size (w. parallel, distributed & accumulation) = 8\n", + "[INFO|trainer.py:1167] 2021-07-19 16:30:03,257 >> Gradient Accumulation steps = 1\n", + "[INFO|trainer.py:1168] 2021-07-19 16:30:03,257 >> Total optimization steps = 5205\n", + "[INFO|integrations.py:417] 2021-07-19 16:30:03,274 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mversae\u001b[0m (use `wandb login --relogin` to force relogin)\n", + "2021-07-19 16:30:04.895679: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m./outputs\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval/runs/giu49zyo\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in /content/wandb/run-20210719_163003-giu49zyo\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run `wandb offline` to turn off syncing.\n", + "\n", + " 10% 500/5205 [02:53<30:47, 2.55it/s]{'loss': 0.3973, 'learning_rate': 4.5196926032660905e-05, 'epoch': 0.48}\n", + " 10% 500/5205 [02:53<30:47, 2.55it/s][INFO|trainer.py:1917] 2021-07-19 16:32:59,838 >> Saving model checkpoint to ./outputs/checkpoint-500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 16:32:59,841 >> Configuration saved in ./outputs/checkpoint-500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 16:33:01,329 >> Model weights saved in ./outputs/checkpoint-500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 16:33:01,330 >> tokenizer config file saved in ./outputs/checkpoint-500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 16:33:01,331 >> Special tokens file saved in ./outputs/checkpoint-500/special_tokens_map.json\n", + " 19% 1000/5205 [05:52<27:20, 
2.56it/s]{'loss': 0.1271, 'learning_rate': 4.039385206532181e-05, 'epoch': 0.96}\n", + " 19% 1000/5205 [05:52<27:20, 2.56it/s][INFO|trainer.py:1917] 2021-07-19 16:35:58,841 >> Saving model checkpoint to ./outputs/checkpoint-1000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 16:35:58,843 >> Configuration saved in ./outputs/checkpoint-1000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 16:36:00,231 >> Model weights saved in ./outputs/checkpoint-1000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 16:36:00,232 >> tokenizer config file saved in ./outputs/checkpoint-1000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 16:36:00,232 >> Special tokens file saved in ./outputs/checkpoint-1000/special_tokens_map.json\n", + " 29% 1500/5205 [08:51<24:04, 2.56it/s]{'loss': 0.0865, 'learning_rate': 3.5590778097982716e-05, 'epoch': 1.44}\n", + "[INFO|trainer.py:1917] 2021-07-19 16:38:57,818 >> Saving model checkpoint to ./outputs/checkpoint-1500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 16:38:57,820 >> Configuration saved in ./outputs/checkpoint-1500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 16:38:59,318 >> Model weights saved in ./outputs/checkpoint-1500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 16:38:59,319 >> tokenizer config file saved in ./outputs/checkpoint-1500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 16:38:59,319 >> Special tokens file saved in ./outputs/checkpoint-1500/special_tokens_map.json\n", + " 38% 2000/5205 [11:50<20:59, 2.54it/s]{'loss': 0.0797, 'learning_rate': 3.078770413064361e-05, 'epoch': 1.92}\n", + " 38% 2000/5205 [11:50<20:59, 2.54it/s][INFO|trainer.py:1917] 2021-07-19 16:41:56,862 >> Saving model checkpoint to ./outputs/checkpoint-2000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 16:41:56,864 >> Configuration saved in ./outputs/checkpoint-2000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 16:41:58,291 >> Model weights saved in ./outputs/checkpoint-2000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 16:41:58,292 >> tokenizer config file saved in ./outputs/checkpoint-2000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 16:41:58,293 >> Special tokens file saved in ./outputs/checkpoint-2000/special_tokens_map.json\n", + "{'loss': 0.055, 'learning_rate': 2.5984630163304517e-05, 'epoch': 2.4}\n", + " 48% 2500/5205 [14:49<17:37, 2.56it/s][INFO|trainer.py:1917] 2021-07-19 16:44:55,537 >> Saving model checkpoint to ./outputs/checkpoint-2500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 16:44:55,539 >> Configuration saved in ./outputs/checkpoint-2500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 16:44:57,222 >> Model weights saved in ./outputs/checkpoint-2500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 16:44:57,227 >> tokenizer config file saved in ./outputs/checkpoint-2500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 16:44:57,228 >> Special tokens file saved in ./outputs/checkpoint-2500/special_tokens_map.json\n", + " 58% 3000/5205 [17:48<14:39, 2.51it/s]{'loss': 0.0498, 'learning_rate': 2.118155619596542e-05, 'epoch': 2.88}\n", + " 58% 3000/5205 [17:48<14:39, 2.51it/s][INFO|trainer.py:1917] 2021-07-19 16:47:54,336 >> Saving model checkpoint to ./outputs/checkpoint-3000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 16:47:54,340 >> Configuration saved 
in ./outputs/checkpoint-3000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 16:47:55,927 >> Model weights saved in ./outputs/checkpoint-3000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 16:47:55,928 >> tokenizer config file saved in ./outputs/checkpoint-3000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 16:47:55,930 >> Special tokens file saved in ./outputs/checkpoint-3000/special_tokens_map.json\n", + " 67% 3500/5205 [20:46<11:06, 2.56it/s]{'loss': 0.0349, 'learning_rate': 1.6378482228626322e-05, 'epoch': 3.36}\n", + " 67% 3500/5205 [20:46<11:06, 2.56it/s][INFO|trainer.py:1917] 2021-07-19 16:50:52,582 >> Saving model checkpoint to ./outputs/checkpoint-3500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 16:50:52,584 >> Configuration saved in ./outputs/checkpoint-3500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 16:50:54,163 >> Model weights saved in ./outputs/checkpoint-3500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 16:50:54,164 >> tokenizer config file saved in ./outputs/checkpoint-3500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 16:50:54,165 >> Special tokens file saved in ./outputs/checkpoint-3500/special_tokens_map.json\n", + " 77% 4000/5205 [23:44<07:53, 2.54it/s]{'loss': 0.0313, 'learning_rate': 1.1575408261287224e-05, 'epoch': 3.84}\n", + "[INFO|trainer.py:1917] 2021-07-19 16:53:50,996 >> Saving model checkpoint to ./outputs/checkpoint-4000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 16:53:50,998 >> Configuration saved in ./outputs/checkpoint-4000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 16:53:52,518 >> Model weights saved in ./outputs/checkpoint-4000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 16:53:52,519 >> tokenizer config file saved in ./outputs/checkpoint-4000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 16:53:52,609 >> Special tokens file saved in ./outputs/checkpoint-4000/special_tokens_map.json\n", + " 86% 4500/5205 [26:43<04:33, 2.58it/s][INFO|trainer.py:1917] 2021-07-19 16:56:49,291 >> Saving model checkpoint to ./outputs/checkpoint-4500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 16:56:49,293 >> Configuration saved in ./outputs/checkpoint-4500/config.json\n", + "{'loss': 0.0242, 'learning_rate': 6.7723342939481265e-06, 'epoch': 4.32}\n", + "[INFO|modeling_utils.py:997] 2021-07-19 16:56:50,861 >> Model weights saved in ./outputs/checkpoint-4500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 16:56:50,862 >> tokenizer config file saved in ./outputs/checkpoint-4500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 16:56:50,862 >> Special tokens file saved in ./outputs/checkpoint-4500/special_tokens_map.json\n", + " 96% 5000/5205 [29:41<01:19, 2.58it/s][INFO|trainer.py:1917] 2021-07-19 16:59:47,339 >> Saving model checkpoint to ./outputs/checkpoint-5000\n", + "{'loss': 0.02, 'learning_rate': 1.96926032660903e-06, 'epoch': 4.8}\n", + "[INFO|configuration_utils.py:379] 2021-07-19 16:59:47,341 >> Configuration saved in ./outputs/checkpoint-5000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 16:59:48,836 >> Model weights saved in ./outputs/checkpoint-5000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 16:59:48,837 >> tokenizer config file saved in ./outputs/checkpoint-5000/tokenizer_config.json\n", + 
"[INFO|tokenization_utils_base.py:2004] 2021-07-19 16:59:48,838 >> Special tokens file saved in ./outputs/checkpoint-5000/special_tokens_map.json\n", + "100% 5205/5205 [30:57<00:00, 3.26it/s][INFO|trainer.py:1358] 2021-07-19 17:01:03,736 >> \n", + "\n", + "Training completed. Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n", + "{'train_runtime': 1860.4796, 'train_samples_per_second': 22.371, 'train_steps_per_second': 2.798, 'train_loss': 0.08782961375431643, 'epoch': 5.0}\n", + "100% 5205/5205 [30:57<00:00, 2.80it/s]\n", + "[INFO|trainer.py:1917] 2021-07-19 17:01:03,739 >> Saving model checkpoint to ./outputs\n", + "[INFO|configuration_utils.py:379] 2021-07-19 17:01:03,741 >> Configuration saved in ./outputs/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 17:01:05,198 >> Model weights saved in ./outputs/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 17:01:05,198 >> tokenizer config file saved in ./outputs/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 17:01:05,199 >> Special tokens file saved in ./outputs/special_tokens_map.json\n", + "***** train metrics *****\n", + " epoch = 5.0\n", + " train_loss = 0.0878\n", + " train_runtime = 0:31:00.47\n", + " train_samples = 8324\n", + " train_samples_per_second = 22.371\n", + " train_steps_per_second = 2.798\n", + "[INFO|trainer.py:522] 2021-07-19 17:01:05,328 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: pos_tags, id, tokens, ner_tags.\n", + "07/19/2021 17:01:05 - INFO - __main__ - *** Evaluate ***\n", + "[INFO|trainer.py:2163] 2021-07-19 17:01:05,360 >> ***** Running Evaluation *****\n", + "[INFO|trainer.py:2165] 2021-07-19 17:01:05,361 >> Num examples = 1916\n", + "[INFO|trainer.py:2168] 2021-07-19 17:01:05,361 >> Batch size = 8\n", + "100% 239/240 [00:24<00:00, 9.59it/s]/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: NC seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fpa seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fpt seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fc seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Z seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: NP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fp seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + 
"/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fg seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DA seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: AQ seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VAI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: SP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PR seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: RG seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: CS seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: CC seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fe seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fx seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DD seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PD seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + 
"/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMG seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: P0 seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMM seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fd seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: AO seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSS seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VAS seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: RN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fat seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Faa seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DE seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMS seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + 
"/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fz seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PT seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VAN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSG seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PX seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DT seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fh seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VAP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fs seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSM seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Y seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fit seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fia seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. 
Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "100% 240/240 [00:28<00:00, 8.46it/s]\n", + "***** eval metrics *****\n", + " epoch = 5.0\n", + " eval_accuracy = 0.969\n", + " eval_f1 = 0.9638\n", + " eval_loss = 0.1236\n", + " eval_precision = 0.9644\n", + " eval_recall = 0.9632\n", + " eval_runtime = 0:00:28.48\n", + " eval_samples = 1916\n", + " eval_samples_per_second = 67.263\n", + " eval_steps_per_second = 8.425\n", + "\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish, PID 1271\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Program ended successfully.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find user logs for this run at: /content/wandb/run-20210719_163003-giu49zyo/logs/debug.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find internal logs for this run at: /content/wandb/run-20210719_163003-giu49zyo/logs/debug-internal.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.02\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 5205\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime 1890\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp 1626714093\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step 11\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime 1860.4796\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second 22.371\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second 2.798\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos 1.08808736772096e+16\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss 0.08783\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 0.12361\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision 0.96442\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall 0.96322\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 0.96382\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy 0.96897\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 28.4852\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 67.263\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 8.425\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss █▃▂▂▂▂▁▁▁▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate █▇▆▆▅▄▃▃▂▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step ▁▂▂▃▄▄▅▅▆▇▇█\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▁\n", + 
"\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 1 other file(s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced \u001b[33m./outputs\u001b[0m: \u001b[34mhttps://wandb.ai/versae/bertin-eval/runs/giu49zyo\u001b[0m\n", + "2021-07-19 17:01:45.878971: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "07/19/2021 17:01:48 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", + "07/19/2021 17:01:48 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", + "_n_gpu=1,\n", + "adafactor=False,\n", + "adam_beta1=0.9,\n", + "adam_beta2=0.999,\n", + "adam_epsilon=1e-08,\n", + "dataloader_drop_last=False,\n", + "dataloader_num_workers=0,\n", + "dataloader_pin_memory=True,\n", + "ddp_find_unused_parameters=None,\n", + "debug=[],\n", + "deepspeed=None,\n", + "disable_tqdm=False,\n", + "do_eval=True,\n", + "do_predict=False,\n", + "do_train=True,\n", + "eval_accumulation_steps=None,\n", + "eval_steps=500,\n", + "evaluation_strategy=IntervalStrategy.NO,\n", + "fp16=False,\n", + "fp16_backend=auto,\n", + "fp16_full_eval=False,\n", + "fp16_opt_level=O1,\n", + "gradient_accumulation_steps=1,\n", + "greater_is_better=None,\n", + "group_by_length=False,\n", + "ignore_data_skip=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=5e-05,\n", + "length_column_name=length,\n", + "load_best_model_at_end=False,\n", + "local_rank=-1,\n", + "log_level=-1,\n", + "log_level_replica=-1,\n", + "log_on_each_node=True,\n", + "logging_dir=./outputs/runs/Jul19_17-01-48_eab5184de9ff,\n", + "logging_first_step=False,\n", + "logging_steps=500,\n", + "logging_strategy=IntervalStrategy.STEPS,\n", + "lr_scheduler_type=SchedulerType.LINEAR,\n", + "max_grad_norm=1.0,\n", + "max_steps=-1,\n", + "metric_for_best_model=None,\n", + "mp_parameters=,\n", + "no_cuda=False,\n", + "num_train_epochs=5.0,\n", + "output_dir=./outputs,\n", + "overwrite_output_dir=True,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=8,\n", + "per_device_train_batch_size=8,\n", + "prediction_loss_only=False,\n", + "push_to_hub=False,\n", + "push_to_hub_model_id=outputs,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=None,\n", + "remove_unused_columns=True,\n", + "report_to=['tensorboard', 'wandb'],\n", + "resume_from_checkpoint=None,\n", + "run_name=./outputs,\n", + "save_on_each_node=False,\n", + "save_steps=500,\n", + "save_strategy=IntervalStrategy.STEPS,\n", + "save_total_limit=None,\n", + "seed=42,\n", + "sharded_ddp=[],\n", + "skip_memory_metrics=True,\n", + "tpu_metrics_debug=False,\n", + "tpu_num_cores=None,\n", + "use_legacy_prediction_loop=False,\n", + "warmup_ratio=0.0,\n", + "warmup_steps=0,\n", + "weight_decay=0.0,\n", + ")\n", + "07/19/2021 17:01:50 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 17:01:50 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at 
/root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 17:01:50 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + "07/19/2021 17:01:50 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 17:01:50 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 17:01:51 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 17:01:51 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 17:01:51 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + "07/19/2021 17:01:51 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 17:01:51 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 17:01:51 - INFO - datasets.info - Loading Dataset Infos from /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 17:01:51 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", + "07/19/2021 17:01:51 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 17:01:51 - WARNING - datasets.builder - Reusing dataset conll2002 (/root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5)\n", + "07/19/2021 17:01:51 - INFO - datasets.info - Loading Dataset info from 
/root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "100% 3/3 [00:00<00:00, 25.86it/s]\n", + "[INFO|configuration_utils.py:545] 2021-07-19 17:01:51,886 >> loading configuration file https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/d32cc9b6c8f303dbd398a937552d8d9c26d842d418f1cd79adb3fc25f169f722.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|configuration_utils.py:581] 2021-07-19 17:01:51,887 >> Model config RobertaConfig {\n", + " \"architectures\": [\n", + " \"RobertaForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"bos_token_id\": 0,\n", + " \"eos_token_id\": 2,\n", + " \"finetuning_task\": \"pos\",\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"id2label\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8,\n", + " \"9\": 9,\n", + " \"10\": 10,\n", + " \"11\": 11,\n", + " \"12\": 12,\n", + " \"13\": 13,\n", + " \"14\": 14,\n", + " \"15\": 15,\n", + " \"16\": 16,\n", + " \"17\": 17,\n", + " \"18\": 18,\n", + " \"19\": 19,\n", + " \"20\": 20,\n", + " \"21\": 21,\n", + " \"22\": 22,\n", + " \"23\": 23,\n", + " \"24\": 24,\n", + " \"25\": 25,\n", + " \"26\": 26,\n", + " \"27\": 27,\n", + " \"28\": 28,\n", + " \"29\": 29,\n", + " \"30\": 30,\n", + " \"31\": 31,\n", + " \"32\": 32,\n", + " \"33\": 33,\n", + " \"34\": 34,\n", + " \"35\": 35,\n", + " \"36\": 36,\n", + " \"37\": 37,\n", + " \"38\": 38,\n", + " \"39\": 39,\n", + " \"40\": 40,\n", + " \"41\": 41,\n", + " \"42\": 42,\n", + " \"43\": 43,\n", + " \"44\": 44,\n", + " \"45\": 45,\n", + " \"46\": 46,\n", + " \"47\": 47,\n", + " \"48\": 48,\n", + " \"49\": 49,\n", + " \"50\": 50,\n", + " \"51\": 51,\n", + " \"52\": 52,\n", + " \"53\": 53,\n", + " \"54\": 54,\n", + " \"55\": 55,\n", + " \"56\": 56,\n", + " \"57\": 57,\n", + " \"58\": 58,\n", + " \"59\": 59\n", + " },\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"label2id\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8,\n", + " \"9\": 9,\n", + " \"10\": 10,\n", + " \"11\": 11,\n", + " \"12\": 12,\n", + " \"13\": 13,\n", + " \"14\": 14,\n", + " \"15\": 15,\n", + " \"16\": 16,\n", + " \"17\": 17,\n", + " \"18\": 18,\n", + " \"19\": 19,\n", + " \"20\": 20,\n", + " \"21\": 21,\n", + " \"22\": 22,\n", + " \"23\": 23,\n", + " \"24\": 24,\n", + " \"25\": 25,\n", + " \"26\": 26,\n", + " \"27\": 27,\n", + " \"28\": 28,\n", + " \"29\": 29,\n", + " \"30\": 30,\n", + " \"31\": 31,\n", + " \"32\": 32,\n", + " \"33\": 33,\n", + " \"34\": 34,\n", + " \"35\": 35,\n", + " \"36\": 36,\n", + " \"37\": 37,\n", + " \"38\": 38,\n", + " \"39\": 39,\n", + " \"40\": 40,\n", + " \"41\": 41,\n", + " \"42\": 42,\n", + " \"43\": 43,\n", + " \"44\": 44,\n", + " \"45\": 45,\n", + " \"46\": 46,\n", + " \"47\": 47,\n", + " \"48\": 48,\n", + " \"49\": 49,\n", + " \"50\": 50,\n", + " \"51\": 51,\n", + " \"52\": 52,\n", + " \"53\": 53,\n", + " \"54\": 54,\n", + " \"55\": 55,\n", + " \"56\": 56,\n", + " \"57\": 57,\n", + " \"58\": 58,\n", + " \"59\": 59\n", + " },\n", + " \"layer_norm_eps\": 1e-05,\n", + " 
\"max_position_embeddings\": 514,\n", + " \"model_type\": \"roberta\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 1,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50265\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 17:01:57,277 >> loading file https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/vocab.json from cache at /root/.cache/huggingface/transformers/20af099ba4d2572dbbdf666ab59e4864bd3ec4867e50cc70d7989d1638fced71.af6be2925520943ae92cd64250bdaa83a8c9d6f91422efbac7ed33a20e0b6e1b\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 17:01:57,277 >> loading file https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/merges.txt from cache at /root/.cache/huggingface/transformers/45c757f0d795baeb7913a8678519d1c353de20333a2e2e181a005f8f5b4dc60c.360a60d1c13cbafd862a8e2a85b267c2b3eb0226d5469633a889b33ba0dde234\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 17:01:57,277 >> loading file https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/abcefe6cb866709f8793e0130a2d9bd883fee0cbcc25ffee734858e4d002c5b6.0803a0fadc521f273c2682a34dc285f90fe9de9e932bbe77401d11672f7fcb60\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 17:01:57,277 >> loading file https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 17:01:57,277 >> loading file https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/special_tokens_map.json from cache at /root/.cache/huggingface/transformers/73738db8ccbbbf8b6685f923eeb9e382d4c8e36fcfa5c8124d82ed3eed582725.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 17:01:57,277 >> loading file https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/tokenizer_config.json from cache at /root/.cache/huggingface/transformers/4f604c1e66964e3a55ecf1d4959f4ebb3416a50476c866ac53fedd6a93b68c2b.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|modeling_utils.py:1271] 2021-07-19 17:01:58,106 >> loading weights file https://huggingface.co/flax-community/bertin-roberta-large-spanish/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/7189e822ad05437642e8c70b270aad1da7745b3f42ae88ee851bd7336559387c.8970e1b3c4890a283e6d9c30f1437e32aec6a36c0f9aae84920ff5a363d4b636\n", + "[WARNING|modeling_utils.py:1502] 2021-07-19 17:02:06,056 >> Some weights of the model checkpoint at flax-community/bertin-roberta-large-spanish were not used when initializing RobertaForTokenClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.bias']\n", + "- This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "[WARNING|modeling_utils.py:1513] 2021-07-19 17:02:06,057 >> Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at flax-community/bertin-roberta-large-spanish and are newly initialized: ['classifier.weight', 'classifier.bias']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "Running tokenizer on train dataset: 0% 0/9 [00:00> The following columns in the training set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: id, tokens, pos_tags, ner_tags.\n", + "[INFO|trainer.py:1162] 2021-07-19 17:02:17,713 >> ***** Running training *****\n", + "[INFO|trainer.py:1163] 2021-07-19 17:02:17,714 >> Num examples = 8324\n", + "[INFO|trainer.py:1164] 2021-07-19 17:02:17,714 >> Num Epochs = 5\n", + "[INFO|trainer.py:1165] 2021-07-19 17:02:17,714 >> Instantaneous batch size per device = 8\n", + "[INFO|trainer.py:1166] 2021-07-19 17:02:17,714 >> Total train batch size (w. parallel, distributed & accumulation) = 8\n", + "[INFO|trainer.py:1167] 2021-07-19 17:02:17,714 >> Gradient Accumulation steps = 1\n", + "[INFO|trainer.py:1168] 2021-07-19 17:02:17,714 >> Total optimization steps = 5205\n", + "[INFO|integrations.py:417] 2021-07-19 17:02:17,731 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mversae\u001b[0m (use `wandb login --relogin` to force relogin)\n", + "2021-07-19 17:02:19.333152: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m./outputs\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval/runs/9u82spar\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in /content/wandb/run-20210719_170217-9u82spar\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run `wandb offline` to turn off syncing.\n", + "\n", + " 10% 500/5205 [02:53<30:45, 2.55it/s]{'loss': 0.3908, 'learning_rate': 4.5196926032660905e-05, 'epoch': 0.48}\n", + "[INFO|trainer.py:1917] 2021-07-19 17:05:14,500 >> Saving model checkpoint to ./outputs/checkpoint-500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 17:05:14,502 >> Configuration saved in ./outputs/checkpoint-500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 17:05:16,023 >> Model weights saved in ./outputs/checkpoint-500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 17:05:16,024 >> tokenizer config file saved in ./outputs/checkpoint-500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 17:05:16,024 >> Special tokens file saved in ./outputs/checkpoint-500/special_tokens_map.json\n", + "{'loss': 0.1294, 'learning_rate': 4.039385206532181e-05, 'epoch': 0.96}\n", + " 
19% 1000/5205 [05:52<27:26, 2.55it/s][INFO|trainer.py:1917] 2021-07-19 17:08:13,474 >> Saving model checkpoint to ./outputs/checkpoint-1000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 17:08:13,476 >> Configuration saved in ./outputs/checkpoint-1000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 17:08:14,926 >> Model weights saved in ./outputs/checkpoint-1000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 17:08:14,927 >> tokenizer config file saved in ./outputs/checkpoint-1000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 17:08:14,927 >> Special tokens file saved in ./outputs/checkpoint-1000/special_tokens_map.json\n", + " 29% 1500/5205 [08:51<24:21, 2.53it/s]{'loss': 0.0878, 'learning_rate': 3.5590778097982716e-05, 'epoch': 1.44}\n", + " 29% 1500/5205 [08:51<24:21, 2.53it/s][INFO|trainer.py:1917] 2021-07-19 17:11:12,356 >> Saving model checkpoint to ./outputs/checkpoint-1500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 17:11:12,358 >> Configuration saved in ./outputs/checkpoint-1500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 17:11:13,859 >> Model weights saved in ./outputs/checkpoint-1500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 17:11:13,860 >> tokenizer config file saved in ./outputs/checkpoint-1500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 17:11:13,861 >> Special tokens file saved in ./outputs/checkpoint-1500/special_tokens_map.json\n", + " 38% 2000/5205 [11:51<21:04, 2.53it/s]{'loss': 0.0794, 'learning_rate': 3.078770413064361e-05, 'epoch': 1.92}\n", + "[INFO|trainer.py:1917] 2021-07-19 17:14:12,133 >> Saving model checkpoint to ./outputs/checkpoint-2000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 17:14:12,137 >> Configuration saved in ./outputs/checkpoint-2000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 17:14:13,618 >> Model weights saved in ./outputs/checkpoint-2000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 17:14:13,619 >> tokenizer config file saved in ./outputs/checkpoint-2000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 17:14:13,619 >> Special tokens file saved in ./outputs/checkpoint-2000/special_tokens_map.json\n", + "{'loss': 0.0572, 'learning_rate': 2.5984630163304517e-05, 'epoch': 2.4}\n", + " 48% 2500/5205 [14:51<17:39, 2.55it/s][INFO|trainer.py:1917] 2021-07-19 17:17:11,978 >> Saving model checkpoint to ./outputs/checkpoint-2500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 17:17:11,980 >> Configuration saved in ./outputs/checkpoint-2500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 17:17:13,501 >> Model weights saved in ./outputs/checkpoint-2500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 17:17:13,502 >> tokenizer config file saved in ./outputs/checkpoint-2500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 17:17:13,502 >> Special tokens file saved in ./outputs/checkpoint-2500/special_tokens_map.json\n", + " 58% 3000/5205 [17:50<14:35, 2.52it/s]{'loss': 0.0508, 'learning_rate': 2.118155619596542e-05, 'epoch': 2.88}\n", + "[INFO|trainer.py:1917] 2021-07-19 17:20:11,596 >> Saving model checkpoint to ./outputs/checkpoint-3000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 17:20:11,603 >> Configuration saved in ./outputs/checkpoint-3000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 17:20:13,238 >> Model weights saved in 
./outputs/checkpoint-3000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 17:20:13,239 >> tokenizer config file saved in ./outputs/checkpoint-3000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 17:20:13,240 >> Special tokens file saved in ./outputs/checkpoint-3000/special_tokens_map.json\n", + " 67% 3500/5205 [20:50<11:10, 2.54it/s]{'loss': 0.0362, 'learning_rate': 1.6378482228626322e-05, 'epoch': 3.36}\n", + "[INFO|trainer.py:1917] 2021-07-19 17:23:10,623 >> Saving model checkpoint to ./outputs/checkpoint-3500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 17:23:10,625 >> Configuration saved in ./outputs/checkpoint-3500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 17:23:12,162 >> Model weights saved in ./outputs/checkpoint-3500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 17:23:12,163 >> tokenizer config file saved in ./outputs/checkpoint-3500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 17:23:12,163 >> Special tokens file saved in ./outputs/checkpoint-3500/special_tokens_map.json\n", + " 77% 4000/5205 [23:49<07:57, 2.52it/s]{'loss': 0.0326, 'learning_rate': 1.1575408261287224e-05, 'epoch': 3.84}\n", + "[INFO|trainer.py:1917] 2021-07-19 17:26:10,082 >> Saving model checkpoint to ./outputs/checkpoint-4000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 17:26:10,084 >> Configuration saved in ./outputs/checkpoint-4000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 17:26:11,615 >> Model weights saved in ./outputs/checkpoint-4000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 17:26:11,616 >> tokenizer config file saved in ./outputs/checkpoint-4000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 17:26:11,644 >> Special tokens file saved in ./outputs/checkpoint-4000/special_tokens_map.json\n", + "{'loss': 0.0248, 'learning_rate': 6.7723342939481265e-06, 'epoch': 4.32}\n", + " 86% 4500/5205 [26:48<04:37, 2.54it/s][INFO|trainer.py:1917] 2021-07-19 17:29:09,325 >> Saving model checkpoint to ./outputs/checkpoint-4500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 17:29:09,327 >> Configuration saved in ./outputs/checkpoint-4500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 17:29:11,036 >> Model weights saved in ./outputs/checkpoint-4500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 17:29:11,037 >> tokenizer config file saved in ./outputs/checkpoint-4500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 17:29:11,038 >> Special tokens file saved in ./outputs/checkpoint-4500/special_tokens_map.json\n", + "{'loss': 0.0204, 'learning_rate': 1.96926032660903e-06, 'epoch': 4.8}\n", + " 96% 5000/5205 [29:47<01:21, 2.52it/s][INFO|trainer.py:1917] 2021-07-19 17:32:08,591 >> Saving model checkpoint to ./outputs/checkpoint-5000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 17:32:08,593 >> Configuration saved in ./outputs/checkpoint-5000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 17:32:10,040 >> Model weights saved in ./outputs/checkpoint-5000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 17:32:10,041 >> tokenizer config file saved in ./outputs/checkpoint-5000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 17:32:10,041 >> Special tokens file saved in ./outputs/checkpoint-5000/special_tokens_map.json\n", + "100% 5205/5205 [31:04<00:00, 
3.24it/s][INFO|trainer.py:1358] 2021-07-19 17:33:25,415 >> \n", + "\n", + "Training completed. Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n", + "100% 5205/5205 [31:04<00:00, 3.24it/s]{'train_runtime': 1867.7015, 'train_samples_per_second': 22.284, 'train_steps_per_second': 2.787, 'train_loss': 0.08822382634700661, 'epoch': 5.0}\n", + "100% 5205/5205 [31:04<00:00, 2.79it/s]\n", + "[INFO|trainer.py:1917] 2021-07-19 17:33:25,419 >> Saving model checkpoint to ./outputs\n", + "[INFO|configuration_utils.py:379] 2021-07-19 17:33:25,421 >> Configuration saved in ./outputs/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 17:33:27,066 >> Model weights saved in ./outputs/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 17:33:27,067 >> tokenizer config file saved in ./outputs/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 17:33:27,067 >> Special tokens file saved in ./outputs/special_tokens_map.json\n", + "***** train metrics *****\n", + " epoch = 5.0\n", + " train_loss = 0.0882\n", + " train_runtime = 0:31:07.70\n", + " train_samples = 8324\n", + " train_samples_per_second = 22.284\n", + " train_steps_per_second = 2.787\n", + "[INFO|trainer.py:522] 2021-07-19 17:33:27,276 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: id, tokens, pos_tags, ner_tags.\n", + "07/19/2021 17:33:27 - INFO - __main__ - *** Evaluate ***\n", + "[INFO|trainer.py:2163] 2021-07-19 17:33:27,335 >> ***** Running Evaluation *****\n", + "[INFO|trainer.py:2165] 2021-07-19 17:33:27,335 >> Num examples = 1916\n", + "[INFO|trainer.py:2168] 2021-07-19 17:33:27,335 >> Batch size = 8\n", + "100% 239/240 [00:24<00:00, 9.48it/s]/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: NC seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fpa seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fpt seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fc seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Z seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: NP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fp seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fg seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + 
"/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DA seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: AQ seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VAI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: SP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PR seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: RG seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: CS seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: CC seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fe seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fx seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DD seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PD seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMG seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + 
"/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: P0 seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMM seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fd seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: AO seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSS seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VAS seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: RN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fat seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Faa seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DE seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMS seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fz seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + 
"/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PT seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VAN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSG seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PX seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DT seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fh seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VAP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fs seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSM seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Y seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fit seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fia seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. 
Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "100% 240/240 [00:28<00:00, 8.39it/s]\n", + "***** eval metrics *****\n", + " epoch = 5.0\n", + " eval_accuracy = 0.9697\n", + " eval_f1 = 0.9646\n", + " eval_loss = 0.1256\n", + " eval_precision = 0.965\n", + " eval_recall = 0.9642\n", + " eval_runtime = 0:00:28.71\n", + " eval_samples = 1916\n", + " eval_samples_per_second = 66.725\n", + " eval_steps_per_second = 8.358\n", + "\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish, PID 1335\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Program ended successfully.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find user logs for this run at: /content/wandb/run-20210719_170217-9u82spar/logs/debug.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find internal logs for this run at: /content/wandb/run-20210719_170217-9u82spar/logs/debug-internal.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.0204\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 5205\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime 1898\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp 1626716036\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step 11\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime 1867.7015\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second 22.284\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second 2.787\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos 1.08808736772096e+16\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss 0.08822\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 0.12556\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision 0.96496\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall 0.96421\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 0.96459\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy 0.96965\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 28.7149\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 66.725\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 8.358\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss █▃▂▂▂▂▁▁▁▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate █▇▆▆▅▄▃▃▂▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step ▁▂▂▃▄▄▅▅▆▇▇█\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▁\n", + 
"\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 1 other file(s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced \u001b[33m./outputs\u001b[0m: \u001b[34mhttps://wandb.ai/versae/bertin-eval/runs/9u82spar\u001b[0m\n", + "2021-07-19 17:34:08.180943: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "07/19/2021 17:34:10 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", + "07/19/2021 17:34:10 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", + "_n_gpu=1,\n", + "adafactor=False,\n", + "adam_beta1=0.9,\n", + "adam_beta2=0.999,\n", + "adam_epsilon=1e-08,\n", + "dataloader_drop_last=False,\n", + "dataloader_num_workers=0,\n", + "dataloader_pin_memory=True,\n", + "ddp_find_unused_parameters=None,\n", + "debug=[],\n", + "deepspeed=None,\n", + "disable_tqdm=False,\n", + "do_eval=True,\n", + "do_predict=False,\n", + "do_train=True,\n", + "eval_accumulation_steps=None,\n", + "eval_steps=500,\n", + "evaluation_strategy=IntervalStrategy.NO,\n", + "fp16=False,\n", + "fp16_backend=auto,\n", + "fp16_full_eval=False,\n", + "fp16_opt_level=O1,\n", + "gradient_accumulation_steps=1,\n", + "greater_is_better=None,\n", + "group_by_length=False,\n", + "ignore_data_skip=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=5e-05,\n", + "length_column_name=length,\n", + "load_best_model_at_end=False,\n", + "local_rank=-1,\n", + "log_level=-1,\n", + "log_level_replica=-1,\n", + "log_on_each_node=True,\n", + "logging_dir=./outputs/runs/Jul19_17-34-10_eab5184de9ff,\n", + "logging_first_step=False,\n", + "logging_steps=500,\n", + "logging_strategy=IntervalStrategy.STEPS,\n", + "lr_scheduler_type=SchedulerType.LINEAR,\n", + "max_grad_norm=1.0,\n", + "max_steps=-1,\n", + "metric_for_best_model=None,\n", + "mp_parameters=,\n", + "no_cuda=False,\n", + "num_train_epochs=5.0,\n", + "output_dir=./outputs,\n", + "overwrite_output_dir=True,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=8,\n", + "per_device_train_batch_size=8,\n", + "prediction_loss_only=False,\n", + "push_to_hub=False,\n", + "push_to_hub_model_id=outputs,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=None,\n", + "remove_unused_columns=True,\n", + "report_to=['tensorboard', 'wandb'],\n", + "resume_from_checkpoint=None,\n", + "run_name=./outputs,\n", + "save_on_each_node=False,\n", + "save_steps=500,\n", + "save_strategy=IntervalStrategy.STEPS,\n", + "save_total_limit=None,\n", + "seed=42,\n", + "sharded_ddp=[],\n", + "skip_memory_metrics=True,\n", + "tpu_metrics_debug=False,\n", + "tpu_num_cores=None,\n", + "use_legacy_prediction_loop=False,\n", + "warmup_ratio=0.0,\n", + "warmup_steps=0,\n", + "weight_decay=0.0,\n", + ")\n", + "07/19/2021 17:34:12 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 17:34:12 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at 
/root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 17:34:12 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + "07/19/2021 17:34:12 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 17:34:12 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 17:34:13 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 17:34:13 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 17:34:13 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + "07/19/2021 17:34:13 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 17:34:13 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 17:34:13 - INFO - datasets.info - Loading Dataset Infos from /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 17:34:13 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", + "07/19/2021 17:34:13 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 17:34:13 - WARNING - datasets.builder - Reusing dataset conll2002 (/root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5)\n", + "07/19/2021 17:34:13 - INFO - datasets.info - Loading Dataset info from 
/root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "100% 3/3 [00:00<00:00, 27.30it/s]\n", + "[INFO|configuration_utils.py:545] 2021-07-19 17:34:14,330 >> loading configuration file https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3b60ce79e9e99338ab683eee75e146acc49852f2f34f2c5610dfd78221a176ee.33b0b03a5bf5e640494a22a3aa4909c661effc0fa0e186b1513b17d9b058ca59\n", + "[INFO|configuration_utils.py:581] 2021-07-19 17:34:14,331 >> Model config RobertaConfig {\n", + " \"architectures\": [\n", + " \"RobertaForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.0,\n", + " \"bos_token_id\": 0,\n", + " \"eos_token_id\": 2,\n", + " \"finetuning_task\": \"pos\",\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.0,\n", + " \"hidden_size\": 768,\n", + " \"id2label\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8,\n", + " \"9\": 9,\n", + " \"10\": 10,\n", + " \"11\": 11,\n", + " \"12\": 12,\n", + " \"13\": 13,\n", + " \"14\": 14,\n", + " \"15\": 15,\n", + " \"16\": 16,\n", + " \"17\": 17,\n", + " \"18\": 18,\n", + " \"19\": 19,\n", + " \"20\": 20,\n", + " \"21\": 21,\n", + " \"22\": 22,\n", + " \"23\": 23,\n", + " \"24\": 24,\n", + " \"25\": 25,\n", + " \"26\": 26,\n", + " \"27\": 27,\n", + " \"28\": 28,\n", + " \"29\": 29,\n", + " \"30\": 30,\n", + " \"31\": 31,\n", + " \"32\": 32,\n", + " \"33\": 33,\n", + " \"34\": 34,\n", + " \"35\": 35,\n", + " \"36\": 36,\n", + " \"37\": 37,\n", + " \"38\": 38,\n", + " \"39\": 39,\n", + " \"40\": 40,\n", + " \"41\": 41,\n", + " \"42\": 42,\n", + " \"43\": 43,\n", + " \"44\": 44,\n", + " \"45\": 45,\n", + " \"46\": 46,\n", + " \"47\": 47,\n", + " \"48\": 48,\n", + " \"49\": 49,\n", + " \"50\": 50,\n", + " \"51\": 51,\n", + " \"52\": 52,\n", + " \"53\": 53,\n", + " \"54\": 54,\n", + " \"55\": 55,\n", + " \"56\": 56,\n", + " \"57\": 57,\n", + " \"58\": 58,\n", + " \"59\": 59\n", + " },\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"label2id\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8,\n", + " \"9\": 9,\n", + " \"10\": 10,\n", + " \"11\": 11,\n", + " \"12\": 12,\n", + " \"13\": 13,\n", + " \"14\": 14,\n", + " \"15\": 15,\n", + " \"16\": 16,\n", + " \"17\": 17,\n", + " \"18\": 18,\n", + " \"19\": 19,\n", + " \"20\": 20,\n", + " \"21\": 21,\n", + " \"22\": 22,\n", + " \"23\": 23,\n", + " \"24\": 24,\n", + " \"25\": 25,\n", + " \"26\": 26,\n", + " \"27\": 27,\n", + " \"28\": 28,\n", + " \"29\": 29,\n", + " \"30\": 30,\n", + " \"31\": 31,\n", + " \"32\": 32,\n", + " \"33\": 33,\n", + " \"34\": 34,\n", + " \"35\": 35,\n", + " \"36\": 36,\n", + " \"37\": 37,\n", + " \"38\": 38,\n", + " \"39\": 39,\n", + " \"40\": 40,\n", + " \"41\": 41,\n", + " \"42\": 42,\n", + " \"43\": 43,\n", + " \"44\": 44,\n", + " \"45\": 45,\n", + " \"46\": 46,\n", + " \"47\": 47,\n", + " \"48\": 48,\n", + " \"49\": 49,\n", + " \"50\": 50,\n", + " \"51\": 51,\n", + " \"52\": 52,\n", + " \"53\": 53,\n", + " \"54\": 54,\n", + " \"55\": 55,\n", + " \"56\": 56,\n", + " \"57\": 57,\n", + " \"58\": 58,\n", + " \"59\": 59\n", + " },\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"max_position_embeddings\": 514,\n", + " 
\"model_type\": \"roberta\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 1,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50262\n", + "}\n", + "\n", + "[INFO|configuration_utils.py:545] 2021-07-19 17:34:16,050 >> loading configuration file https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3b60ce79e9e99338ab683eee75e146acc49852f2f34f2c5610dfd78221a176ee.33b0b03a5bf5e640494a22a3aa4909c661effc0fa0e186b1513b17d9b058ca59\n", + "[INFO|configuration_utils.py:581] 2021-07-19 17:34:16,050 >> Model config RobertaConfig {\n", + " \"architectures\": [\n", + " \"RobertaForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.0,\n", + " \"bos_token_id\": 0,\n", + " \"eos_token_id\": 2,\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.0,\n", + " \"hidden_size\": 768,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"max_position_embeddings\": 514,\n", + " \"model_type\": \"roberta\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 1,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50262\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 17:34:20,946 >> loading file https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/vocab.json from cache at /root/.cache/huggingface/transformers/aec224417e3c4a4bda2283292ca7898205329eeb16f3b8db7ea5a36e51d55257.26eadee3bbe78c0682ce89a698fbb1698a0eee50c36cf83be2280a0f2a7b23c1\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 17:34:20,946 >> loading file https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/merges.txt from cache at /root/.cache/huggingface/transformers/6688b2cc67bbc01f3e369230731a072338dc286bf25b97e1513d359ab00f2ea3.0d24ae8bd5fabb1f5020f91bc602cefeb5a2938ab77e21769d28776345634b23\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 17:34:20,946 >> loading file https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/2f6b692045224588b9a5a4d5639db72d5e3fd2c18cbd0accf4c6dc81a4adc413.bd775ba884c9e650b58a3a333a97e47c8d1b9d37cdbe19b22fb04b1e41beb19d\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 17:34:20,946 >> loading file https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 17:34:20,946 >> loading file https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/special_tokens_map.json from cache at /root/.cache/huggingface/transformers/0f2dbdd0b96b43180e16d660cb1b274e46fa0cf63da426f09c1c82900fb52da1.cb2244924ab24d706b02fd7fcedaea4531566537687a539ebb94db511fd122a0\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 17:34:20,946 >> loading file https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/tokenizer_config.json from cache at /root/.cache/huggingface/transformers/e025b5b045564995a040386d80efa4d55779730827b72b4e40908073db9f0630.d8a7d006294d83173a76ac51a95b5a8470bbbc87c93c63633eaf9476656ed660\n", + 
"[INFO|configuration_utils.py:545] 2021-07-19 17:34:21,672 >> loading configuration file https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3b60ce79e9e99338ab683eee75e146acc49852f2f34f2c5610dfd78221a176ee.33b0b03a5bf5e640494a22a3aa4909c661effc0fa0e186b1513b17d9b058ca59\n", + "[INFO|configuration_utils.py:581] 2021-07-19 17:34:21,672 >> Model config RobertaConfig {\n", + " \"architectures\": [\n", + " \"RobertaForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.0,\n", + " \"bos_token_id\": 0,\n", + " \"eos_token_id\": 2,\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.0,\n", + " \"hidden_size\": 768,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"max_position_embeddings\": 514,\n", + " \"model_type\": \"roberta\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 1,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50262\n", + "}\n", + "\n", + "[INFO|modeling_utils.py:1271] 2021-07-19 17:34:22,484 >> loading weights file https://huggingface.co/BSC-TeMU/roberta-base-bne/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/a337644e63cac89729bd4fd067c987d2eb61b4b398d17d0ce0973c31dd76d2b0.c86d60e89da68465cb73e129befe8209faa3ac57b9aa272b87db45ba1f619582\n", + "[WARNING|modeling_utils.py:1502] 2021-07-19 17:34:30,475 >> Some weights of the model checkpoint at BSC-TeMU/roberta-base-bne were not used when initializing RobertaForTokenClassification: ['lm_head.decoder.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.bias']\n", + "- This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "[WARNING|modeling_utils.py:1513] 2021-07-19 17:34:30,475 >> Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at BSC-TeMU/roberta-base-bne and are newly initialized: ['classifier.bias', 'classifier.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "Running tokenizer on train dataset: 0% 0/9 [00:00> The following columns in the training set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: pos_tags, ner_tags, id, tokens.\n", + "[INFO|trainer.py:1162] 2021-07-19 17:34:42,456 >> ***** Running training *****\n", + "[INFO|trainer.py:1163] 2021-07-19 17:34:42,456 >> Num examples = 8324\n", + "[INFO|trainer.py:1164] 2021-07-19 17:34:42,456 >> Num Epochs = 5\n", + "[INFO|trainer.py:1165] 2021-07-19 17:34:42,456 >> Instantaneous batch size per device = 8\n", + "[INFO|trainer.py:1166] 2021-07-19 17:34:42,456 >> Total train batch size (w. 
parallel, distributed & accumulation) = 8\n", + "[INFO|trainer.py:1167] 2021-07-19 17:34:42,456 >> Gradient Accumulation steps = 1\n", + "[INFO|trainer.py:1168] 2021-07-19 17:34:42,456 >> Total optimization steps = 5205\n", + "[INFO|integrations.py:417] 2021-07-19 17:34:42,487 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mversae\u001b[0m (use `wandb login --relogin` to force relogin)\n", + "2021-07-19 17:34:44.164024: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m./outputs\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval/runs/1vhkvz54\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in /content/wandb/run-20210719_173442-1vhkvz54\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run `wandb offline` to turn off syncing.\n", + "\n", + "{'loss': 0.3477, 'learning_rate': 4.5196926032660905e-05, 'epoch': 0.48}\n", + " 10% 500/5205 [02:50<30:07, 2.60it/s][INFO|trainer.py:1917] 2021-07-19 17:37:35,757 >> Saving model checkpoint to ./outputs/checkpoint-500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 17:37:35,764 >> Configuration saved in ./outputs/checkpoint-500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 17:37:37,349 >> Model weights saved in ./outputs/checkpoint-500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 17:37:37,351 >> tokenizer config file saved in ./outputs/checkpoint-500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 17:37:37,351 >> Special tokens file saved in ./outputs/checkpoint-500/special_tokens_map.json\n", + "{'loss': 0.1027, 'learning_rate': 4.039385206532181e-05, 'epoch': 0.96}\n", + " 19% 1000/5205 [05:45<26:49, 2.61it/s][INFO|trainer.py:1917] 2021-07-19 17:40:31,168 >> Saving model checkpoint to ./outputs/checkpoint-1000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 17:40:31,170 >> Configuration saved in ./outputs/checkpoint-1000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 17:40:32,863 >> Model weights saved in ./outputs/checkpoint-1000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 17:40:32,872 >> tokenizer config file saved in ./outputs/checkpoint-1000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 17:40:32,876 >> Special tokens file saved in ./outputs/checkpoint-1000/special_tokens_map.json\n", + "{'loss': 0.0581, 'learning_rate': 3.5590778097982716e-05, 'epoch': 1.44}\n", + " 29% 1500/5205 [08:39<23:37, 2.61it/s][INFO|trainer.py:1917] 2021-07-19 17:43:25,078 >> Saving model checkpoint to ./outputs/checkpoint-1500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 17:43:25,080 >> Configuration saved in ./outputs/checkpoint-1500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 17:43:26,548 >> Model weights saved in ./outputs/checkpoint-1500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 17:43:26,551 >> tokenizer config file saved in ./outputs/checkpoint-1500/tokenizer_config.json\n", + 
"[INFO|tokenization_utils_base.py:2004] 2021-07-19 17:43:26,551 >> Special tokens file saved in ./outputs/checkpoint-1500/special_tokens_map.json\n", + " 38% 2000/5205 [11:34<20:26, 2.61it/s]{'loss': 0.0528, 'learning_rate': 3.078770413064361e-05, 'epoch': 1.92}\n", + " 38% 2000/5205 [11:34<20:26, 2.61it/s][INFO|trainer.py:1917] 2021-07-19 17:46:19,766 >> Saving model checkpoint to ./outputs/checkpoint-2000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 17:46:19,768 >> Configuration saved in ./outputs/checkpoint-2000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 17:46:21,211 >> Model weights saved in ./outputs/checkpoint-2000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 17:46:21,215 >> tokenizer config file saved in ./outputs/checkpoint-2000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 17:46:21,216 >> Special tokens file saved in ./outputs/checkpoint-2000/special_tokens_map.json\n", + "{'loss': 0.0262, 'learning_rate': 2.5984630163304517e-05, 'epoch': 2.4}\n", + " 48% 2500/5205 [14:29<17:19, 2.60it/s][INFO|trainer.py:1917] 2021-07-19 17:49:14,544 >> Saving model checkpoint to ./outputs/checkpoint-2500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 17:49:14,547 >> Configuration saved in ./outputs/checkpoint-2500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 17:49:16,052 >> Model weights saved in ./outputs/checkpoint-2500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 17:49:16,055 >> tokenizer config file saved in ./outputs/checkpoint-2500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 17:49:16,056 >> Special tokens file saved in ./outputs/checkpoint-2500/special_tokens_map.json\n", + "{'loss': 0.0222, 'learning_rate': 2.118155619596542e-05, 'epoch': 2.88}\n", + " 58% 3000/5205 [17:23<13:59, 2.63it/s][INFO|trainer.py:1917] 2021-07-19 17:52:08,771 >> Saving model checkpoint to ./outputs/checkpoint-3000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 17:52:08,774 >> Configuration saved in ./outputs/checkpoint-3000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 17:52:10,316 >> Model weights saved in ./outputs/checkpoint-3000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 17:52:10,319 >> tokenizer config file saved in ./outputs/checkpoint-3000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 17:52:10,320 >> Special tokens file saved in ./outputs/checkpoint-3000/special_tokens_map.json\n", + "{'loss': 0.012, 'learning_rate': 1.6378482228626322e-05, 'epoch': 3.36}\n", + " 67% 3500/5205 [20:17<10:46, 2.64it/s][INFO|trainer.py:1917] 2021-07-19 17:55:02,991 >> Saving model checkpoint to ./outputs/checkpoint-3500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 17:55:02,993 >> Configuration saved in ./outputs/checkpoint-3500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 17:55:04,533 >> Model weights saved in ./outputs/checkpoint-3500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 17:55:04,537 >> tokenizer config file saved in ./outputs/checkpoint-3500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 17:55:04,538 >> Special tokens file saved in ./outputs/checkpoint-3500/special_tokens_map.json\n", + " 77% 4000/5205 [23:11<07:37, 2.64it/s]{'loss': 0.0073, 'learning_rate': 1.1575408261287224e-05, 'epoch': 3.84}\n", + "[INFO|trainer.py:1917] 2021-07-19 17:57:57,141 >> Saving model checkpoint to 
./outputs/checkpoint-4000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 17:57:57,144 >> Configuration saved in ./outputs/checkpoint-4000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 17:57:58,725 >> Model weights saved in ./outputs/checkpoint-4000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 17:57:58,728 >> tokenizer config file saved in ./outputs/checkpoint-4000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 17:57:58,753 >> Special tokens file saved in ./outputs/checkpoint-4000/special_tokens_map.json\n", + "{'loss': 0.0047, 'learning_rate': 6.7723342939481265e-06, 'epoch': 4.32}\n", + " 86% 4500/5205 [26:05<04:29, 2.61it/s][INFO|trainer.py:1917] 2021-07-19 18:00:51,142 >> Saving model checkpoint to ./outputs/checkpoint-4500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 18:00:51,145 >> Configuration saved in ./outputs/checkpoint-4500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 18:00:52,911 >> Model weights saved in ./outputs/checkpoint-4500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 18:00:52,922 >> tokenizer config file saved in ./outputs/checkpoint-4500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 18:00:52,923 >> Special tokens file saved in ./outputs/checkpoint-4500/special_tokens_map.json\n", + " 96% 5000/5205 [28:59<01:17, 2.64it/s]{'loss': 0.0025, 'learning_rate': 1.96926032660903e-06, 'epoch': 4.8}\n", + "[INFO|trainer.py:1917] 2021-07-19 18:03:44,964 >> Saving model checkpoint to ./outputs/checkpoint-5000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 18:03:44,967 >> Configuration saved in ./outputs/checkpoint-5000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 18:03:46,368 >> Model weights saved in ./outputs/checkpoint-5000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 18:03:46,372 >> tokenizer config file saved in ./outputs/checkpoint-5000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 18:03:46,374 >> Special tokens file saved in ./outputs/checkpoint-5000/special_tokens_map.json\n", + "100% 5205/5205 [30:14<00:00, 3.29it/s][INFO|trainer.py:1358] 2021-07-19 18:04:59,740 >> \n", + "\n", + "Training completed. 
Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n", + "{'train_runtime': 1817.2838, 'train_samples_per_second': 22.902, 'train_steps_per_second': 2.864, 'train_loss': 0.06121874419000719, 'epoch': 5.0}\n", + "100% 5205/5205 [30:14<00:00, 2.87it/s]\n", + "[INFO|trainer.py:1917] 2021-07-19 18:04:59,756 >> Saving model checkpoint to ./outputs\n", + "[INFO|configuration_utils.py:379] 2021-07-19 18:04:59,766 >> Configuration saved in ./outputs/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 18:05:01,318 >> Model weights saved in ./outputs/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 18:05:01,319 >> tokenizer config file saved in ./outputs/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 18:05:01,320 >> Special tokens file saved in ./outputs/special_tokens_map.json\n", + "***** train metrics *****\n", + " epoch = 5.0\n", + " train_loss = 0.0612\n", + " train_runtime = 0:30:17.28\n", + " train_samples = 8324\n", + " train_samples_per_second = 22.902\n", + " train_steps_per_second = 2.864\n", + "[INFO|trainer.py:522] 2021-07-19 18:05:02,114 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: pos_tags, ner_tags, id, tokens.\n", + "07/19/2021 18:05:02 - INFO - __main__ - *** Evaluate ***\n", + "[INFO|trainer.py:2163] 2021-07-19 18:05:02,132 >> ***** Running Evaluation *****\n", + "[INFO|trainer.py:2165] 2021-07-19 18:05:02,132 >> Num examples = 1916\n", + "[INFO|trainer.py:2168] 2021-07-19 18:05:02,133 >> Batch size = 8\n", + "100% 239/240 [00:25<00:00, 7.72it/s]/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: NC seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fpa seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fpt seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fc seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Z seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: NP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fp seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fg seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DA seems not to be NE tag.\n", + " 
warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: AQ seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VAI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: SP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PR seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: RG seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: CS seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: CC seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fe seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fx seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DD seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PD seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMG seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: P0 seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE 
tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMM seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fd seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: AO seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSS seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VAS seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: RN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fat seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Faa seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DE seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMS seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fz seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PT seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + 
"/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VAN seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSG seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PX seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DT seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fh seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VAP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fs seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VSM seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Y seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fia seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. 
Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "100% 240/240 [00:28<00:00, 8.30it/s]\n", + "***** eval metrics *****\n", + " epoch = 5.0\n", + " eval_accuracy = 0.9707\n", + " eval_f1 = 0.9659\n", + " eval_loss = 0.1581\n", + " eval_precision = 0.9671\n", + " eval_recall = 0.9647\n", + " eval_runtime = 0:00:29.01\n", + " eval_samples = 1916\n", + " eval_samples_per_second = 66.035\n", + " eval_steps_per_second = 8.272\n", + "\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish, PID 1399\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Program ended successfully.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find user logs for this run at: /content/wandb/run-20210719_173442-1vhkvz54/logs/debug.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find internal logs for this run at: /content/wandb/run-20210719_173442-1vhkvz54/logs/debug-internal.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.0025\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 5205\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime 1849\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp 1626717931\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step 11\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime 1817.2838\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second 22.902\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second 2.864\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos 1.08808736772096e+16\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss 0.06122\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 0.15812\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision 0.96707\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall 0.9647\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 0.96588\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy 0.97066\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 29.015\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 66.035\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 8.272\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss █▃▂▂▁▁▁▁▁▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate █▇▆▆▅▄▃▃▂▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step ▁▂▂▃▄▄▅▅▆▇▇█\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▁\n", + 
"\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 1 other file(s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced \u001b[33m./outputs\u001b[0m: \u001b[34mhttps://wandb.ai/versae/bertin-eval/runs/1vhkvz54\u001b[0m\n", + "2021-07-19 18:05:43.870368: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "07/19/2021 18:05:46 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", + "07/19/2021 18:05:46 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", + "_n_gpu=1,\n", + "adafactor=False,\n", + "adam_beta1=0.9,\n", + "adam_beta2=0.999,\n", + "adam_epsilon=1e-08,\n", + "dataloader_drop_last=False,\n", + "dataloader_num_workers=0,\n", + "dataloader_pin_memory=True,\n", + "ddp_find_unused_parameters=None,\n", + "debug=[],\n", + "deepspeed=None,\n", + "disable_tqdm=False,\n", + "do_eval=True,\n", + "do_predict=False,\n", + "do_train=True,\n", + "eval_accumulation_steps=None,\n", + "eval_steps=500,\n", + "evaluation_strategy=IntervalStrategy.NO,\n", + "fp16=False,\n", + "fp16_backend=auto,\n", + "fp16_full_eval=False,\n", + "fp16_opt_level=O1,\n", + "gradient_accumulation_steps=1,\n", + "greater_is_better=None,\n", + "group_by_length=False,\n", + "ignore_data_skip=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=5e-05,\n", + "length_column_name=length,\n", + "load_best_model_at_end=False,\n", + "local_rank=-1,\n", + "log_level=-1,\n", + "log_level_replica=-1,\n", + "log_on_each_node=True,\n", + "logging_dir=./outputs/runs/Jul19_18-05-46_eab5184de9ff,\n", + "logging_first_step=False,\n", + "logging_steps=500,\n", + "logging_strategy=IntervalStrategy.STEPS,\n", + "lr_scheduler_type=SchedulerType.LINEAR,\n", + "max_grad_norm=1.0,\n", + "max_steps=-1,\n", + "metric_for_best_model=None,\n", + "mp_parameters=,\n", + "no_cuda=False,\n", + "num_train_epochs=5.0,\n", + "output_dir=./outputs,\n", + "overwrite_output_dir=True,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=8,\n", + "per_device_train_batch_size=8,\n", + "prediction_loss_only=False,\n", + "push_to_hub=False,\n", + "push_to_hub_model_id=outputs,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=None,\n", + "remove_unused_columns=True,\n", + "report_to=['tensorboard', 'wandb'],\n", + "resume_from_checkpoint=None,\n", + "run_name=./outputs,\n", + "save_on_each_node=False,\n", + "save_steps=500,\n", + "save_strategy=IntervalStrategy.STEPS,\n", + "save_total_limit=None,\n", + "seed=42,\n", + "sharded_ddp=[],\n", + "skip_memory_metrics=True,\n", + "tpu_metrics_debug=False,\n", + "tpu_num_cores=None,\n", + "use_legacy_prediction_loop=False,\n", + "warmup_ratio=0.0,\n", + "warmup_steps=0,\n", + "weight_decay=0.0,\n", + ")\n", + "07/19/2021 18:05:48 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 18:05:48 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at 
/root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 18:05:48 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + "07/19/2021 18:05:48 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 18:05:48 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 18:05:49 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 18:05:49 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 18:05:49 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + "07/19/2021 18:05:49 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 18:05:49 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 18:05:49 - INFO - datasets.info - Loading Dataset Infos from /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 18:05:49 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", + "07/19/2021 18:05:49 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 18:05:49 - WARNING - datasets.builder - Reusing dataset conll2002 (/root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5)\n", + "07/19/2021 18:05:49 - INFO - datasets.info - Loading Dataset info from 
/root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "100% 3/3 [00:00<00:00, 25.89it/s]\n", + "[INFO|configuration_utils.py:545] 2021-07-19 18:05:50,327 >> loading configuration file https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/cb7cedb04246e225d56ba26d207f1d1809b31a9bbe9b63103371d835c6ac0502.f4e4777229bac528fa2a8d4833e2ef53624e985ebde0fd527064a5cc7c50832b\n", + "[INFO|configuration_utils.py:581] 2021-07-19 18:05:50,328 >> Model config BertConfig {\n", + " \"_name_or_path\": \"dccuchile/bert-base-spanish-wwm-cased\",\n", + " \"architectures\": [\n", + " \"BertForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"finetuning_task\": \"pos\",\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"id2label\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8,\n", + " \"9\": 9,\n", + " \"10\": 10,\n", + " \"11\": 11,\n", + " \"12\": 12,\n", + " \"13\": 13,\n", + " \"14\": 14,\n", + " \"15\": 15,\n", + " \"16\": 16,\n", + " \"17\": 17,\n", + " \"18\": 18,\n", + " \"19\": 19,\n", + " \"20\": 20,\n", + " \"21\": 21,\n", + " \"22\": 22,\n", + " \"23\": 23,\n", + " \"24\": 24,\n", + " \"25\": 25,\n", + " \"26\": 26,\n", + " \"27\": 27,\n", + " \"28\": 28,\n", + " \"29\": 29,\n", + " \"30\": 30,\n", + " \"31\": 31,\n", + " \"32\": 32,\n", + " \"33\": 33,\n", + " \"34\": 34,\n", + " \"35\": 35,\n", + " \"36\": 36,\n", + " \"37\": 37,\n", + " \"38\": 38,\n", + " \"39\": 39,\n", + " \"40\": 40,\n", + " \"41\": 41,\n", + " \"42\": 42,\n", + " \"43\": 43,\n", + " \"44\": 44,\n", + " \"45\": 45,\n", + " \"46\": 46,\n", + " \"47\": 47,\n", + " \"48\": 48,\n", + " \"49\": 49,\n", + " \"50\": 50,\n", + " \"51\": 51,\n", + " \"52\": 52,\n", + " \"53\": 53,\n", + " \"54\": 54,\n", + " \"55\": 55,\n", + " \"56\": 56,\n", + " \"57\": 57,\n", + " \"58\": 58,\n", + " \"59\": 59\n", + " },\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"label2id\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8,\n", + " \"9\": 9,\n", + " \"10\": 10,\n", + " \"11\": 11,\n", + " \"12\": 12,\n", + " \"13\": 13,\n", + " \"14\": 14,\n", + " \"15\": 15,\n", + " \"16\": 16,\n", + " \"17\": 17,\n", + " \"18\": 18,\n", + " \"19\": 19,\n", + " \"20\": 20,\n", + " \"21\": 21,\n", + " \"22\": 22,\n", + " \"23\": 23,\n", + " \"24\": 24,\n", + " \"25\": 25,\n", + " \"26\": 26,\n", + " \"27\": 27,\n", + " \"28\": 28,\n", + " \"29\": 29,\n", + " \"30\": 30,\n", + " \"31\": 31,\n", + " \"32\": 32,\n", + " \"33\": 33,\n", + " \"34\": 34,\n", + " \"35\": 35,\n", + " \"36\": 36,\n", + " \"37\": 37,\n", + " \"38\": 38,\n", + " \"39\": 39,\n", + " \"40\": 40,\n", + " \"41\": 41,\n", + " \"42\": 42,\n", + " \"43\": 43,\n", + " \"44\": 44,\n", + " \"45\": 45,\n", + " \"46\": 46,\n", + " \"47\": 47,\n", + " \"48\": 48,\n", + " \"49\": 49,\n", + " \"50\": 50,\n", + " \"51\": 51,\n", + " \"52\": 52,\n", + " \"53\": 53,\n", + " \"54\": 54,\n", + " \"55\": 55,\n", + " \"56\": 56,\n", + " \"57\": 57,\n", + " \"58\": 58,\n", + " \"59\": 59\n", + " },\n", + " \"layer_norm_eps\": 1e-12,\n", + " 
\"max_position_embeddings\": 512,\n", + " \"model_type\": \"bert\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"output_past\": true,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 2,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 31002\n", + "}\n", + "\n", + "[INFO|configuration_utils.py:545] 2021-07-19 18:05:51,786 >> loading configuration file https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/cb7cedb04246e225d56ba26d207f1d1809b31a9bbe9b63103371d835c6ac0502.f4e4777229bac528fa2a8d4833e2ef53624e985ebde0fd527064a5cc7c50832b\n", + "[INFO|configuration_utils.py:581] 2021-07-19 18:05:51,787 >> Model config BertConfig {\n", + " \"_name_or_path\": \"dccuchile/bert-base-spanish-wwm-cased\",\n", + " \"architectures\": [\n", + " \"BertForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"layer_norm_eps\": 1e-12,\n", + " \"max_position_embeddings\": 512,\n", + " \"model_type\": \"bert\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"output_past\": true,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 2,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 31002\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 18:05:55,430 >> loading file https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/vocab.txt from cache at /root/.cache/huggingface/transformers/6761cd0c3d282272f598fcc1fa8c4ecfff8c18762ec8acb40f9cbb562cb0901e.6587bde86239957281af55b2f7e564df111a2b4f9dfc0ad884f13ea7106e4dfb\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 18:05:55,430 >> loading file https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/44de7af89c157bf67367a71105165d92bebe0585543739a918e3870d25484c27.6a099cd4b12bf7db174fffe48b004eb919c325f108e0c36176a0fe0ad1848d31\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 18:05:55,431 >> loading file https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 18:05:55,431 >> loading file https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/special_tokens_map.json from cache at /root/.cache/huggingface/transformers/9848a00af462c42dfb4ec88ef438fbab5256330f7f6f50badc48d277f9367d49.f982506b52498d4adb4bd491f593dc92b2ef6be61bfdbe9d30f53f963f9f5b66\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 18:05:55,431 >> loading file https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/tokenizer_config.json from cache at /root/.cache/huggingface/transformers/ca34e6c1251888a8ed98da2a454f869d28e3438eef67c2f93aa8133459ac08a3.0e90f656d0426b15b4927d1fe8ca5ec4c2e7b0d0e878c9153c3ddc6ed9bbed3c\n", + "[INFO|configuration_utils.py:545] 2021-07-19 18:05:56,161 >> loading configuration file https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/config.json from cache at 
/root/.cache/huggingface/transformers/cb7cedb04246e225d56ba26d207f1d1809b31a9bbe9b63103371d835c6ac0502.f4e4777229bac528fa2a8d4833e2ef53624e985ebde0fd527064a5cc7c50832b\n", + "[INFO|configuration_utils.py:581] 2021-07-19 18:05:56,161 >> Model config BertConfig {\n", + " \"_name_or_path\": \"dccuchile/bert-base-spanish-wwm-cased\",\n", + " \"architectures\": [\n", + " \"BertForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"layer_norm_eps\": 1e-12,\n", + " \"max_position_embeddings\": 512,\n", + " \"model_type\": \"bert\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"output_past\": true,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 2,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 31002\n", + "}\n", + "\n", + "[INFO|modeling_utils.py:1271] 2021-07-19 18:05:56,916 >> loading weights file https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/52382cbe7c1587c6b588daa81eaf247c5e2ad073d42b52192a8cd4202e7429b6.a88ccd19b1f271e63b6a901510804e6c0318089355c471334fe8b71b316a30ab\n", + "[WARNING|modeling_utils.py:1502] 2021-07-19 18:06:03,944 >> Some weights of the model checkpoint at dccuchile/bert-base-spanish-wwm-cased were not used when initializing BertForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']\n", + "- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "[WARNING|modeling_utils.py:1513] 2021-07-19 18:06:03,944 >> Some weights of BertForTokenClassification were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['classifier.weight', 'classifier.bias']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "Running tokenizer on train dataset: 0% 0/9 [00:00> The following columns in the training set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: pos_tags, ner_tags, tokens, id.\n", + "[INFO|trainer.py:1162] 2021-07-19 18:06:15,814 >> ***** Running training *****\n", + "[INFO|trainer.py:1163] 2021-07-19 18:06:15,814 >> Num examples = 8324\n", + "[INFO|trainer.py:1164] 2021-07-19 18:06:15,814 >> Num Epochs = 5\n", + "[INFO|trainer.py:1165] 2021-07-19 18:06:15,814 >> Instantaneous batch size per device = 8\n", + "[INFO|trainer.py:1166] 2021-07-19 18:06:15,814 >> Total train batch size (w. 
parallel, distributed & accumulation) = 8\n", + "[INFO|trainer.py:1167] 2021-07-19 18:06:15,814 >> Gradient Accumulation steps = 1\n", + "[INFO|trainer.py:1168] 2021-07-19 18:06:15,814 >> Total optimization steps = 5205\n", + "[INFO|integrations.py:417] 2021-07-19 18:06:15,833 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mversae\u001b[0m (use `wandb login --relogin` to force relogin)\n", + "2021-07-19 18:06:17.537370: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m./outputs\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval/runs/1felky5r\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in /content/wandb/run-20210719_180616-1felky5r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run `wandb offline` to turn off syncing.\n", + "\n", + " 10% 500/5205 [02:56<31:10, 2.51it/s]{'loss': 0.2778, 'learning_rate': 4.5196926032660905e-05, 'epoch': 0.48}\n", + " 10% 500/5205 [02:56<31:10, 2.51it/s][INFO|trainer.py:1917] 2021-07-19 18:09:14,980 >> Saving model checkpoint to ./outputs/checkpoint-500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 18:09:14,983 >> Configuration saved in ./outputs/checkpoint-500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 18:09:16,843 >> Model weights saved in ./outputs/checkpoint-500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 18:09:16,844 >> tokenizer config file saved in ./outputs/checkpoint-500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 18:09:16,845 >> Special tokens file saved in ./outputs/checkpoint-500/special_tokens_map.json\n", + " 19% 1000/5205 [05:57<27:27, 2.55it/s][INFO|trainer.py:1917] 2021-07-19 18:12:16,139 >> Saving model checkpoint to ./outputs/checkpoint-1000\n", + "{'loss': 0.103, 'learning_rate': 4.039385206532181e-05, 'epoch': 0.96}\n", + "[INFO|configuration_utils.py:379] 2021-07-19 18:12:16,147 >> Configuration saved in ./outputs/checkpoint-1000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 18:12:18,117 >> Model weights saved in ./outputs/checkpoint-1000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 18:12:18,118 >> tokenizer config file saved in ./outputs/checkpoint-1000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 18:12:18,118 >> Special tokens file saved in ./outputs/checkpoint-1000/special_tokens_map.json\n", + "{'loss': 0.0649, 'learning_rate': 3.5590778097982716e-05, 'epoch': 1.44}\n", + " 29% 1500/5205 [08:58<24:36, 2.51it/s][INFO|trainer.py:1917] 2021-07-19 18:15:16,993 >> Saving model checkpoint to ./outputs/checkpoint-1500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 18:15:16,995 >> Configuration saved in ./outputs/checkpoint-1500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 18:15:18,905 >> Model weights saved in ./outputs/checkpoint-1500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 18:15:18,906 >> tokenizer config file saved in 
./outputs/checkpoint-1500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 18:15:18,907 >> Special tokens file saved in ./outputs/checkpoint-1500/special_tokens_map.json\n", + "{'loss': 0.058, 'learning_rate': 3.078770413064361e-05, 'epoch': 1.92}\n", + " 38% 2000/5205 [11:58<21:08, 2.53it/s][INFO|trainer.py:1917] 2021-07-19 18:18:17,862 >> Saving model checkpoint to ./outputs/checkpoint-2000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 18:18:17,864 >> Configuration saved in ./outputs/checkpoint-2000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 18:18:19,484 >> Model weights saved in ./outputs/checkpoint-2000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 18:18:19,485 >> tokenizer config file saved in ./outputs/checkpoint-2000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 18:18:19,485 >> Special tokens file saved in ./outputs/checkpoint-2000/special_tokens_map.json\n", + "{'loss': 0.0356, 'learning_rate': 2.5984630163304517e-05, 'epoch': 2.4}\n", + " 48% 2500/5205 [14:58<17:53, 2.52it/s][INFO|trainer.py:1917] 2021-07-19 18:21:17,596 >> Saving model checkpoint to ./outputs/checkpoint-2500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 18:21:17,599 >> Configuration saved in ./outputs/checkpoint-2500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 18:21:19,253 >> Model weights saved in ./outputs/checkpoint-2500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 18:21:19,254 >> tokenizer config file saved in ./outputs/checkpoint-2500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 18:21:19,254 >> Special tokens file saved in ./outputs/checkpoint-2500/special_tokens_map.json\n", + " 58% 3000/5205 [17:58<14:35, 2.52it/s]{'loss': 0.0308, 'learning_rate': 2.118155619596542e-05, 'epoch': 2.88}\n", + " 58% 3000/5205 [17:58<14:35, 2.52it/s][INFO|trainer.py:1917] 2021-07-19 18:24:17,180 >> Saving model checkpoint to ./outputs/checkpoint-3000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 18:24:17,182 >> Configuration saved in ./outputs/checkpoint-3000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 18:24:18,844 >> Model weights saved in ./outputs/checkpoint-3000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 18:24:18,845 >> tokenizer config file saved in ./outputs/checkpoint-3000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 18:24:18,845 >> Special tokens file saved in ./outputs/checkpoint-3000/special_tokens_map.json\n", + " 67% 3500/5205 [20:57<11:15, 2.52it/s]{'loss': 0.0189, 'learning_rate': 1.6378482228626322e-05, 'epoch': 3.36}\n", + " 67% 3500/5205 [20:57<11:15, 2.52it/s][INFO|trainer.py:1917] 2021-07-19 18:27:16,579 >> Saving model checkpoint to ./outputs/checkpoint-3500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 18:27:16,581 >> Configuration saved in ./outputs/checkpoint-3500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 18:27:18,239 >> Model weights saved in ./outputs/checkpoint-3500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 18:27:18,240 >> tokenizer config file saved in ./outputs/checkpoint-3500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 18:27:18,240 >> Special tokens file saved in ./outputs/checkpoint-3500/special_tokens_map.json\n", + " 77% 4000/5205 [23:57<07:53, 2.54it/s]{'loss': 0.0157, 'learning_rate': 1.1575408261287224e-05, 'epoch': 
3.84}\n", + "[INFO|trainer.py:1917] 2021-07-19 18:30:16,127 >> Saving model checkpoint to ./outputs/checkpoint-4000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 18:30:16,129 >> Configuration saved in ./outputs/checkpoint-4000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 18:30:17,789 >> Model weights saved in ./outputs/checkpoint-4000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 18:30:17,790 >> tokenizer config file saved in ./outputs/checkpoint-4000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 18:30:17,790 >> Special tokens file saved in ./outputs/checkpoint-4000/special_tokens_map.json\n", + "{'loss': 0.0101, 'learning_rate': 6.7723342939481265e-06, 'epoch': 4.32}\n", + " 86% 4500/5205 [26:56<04:36, 2.55it/s][INFO|trainer.py:1917] 2021-07-19 18:33:15,138 >> Saving model checkpoint to ./outputs/checkpoint-4500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 18:33:15,140 >> Configuration saved in ./outputs/checkpoint-4500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 18:33:16,788 >> Model weights saved in ./outputs/checkpoint-4500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 18:33:16,789 >> tokenizer config file saved in ./outputs/checkpoint-4500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 18:33:16,789 >> Special tokens file saved in ./outputs/checkpoint-4500/special_tokens_map.json\n", + " 96% 5000/5205 [29:54<01:20, 2.56it/s]{'loss': 0.0065, 'learning_rate': 1.96926032660903e-06, 'epoch': 4.8}\n", + "[INFO|trainer.py:1917] 2021-07-19 18:36:13,857 >> Saving model checkpoint to ./outputs/checkpoint-5000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 18:36:13,864 >> Configuration saved in ./outputs/checkpoint-5000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 18:36:15,563 >> Model weights saved in ./outputs/checkpoint-5000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 18:36:15,564 >> tokenizer config file saved in ./outputs/checkpoint-5000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 18:36:15,565 >> Special tokens file saved in ./outputs/checkpoint-5000/special_tokens_map.json\n", + "100% 5205/5205 [31:11<00:00, 3.20it/s][INFO|trainer.py:1358] 2021-07-19 18:37:30,071 >> \n", + "\n", + "Training completed. 
Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n", + "100% 5205/5205 [31:11<00:00, 3.20it/s]{'train_runtime': 1874.2569, 'train_samples_per_second': 22.206, 'train_steps_per_second': 2.777, 'train_loss': 0.059975996713702434, 'epoch': 5.0}\n", + "100% 5205/5205 [31:11<00:00, 2.78it/s]\n", + "[INFO|trainer.py:1917] 2021-07-19 18:37:30,085 >> Saving model checkpoint to ./outputs\n", + "[INFO|configuration_utils.py:379] 2021-07-19 18:37:30,091 >> Configuration saved in ./outputs/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 18:37:31,882 >> Model weights saved in ./outputs/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 18:37:31,883 >> tokenizer config file saved in ./outputs/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 18:37:31,883 >> Special tokens file saved in ./outputs/special_tokens_map.json\n", + "***** train metrics *****\n", + " epoch = 5.0\n", + " train_loss = 0.06\n", + " train_runtime = 0:31:14.25\n", + " train_samples = 8324\n", + " train_samples_per_second = 22.206\n", + " train_steps_per_second = 2.777\n", + "07/19/2021 18:37:31 - INFO - __main__ - *** Evaluate ***\n", + "[INFO|trainer.py:522] 2021-07-19 18:37:31,977 >> The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: pos_tags, ner_tags, tokens, id.\n", + "[INFO|trainer.py:2163] 2021-07-19 18:37:32,041 >> ***** Running Evaluation *****\n", + "[INFO|trainer.py:2165] 2021-07-19 18:37:32,041 >> Num examples = 1916\n", + "[INFO|trainer.py:2168] 2021-07-19 18:37:32,042 >> Batch size = 8\n", + "100% 239/240 [00:25<00:00, 9.17it/s]/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: NC seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fpa seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fpt seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fc seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Z seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: NP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fp seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fg seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: DA 
seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + " [seqeval repeats the same UserWarning for each remaining POS tag (AQ, VAI, VMP, DI, SP, VMN, DN, PR, RG, CS, CC, Fe, Fx, DD, VSI, PD, VMG, P0, PN, VMM, Fd, AO, DP, PP, PI, VSS, VAS, RN, Fat, Faa, DE, VMS, VSN, VSP, Fz, PT, VAN, VSG, PX, DT, Fh, VAP, Fs, VSM, Y, Fia), since the CoNLL-2002 pos_tags labels are not IOB-style NE tags]\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. 
Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "100% 240/240 [00:29<00:00, 8.16it/s]\n", + "***** eval metrics *****\n", + " epoch = 5.0\n", + " eval_accuracy = 0.97\n", + " eval_f1 = 0.9642\n", + " eval_loss = 0.1621\n", + " eval_precision = 0.965\n", + " eval_recall = 0.9634\n", + " eval_runtime = 0:00:29.52\n", + " eval_samples = 1916\n", + " eval_samples_per_second = 64.885\n", + " eval_steps_per_second = 8.128\n", + "\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish, PID 1463\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Program ended successfully.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find user logs for this run at: /content/wandb/run-20210719_180616-1felky5r/logs/debug.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find internal logs for this run at: /content/wandb/run-20210719_180616-1felky5r/logs/debug-internal.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.0065\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 5205\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime 1905\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp 1626719881\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step 11\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime 1874.2569\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second 22.206\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second 2.777\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos 1.08808736772096e+16\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss 0.05998\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 0.16208\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision 0.96501\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall 0.9634\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 0.9642\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy 0.96998\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 29.5293\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 64.885\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 8.128\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss █▃▃▂▂▂▁▁▁▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate █▇▆▆▅▄▃▃▂▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step ▁▂▂▃▄▄▅▅▆▇▇█\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▁\n", + 
"\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 1 other file(s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced \u001b[33m./outputs\u001b[0m: \u001b[34mhttps://wandb.ai/versae/bertin-eval/runs/1felky5r\u001b[0m\n", + "2021-07-19 18:38:13.403388: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "07/19/2021 18:38:16 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", + "07/19/2021 18:38:16 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", + "_n_gpu=1,\n", + "adafactor=False,\n", + "adam_beta1=0.9,\n", + "adam_beta2=0.999,\n", + "adam_epsilon=1e-08,\n", + "dataloader_drop_last=False,\n", + "dataloader_num_workers=0,\n", + "dataloader_pin_memory=True,\n", + "ddp_find_unused_parameters=None,\n", + "debug=[],\n", + "deepspeed=None,\n", + "disable_tqdm=False,\n", + "do_eval=True,\n", + "do_predict=False,\n", + "do_train=True,\n", + "eval_accumulation_steps=None,\n", + "eval_steps=500,\n", + "evaluation_strategy=IntervalStrategy.NO,\n", + "fp16=False,\n", + "fp16_backend=auto,\n", + "fp16_full_eval=False,\n", + "fp16_opt_level=O1,\n", + "gradient_accumulation_steps=1,\n", + "greater_is_better=None,\n", + "group_by_length=False,\n", + "ignore_data_skip=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=5e-05,\n", + "length_column_name=length,\n", + "load_best_model_at_end=False,\n", + "local_rank=-1,\n", + "log_level=-1,\n", + "log_level_replica=-1,\n", + "log_on_each_node=True,\n", + "logging_dir=./outputs/runs/Jul19_18-38-16_eab5184de9ff,\n", + "logging_first_step=False,\n", + "logging_steps=500,\n", + "logging_strategy=IntervalStrategy.STEPS,\n", + "lr_scheduler_type=SchedulerType.LINEAR,\n", + "max_grad_norm=1.0,\n", + "max_steps=-1,\n", + "metric_for_best_model=None,\n", + "mp_parameters=,\n", + "no_cuda=False,\n", + "num_train_epochs=5.0,\n", + "output_dir=./outputs,\n", + "overwrite_output_dir=True,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=8,\n", + "per_device_train_batch_size=8,\n", + "prediction_loss_only=False,\n", + "push_to_hub=False,\n", + "push_to_hub_model_id=outputs,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=None,\n", + "remove_unused_columns=True,\n", + "report_to=['tensorboard', 'wandb'],\n", + "resume_from_checkpoint=None,\n", + "run_name=./outputs,\n", + "save_on_each_node=False,\n", + "save_steps=500,\n", + "save_strategy=IntervalStrategy.STEPS,\n", + "save_total_limit=None,\n", + "seed=42,\n", + "sharded_ddp=[],\n", + "skip_memory_metrics=True,\n", + "tpu_metrics_debug=False,\n", + "tpu_num_cores=None,\n", + "use_legacy_prediction_loop=False,\n", + "warmup_ratio=0.0,\n", + "warmup_steps=0,\n", + "weight_decay=0.0,\n", + ")\n", + "07/19/2021 18:38:17 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 18:38:17 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at 
/root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 18:38:17 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + "07/19/2021 18:38:17 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 18:38:17 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 18:38:18 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002\n", + "07/19/2021 18:38:18 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 18:38:18 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.py\n", + "07/19/2021 18:38:18 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/dataset_infos.json\n", + "07/19/2021 18:38:18 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/conll2002/conll2002.py at /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5/conll2002.json\n", + "07/19/2021 18:38:18 - INFO - datasets.info - Loading Dataset Infos from /root/.cache/huggingface/modules/datasets_modules/datasets/conll2002/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 18:38:18 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", + "07/19/2021 18:38:18 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "07/19/2021 18:38:18 - WARNING - datasets.builder - Reusing dataset conll2002 (/root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5)\n", + "07/19/2021 18:38:18 - INFO - datasets.info - Loading Dataset info from 
/root/.cache/huggingface/datasets/conll2002/es/1.0.0/a3a8a8612caf57271f5b35c5ae1dd25f99ddb9efb9c1667abaa70ede33e863e5\n", + "100% 3/3 [00:00<00:00, 26.77it/s]\n", + "[INFO|configuration_utils.py:545] 2021-07-19 18:38:19,662 >> loading configuration file https://huggingface.co/bert-base-multilingual-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/6c4a5d81a58c9791cdf76a09bce1b5abfb9cf958aebada51200f4515403e5d08.0fe59f3f4f1335dadeb4bce8b8146199d9083512b50d07323c1c319f96df450c\n", + "[INFO|configuration_utils.py:581] 2021-07-19 18:38:19,664 >> Model config BertConfig {\n", + " \"architectures\": [\n", + " \"BertForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"directionality\": \"bidi\",\n", + " \"finetuning_task\": \"pos\",\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"id2label\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8,\n", + " \"9\": 9,\n", + " \"10\": 10,\n", + " \"11\": 11,\n", + " \"12\": 12,\n", + " \"13\": 13,\n", + " \"14\": 14,\n", + " \"15\": 15,\n", + " \"16\": 16,\n", + " \"17\": 17,\n", + " \"18\": 18,\n", + " \"19\": 19,\n", + " \"20\": 20,\n", + " \"21\": 21,\n", + " \"22\": 22,\n", + " \"23\": 23,\n", + " \"24\": 24,\n", + " \"25\": 25,\n", + " \"26\": 26,\n", + " \"27\": 27,\n", + " \"28\": 28,\n", + " \"29\": 29,\n", + " \"30\": 30,\n", + " \"31\": 31,\n", + " \"32\": 32,\n", + " \"33\": 33,\n", + " \"34\": 34,\n", + " \"35\": 35,\n", + " \"36\": 36,\n", + " \"37\": 37,\n", + " \"38\": 38,\n", + " \"39\": 39,\n", + " \"40\": 40,\n", + " \"41\": 41,\n", + " \"42\": 42,\n", + " \"43\": 43,\n", + " \"44\": 44,\n", + " \"45\": 45,\n", + " \"46\": 46,\n", + " \"47\": 47,\n", + " \"48\": 48,\n", + " \"49\": 49,\n", + " \"50\": 50,\n", + " \"51\": 51,\n", + " \"52\": 52,\n", + " \"53\": 53,\n", + " \"54\": 54,\n", + " \"55\": 55,\n", + " \"56\": 56,\n", + " \"57\": 57,\n", + " \"58\": 58,\n", + " \"59\": 59\n", + " },\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"label2id\": {\n", + " \"0\": 0,\n", + " \"1\": 1,\n", + " \"2\": 2,\n", + " \"3\": 3,\n", + " \"4\": 4,\n", + " \"5\": 5,\n", + " \"6\": 6,\n", + " \"7\": 7,\n", + " \"8\": 8,\n", + " \"9\": 9,\n", + " \"10\": 10,\n", + " \"11\": 11,\n", + " \"12\": 12,\n", + " \"13\": 13,\n", + " \"14\": 14,\n", + " \"15\": 15,\n", + " \"16\": 16,\n", + " \"17\": 17,\n", + " \"18\": 18,\n", + " \"19\": 19,\n", + " \"20\": 20,\n", + " \"21\": 21,\n", + " \"22\": 22,\n", + " \"23\": 23,\n", + " \"24\": 24,\n", + " \"25\": 25,\n", + " \"26\": 26,\n", + " \"27\": 27,\n", + " \"28\": 28,\n", + " \"29\": 29,\n", + " \"30\": 30,\n", + " \"31\": 31,\n", + " \"32\": 32,\n", + " \"33\": 33,\n", + " \"34\": 34,\n", + " \"35\": 35,\n", + " \"36\": 36,\n", + " \"37\": 37,\n", + " \"38\": 38,\n", + " \"39\": 39,\n", + " \"40\": 40,\n", + " \"41\": 41,\n", + " \"42\": 42,\n", + " \"43\": 43,\n", + " \"44\": 44,\n", + " \"45\": 45,\n", + " \"46\": 46,\n", + " \"47\": 47,\n", + " \"48\": 48,\n", + " \"49\": 49,\n", + " \"50\": 50,\n", + " \"51\": 51,\n", + " \"52\": 52,\n", + " \"53\": 53,\n", + " \"54\": 54,\n", + " \"55\": 55,\n", + " \"56\": 56,\n", + " \"57\": 57,\n", + " \"58\": 58,\n", + " \"59\": 59\n", + " },\n", + " \"layer_norm_eps\": 1e-12,\n", + " \"max_position_embeddings\": 512,\n", + " \"model_type\": 
\"bert\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 0,\n", + " \"pooler_fc_size\": 768,\n", + " \"pooler_num_attention_heads\": 12,\n", + " \"pooler_num_fc_layers\": 3,\n", + " \"pooler_size_per_head\": 128,\n", + " \"pooler_type\": \"first_token_transform\",\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 2,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 119547\n", + "}\n", + "\n", + "[INFO|configuration_utils.py:545] 2021-07-19 18:38:21,660 >> loading configuration file https://huggingface.co/bert-base-multilingual-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/6c4a5d81a58c9791cdf76a09bce1b5abfb9cf958aebada51200f4515403e5d08.0fe59f3f4f1335dadeb4bce8b8146199d9083512b50d07323c1c319f96df450c\n", + "[INFO|configuration_utils.py:581] 2021-07-19 18:38:21,661 >> Model config BertConfig {\n", + " \"architectures\": [\n", + " \"BertForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"directionality\": \"bidi\",\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"layer_norm_eps\": 1e-12,\n", + " \"max_position_embeddings\": 512,\n", + " \"model_type\": \"bert\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 0,\n", + " \"pooler_fc_size\": 768,\n", + " \"pooler_num_attention_heads\": 12,\n", + " \"pooler_num_fc_layers\": 3,\n", + " \"pooler_size_per_head\": 128,\n", + " \"pooler_type\": \"first_token_transform\",\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 2,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 119547\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 18:38:25,288 >> loading file https://huggingface.co/bert-base-multilingual-cased/resolve/main/vocab.txt from cache at /root/.cache/huggingface/transformers/eff018e45de5364a8368df1f2df3461d506e2a111e9dd50af1fae061cd460ead.6c5b6600e968f4b5e08c86d8891ea99e51537fc2bf251435fb46922e8f7a7b29\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 18:38:25,288 >> loading file https://huggingface.co/bert-base-multilingual-cased/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/46880f3b0081fda494a4e15b05787692aa4c1e21e0ff2428ba8b14d4eda0784d.b33e51591f94f17c238ee9b1fac75b96ff2678cbaed6e108feadb3449d18dc24\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 18:38:25,288 >> loading file https://huggingface.co/bert-base-multilingual-cased/resolve/main/added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 18:38:25,288 >> loading file https://huggingface.co/bert-base-multilingual-cased/resolve/main/special_tokens_map.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-19 18:38:25,288 >> loading file https://huggingface.co/bert-base-multilingual-cased/resolve/main/tokenizer_config.json from cache at /root/.cache/huggingface/transformers/f55e7a2ad4f8d0fff2733b3f79777e1e99247f2e4583703e92ce74453af8c235.ec5c189f89475aac7d8cbd243960a0655cfadc3d0474da8ff2ed0bf1699c2a5f\n", + "[INFO|configuration_utils.py:545] 2021-07-19 18:38:26,019 >> loading configuration file 
https://huggingface.co/bert-base-multilingual-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/6c4a5d81a58c9791cdf76a09bce1b5abfb9cf958aebada51200f4515403e5d08.0fe59f3f4f1335dadeb4bce8b8146199d9083512b50d07323c1c319f96df450c\n", + "[INFO|configuration_utils.py:581] 2021-07-19 18:38:26,019 >> Model config BertConfig {\n", + " \"architectures\": [\n", + " \"BertForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"directionality\": \"bidi\",\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"layer_norm_eps\": 1e-12,\n", + " \"max_position_embeddings\": 512,\n", + " \"model_type\": \"bert\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 0,\n", + " \"pooler_fc_size\": 768,\n", + " \"pooler_num_attention_heads\": 12,\n", + " \"pooler_num_fc_layers\": 3,\n", + " \"pooler_size_per_head\": 128,\n", + " \"pooler_type\": \"first_token_transform\",\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 2,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 119547\n", + "}\n", + "\n", + "[INFO|modeling_utils.py:1271] 2021-07-19 18:38:26,896 >> loading weights file https://huggingface.co/bert-base-multilingual-cased/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/0a3fd51713dcbb4def175c7f85bddc995d5976ce1dde327f99104e4d33069f17.aa7be4c79d76f4066d9b354496ea477c9ee39c5d889156dd1efb680643c2b052\n", + "[WARNING|modeling_utils.py:1502] 2021-07-19 18:38:38,783 >> Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']\n", + "- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "[WARNING|modeling_utils.py:1513] 2021-07-19 18:38:38,784 >> Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "Running tokenizer on train dataset: 0% 0/9 [00:00> The following columns in the training set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: pos_tags, ner_tags, id, tokens.\n", + "[INFO|trainer.py:1162] 2021-07-19 18:38:51,323 >> ***** Running training *****\n", + "[INFO|trainer.py:1163] 2021-07-19 18:38:51,323 >> Num examples = 8324\n", + "[INFO|trainer.py:1164] 2021-07-19 18:38:51,323 >> Num Epochs = 5\n", + "[INFO|trainer.py:1165] 2021-07-19 18:38:51,324 >> Instantaneous batch size per device = 8\n", + "[INFO|trainer.py:1166] 2021-07-19 18:38:51,324 >> Total train batch size (w. parallel, distributed & accumulation) = 8\n", + "[INFO|trainer.py:1167] 2021-07-19 18:38:51,324 >> Gradient Accumulation steps = 1\n", + "[INFO|trainer.py:1168] 2021-07-19 18:38:51,324 >> Total optimization steps = 5205\n", + "[INFO|integrations.py:417] 2021-07-19 18:38:51,344 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mversae\u001b[0m (use `wandb login --relogin` to force relogin)\n", + "2021-07-19 18:38:53.056953: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m./outputs\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval/runs/2imdirdh\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in /content/wandb/run-20210719_183851-2imdirdh\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run `wandb offline` to turn off syncing.\n", + "\n", + " 10% 500/5205 [02:59<32:20, 2.43it/s]{'loss': 0.2935, 'learning_rate': 4.5196926032660905e-05, 'epoch': 0.48}\n", + " 10% 500/5205 [02:59<32:20, 2.43it/s][INFO|trainer.py:1917] 2021-07-19 18:41:54,160 >> Saving model checkpoint to ./outputs/checkpoint-500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 18:41:54,162 >> Configuration saved in ./outputs/checkpoint-500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 18:41:57,418 >> Model weights saved in ./outputs/checkpoint-500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 18:41:57,419 >> tokenizer config file saved in ./outputs/checkpoint-500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 18:41:57,419 >> Special tokens file saved in ./outputs/checkpoint-500/special_tokens_map.json\n", + "{'loss': 0.1177, 'learning_rate': 4.039385206532181e-05, 'epoch': 
0.96}\n", + " 19% 1000/5205 [06:08<28:48, 2.43it/s][INFO|trainer.py:1917] 2021-07-19 18:45:02,966 >> Saving model checkpoint to ./outputs/checkpoint-1000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 18:45:02,968 >> Configuration saved in ./outputs/checkpoint-1000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 18:45:05,616 >> Model weights saved in ./outputs/checkpoint-1000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 18:45:05,617 >> tokenizer config file saved in ./outputs/checkpoint-1000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 18:45:05,618 >> Special tokens file saved in ./outputs/checkpoint-1000/special_tokens_map.json\n", + " 29% 1500/5205 [09:16<25:24, 2.43it/s]{'loss': 0.0794, 'learning_rate': 3.5590778097982716e-05, 'epoch': 1.44}\n", + "[INFO|trainer.py:1917] 2021-07-19 18:48:11,131 >> Saving model checkpoint to ./outputs/checkpoint-1500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 18:48:11,133 >> Configuration saved in ./outputs/checkpoint-1500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 18:48:13,690 >> Model weights saved in ./outputs/checkpoint-1500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 18:48:13,691 >> tokenizer config file saved in ./outputs/checkpoint-1500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 18:48:13,691 >> Special tokens file saved in ./outputs/checkpoint-1500/special_tokens_map.json\n", + " 38% 2000/5205 [12:27<21:56, 2.44it/s]{'loss': 0.0721, 'learning_rate': 3.078770413064361e-05, 'epoch': 1.92}\n", + " 38% 2000/5205 [12:27<21:56, 2.44it/s][INFO|trainer.py:1917] 2021-07-19 18:51:22,110 >> Saving model checkpoint to ./outputs/checkpoint-2000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 18:51:22,112 >> Configuration saved in ./outputs/checkpoint-2000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 18:51:24,734 >> Model weights saved in ./outputs/checkpoint-2000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 18:51:24,735 >> tokenizer config file saved in ./outputs/checkpoint-2000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 18:51:24,735 >> Special tokens file saved in ./outputs/checkpoint-2000/special_tokens_map.json\n", + " 48% 2500/5205 [15:36<18:33, 2.43it/s]{'loss': 0.0457, 'learning_rate': 2.5984630163304517e-05, 'epoch': 2.4}\n", + " 48% 2500/5205 [15:36<18:33, 2.43it/s][INFO|trainer.py:1917] 2021-07-19 18:54:30,708 >> Saving model checkpoint to ./outputs/checkpoint-2500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 18:54:30,710 >> Configuration saved in ./outputs/checkpoint-2500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 18:54:33,601 >> Model weights saved in ./outputs/checkpoint-2500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 18:54:33,602 >> tokenizer config file saved in ./outputs/checkpoint-2500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 18:54:33,603 >> Special tokens file saved in ./outputs/checkpoint-2500/special_tokens_map.json\n", + "{'loss': 0.0423, 'learning_rate': 2.118155619596542e-05, 'epoch': 2.88}\n", + " 58% 3000/5205 [18:44<15:13, 2.41it/s][INFO|trainer.py:1917] 2021-07-19 18:57:39,382 >> Saving model checkpoint to ./outputs/checkpoint-3000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 18:57:39,384 >> Configuration saved in ./outputs/checkpoint-3000/config.json\n", + 
"[INFO|modeling_utils.py:997] 2021-07-19 18:57:42,172 >> Model weights saved in ./outputs/checkpoint-3000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 18:57:42,173 >> tokenizer config file saved in ./outputs/checkpoint-3000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 18:57:42,173 >> Special tokens file saved in ./outputs/checkpoint-3000/special_tokens_map.json\n", + " 67% 3500/5205 [21:53<11:29, 2.47it/s]{'loss': 0.0278, 'learning_rate': 1.6378482228626322e-05, 'epoch': 3.36}\n", + "[INFO|trainer.py:1917] 2021-07-19 19:00:47,421 >> Saving model checkpoint to ./outputs/checkpoint-3500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 19:00:47,425 >> Configuration saved in ./outputs/checkpoint-3500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 19:00:50,395 >> Model weights saved in ./outputs/checkpoint-3500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 19:00:50,396 >> tokenizer config file saved in ./outputs/checkpoint-3500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 19:00:50,396 >> Special tokens file saved in ./outputs/checkpoint-3500/special_tokens_map.json\n", + " 77% 4000/5205 [25:00<08:14, 2.44it/s]{'loss': 0.0228, 'learning_rate': 1.1575408261287224e-05, 'epoch': 3.84}\n", + "[INFO|trainer.py:1917] 2021-07-19 19:03:55,323 >> Saving model checkpoint to ./outputs/checkpoint-4000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 19:03:55,325 >> Configuration saved in ./outputs/checkpoint-4000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 19:03:58,214 >> Model weights saved in ./outputs/checkpoint-4000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 19:03:58,215 >> tokenizer config file saved in ./outputs/checkpoint-4000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 19:03:58,215 >> Special tokens file saved in ./outputs/checkpoint-4000/special_tokens_map.json\n", + " 86% 4500/5205 [28:08<04:45, 2.47it/s]{'loss': 0.0152, 'learning_rate': 6.7723342939481265e-06, 'epoch': 4.32}\n", + "[INFO|trainer.py:1917] 2021-07-19 19:07:02,887 >> Saving model checkpoint to ./outputs/checkpoint-4500\n", + "[INFO|configuration_utils.py:379] 2021-07-19 19:07:02,890 >> Configuration saved in ./outputs/checkpoint-4500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 19:07:05,844 >> Model weights saved in ./outputs/checkpoint-4500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 19:07:05,845 >> tokenizer config file saved in ./outputs/checkpoint-4500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 19:07:05,845 >> Special tokens file saved in ./outputs/checkpoint-4500/special_tokens_map.json\n", + "{'loss': 0.0114, 'learning_rate': 1.96926032660903e-06, 'epoch': 4.8}\n", + " 96% 5000/5205 [31:15<01:23, 2.44it/s][INFO|trainer.py:1917] 2021-07-19 19:10:10,280 >> Saving model checkpoint to ./outputs/checkpoint-5000\n", + "[INFO|configuration_utils.py:379] 2021-07-19 19:10:10,283 >> Configuration saved in ./outputs/checkpoint-5000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 19:10:13,378 >> Model weights saved in ./outputs/checkpoint-5000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 19:10:13,379 >> tokenizer config file saved in ./outputs/checkpoint-5000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 19:10:13,379 >> Special tokens file saved in 
./outputs/checkpoint-5000/special_tokens_map.json\n", + "100% 5205/5205 [32:38<00:00, 3.12it/s][INFO|trainer.py:1358] 2021-07-19 19:11:32,764 >> \n", + "\n", + "Training completed. Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n", + "{'train_runtime': 1961.4409, 'train_samples_per_second': 21.219, 'train_steps_per_second': 2.654, 'train_loss': 0.07034576014757843, 'epoch': 5.0}\n", + "100% 5205/5205 [32:38<00:00, 2.66it/s]\n", + "[INFO|trainer.py:1917] 2021-07-19 19:11:32,775 >> Saving model checkpoint to ./outputs\n", + "[INFO|configuration_utils.py:379] 2021-07-19 19:11:32,780 >> Configuration saved in ./outputs/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-19 19:11:35,689 >> Model weights saved in ./outputs/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-19 19:11:35,689 >> tokenizer config file saved in ./outputs/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-19 19:11:35,690 >> Special tokens file saved in ./outputs/special_tokens_map.json\n", + "***** train metrics *****\n", + " epoch = 5.0\n", + " train_loss = 0.0703\n", + " train_runtime = 0:32:41.44\n", + " train_samples = 8324\n", + " train_samples_per_second = 21.219\n", + " train_steps_per_second = 2.654\n", + "07/19/2021 19:11:35 - INFO - __main__ - *** Evaluate ***\n", + "[INFO|trainer.py:522] 2021-07-19 19:11:35,971 >> The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: pos_tags, ner_tags, id, tokens.\n", + "[INFO|trainer.py:2163] 2021-07-19 19:11:35,991 >> ***** Running Evaluation *****\n", + "[INFO|trainer.py:2165] 2021-07-19 19:11:35,992 >> Num examples = 1916\n", + "[INFO|trainer.py:2168] 2021-07-19 19:11:35,992 >> Batch size = 8\n", + "100% 239/240 [00:25<00:00, 9.28it/s]/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: NC seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: VMI seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fpa seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fpt seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fc seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Z seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: NP seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fp seems not to be NE tag.\n", + " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: Fg seems not to be NE tag.\n", + " warnings.warn('{} 
seems not to be NE tag.'.format(chunk))\n", + " [seqeval repeats the same UserWarning for each remaining POS tag (DA, AQ, VAI, VMP, DI, SP, VMN, DN, PR, RG, CS, CC, Fe, Fx, DD, VSI, PD, VMG, P0, PN, VMM, Fd, AO, DP, PP, PI, VSS, VAS, RN, Fat, Faa, DE, VMS, VSN, VSP, Fz, PT, VAN, VSG, PX, DT, Fh, VAP, Fs, VSM, Y, Fia, Fit), since the CoNLL-2002 pos_tags labels are not IOB-style NE tags]\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. 
Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "100% 240/240 [00:29<00:00, 8.24it/s]\n", + "***** eval metrics *****\n", + " epoch = 5.0\n", + " eval_accuracy = 0.9687\n", + " eval_f1 = 0.9629\n", + " eval_loss = 0.1579\n", + " eval_precision = 0.9637\n", + " eval_recall = 0.9621\n", + " eval_runtime = 0:00:29.26\n", + " eval_samples = 1916\n", + " eval_samples_per_second = 65.46\n", + " eval_steps_per_second = 8.2\n", + "\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish, PID 1527\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Program ended successfully.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find user logs for this run at: /content/wandb/run-20210719_183851-2imdirdh/logs/debug.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find internal logs for this run at: /content/wandb/run-20210719_183851-2imdirdh/logs/debug-internal.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.0114\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 5205\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime 1994\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp 1626721925\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step 11\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime 1961.4409\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second 21.219\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second 2.654\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos 1.08808736772096e+16\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss 0.07035\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 0.15789\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision 0.96365\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall 0.96213\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 0.96289\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy 0.96865\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 29.2699\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 65.46\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 8.2\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss █▄▃▃▂▂▁▁▁▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate █▇▆▆▅▄▃▃▂▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp ▁▂▂▃▄▅▅▆▇███\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step ▁▂▂▃▄▄▅▅▆▇▇█\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/precision ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/recall ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/f1 ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▁\n", + 
"\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 1 other file(s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced \u001b[33m./outputs\u001b[0m: \u001b[34mhttps://wandb.ai/versae/bertin-eval/runs/2imdirdh\u001b[0m\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yxSsy8vOVwu7" + }, + "source": [ + "----" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "y-A-WE75Ffv6" + }, + "source": [ + "## Sequence" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "n8XiKFkjbVzh", + "outputId": "b8454d93-4afa-409b-a934-943a7349014b" + }, + "source": [ + "!wget https://raw.githubusercontent.com/huggingface/transformers/master/examples/pytorch/text-classification/run_glue.py" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "--2021-07-20 08:36:03-- https://raw.githubusercontent.com/huggingface/transformers/master/examples/pytorch/text-classification/run_glue.py\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 24931 (24K) [text/plain]\n", + "Saving to: ‘run_glue.py’\n", + "\n", + "\rrun_glue.py 0%[ ] 0 --.-KB/s \rrun_glue.py 100%[===================>] 24.35K --.-KB/s in 0s \n", + "\n", + "2021-07-20 08:36:04 (94.8 MB/s) - ‘run_glue.py’ saved [24931/24931]\n", + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "background_save": true, + "base_uri": "https://localhost:8080/" + }, + "id": "PhlGkj5_9mj8", + "outputId": "ce4374be-3410-4b1a-92ae-b7dc5bf84b66" + }, + "source": [ + "# !wget https://raw.githubusercontent.com/huggingface/transformers/master/examples/pytorch/token-classification/run_ner.py\n", + "for model in models:\n", + " !WANDB_PROJECT=bertin-eval TOKENIZERS_PARALLELISM=false CUDA_LAUNCH_BLOCKING=1 python run_glue.py \\\n", + " --model_name_or_path $model \\\n", + " --dataset_name \"paws-x\" \\\n", + " --task_name \"paws-x\" \\\n", + " --dataset_config_name \"es\" \\\n", + " --output_dir ./outputs \\\n", + " --overwrite_output_dir \\\n", + " --pad_to_max_length \\\n", + " --num_train_epochs 5 \\\n", + " --do_train \\\n", + " --do_eval" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "2021-07-20 09:49:17.108527: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "07/20/2021 09:49:18 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", + "07/20/2021 09:49:18 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", + "_n_gpu=1,\n", + "adafactor=False,\n", + "adam_beta1=0.9,\n", + "adam_beta2=0.999,\n", + "adam_epsilon=1e-08,\n", + "dataloader_drop_last=False,\n", + "dataloader_num_workers=0,\n", + "dataloader_pin_memory=True,\n", + "ddp_find_unused_parameters=None,\n", + "debug=[],\n", + "deepspeed=None,\n", + "disable_tqdm=False,\n", + "do_eval=True,\n", + 
"do_predict=False,\n", + "do_train=True,\n", + "eval_accumulation_steps=None,\n", + "eval_steps=500,\n", + "evaluation_strategy=IntervalStrategy.NO,\n", + "fp16=False,\n", + "fp16_backend=auto,\n", + "fp16_full_eval=False,\n", + "fp16_opt_level=O1,\n", + "gradient_accumulation_steps=1,\n", + "greater_is_better=None,\n", + "group_by_length=False,\n", + "ignore_data_skip=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=5e-05,\n", + "length_column_name=length,\n", + "load_best_model_at_end=False,\n", + "local_rank=-1,\n", + "log_level=-1,\n", + "log_level_replica=-1,\n", + "log_on_each_node=True,\n", + "logging_dir=./outputs/runs/Jul20_09-49-18_48f3f265b421,\n", + "logging_first_step=False,\n", + "logging_steps=500,\n", + "logging_strategy=IntervalStrategy.STEPS,\n", + "lr_scheduler_type=SchedulerType.LINEAR,\n", + "max_grad_norm=1.0,\n", + "max_steps=-1,\n", + "metric_for_best_model=None,\n", + "mp_parameters=,\n", + "no_cuda=False,\n", + "num_train_epochs=5.0,\n", + "output_dir=./outputs,\n", + "overwrite_output_dir=True,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=8,\n", + "per_device_train_batch_size=8,\n", + "prediction_loss_only=False,\n", + "push_to_hub=False,\n", + "push_to_hub_model_id=outputs,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=None,\n", + "remove_unused_columns=True,\n", + "report_to=['tensorboard', 'wandb'],\n", + "resume_from_checkpoint=None,\n", + "run_name=./outputs,\n", + "save_on_each_node=False,\n", + "save_steps=500,\n", + "save_strategy=IntervalStrategy.STEPS,\n", + "save_total_limit=None,\n", + "seed=42,\n", + "sharded_ddp=[],\n", + "skip_memory_metrics=True,\n", + "tpu_metrics_debug=False,\n", + "tpu_num_cores=None,\n", + "use_legacy_prediction_loop=False,\n", + "warmup_ratio=0.0,\n", + "warmup_steps=0,\n", + "weight_decay=0.0,\n", + ")\n", + "07/20/2021 09:49:19 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py at /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x\n", + "07/20/2021 09:49:19 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py at /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af\n", + "07/20/2021 09:49:19 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py to /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/paws-x.py\n", + "07/20/2021 09:49:19 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/dataset_infos.json\n", + "07/20/2021 09:49:19 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py at /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/paws-x.json\n", + "07/20/2021 09:49:20 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py at 
/root/.cache/huggingface/modules/datasets_modules/datasets/paws-x\n", + "07/20/2021 09:49:20 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py at /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af\n", + "07/20/2021 09:49:20 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py to /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/paws-x.py\n", + "07/20/2021 09:49:20 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/dataset_infos.json\n", + "07/20/2021 09:49:20 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py at /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/paws-x.json\n", + "07/20/2021 09:49:20 - INFO - datasets.info - Loading Dataset Infos from /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af\n", + "07/20/2021 09:49:20 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", + "07/20/2021 09:49:20 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/pawsx/es/1.1.0/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af\n", + "07/20/2021 09:49:20 - WARNING - datasets.builder - Reusing dataset pawsx (/root/.cache/huggingface/datasets/pawsx/es/1.1.0/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af)\n", + "07/20/2021 09:49:20 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/pawsx/es/1.1.0/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af\n", + "100% 3/3 [00:00<00:00, 647.94it/s]\n", + "[INFO|configuration_utils.py:545] 2021-07-20 09:49:20,512 >> loading configuration file https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/ff85ac64e56df502ec043af591c8b7be85583b22e6a4f5715146d461cd789f97.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|configuration_utils.py:581] 2021-07-20 09:49:20,512 >> Model config RobertaConfig {\n", + " \"architectures\": [\n", + " \"RobertaForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"bos_token_id\": 0,\n", + " \"eos_token_id\": 2,\n", + " \"finetuning_task\": \"paws-x\",\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"max_position_embeddings\": 514,\n", + " \"model_type\": \"roberta\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 1,\n", + " \"use_cache\": 
true,\n", + " \"vocab_size\": 50265\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-20 09:49:22,941 >> loading file https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/vocab.json from cache at /root/.cache/huggingface/transformers/6eaae493de84fa6ec66bcb5673055437acaefce1f59d8601bc2fe5c67e118d1c.a80f232f572026f92499b14999a8ed4e044e04cf3d01b9f2be298c98e78e8498\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-20 09:49:22,941 >> loading file https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/merges.txt from cache at /root/.cache/huggingface/transformers/6f7eacc3a8be0f2ccac79d197ccc70f65831409c32f4f49f8a43968d0ec8d04e.a0dfc41f9d0f03a56ba7a5401d770f6e43071045a0bd79073380d408d17a0d92\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-20 09:49:22,941 >> loading file https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/fa0ebe33b40c5fb911a969102ec8f72a5a7de108098917d817b5924edf9fe90d.2a5dc806edc00ab3a329cb22b9973596ca75b24ba0e5e4963bf1308de7237a3d\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-20 09:49:22,941 >> loading file https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-20 09:49:22,942 >> loading file https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/special_tokens_map.json from cache at /root/.cache/huggingface/transformers/051ab3c041e9debc74f58f307de15b70a96de57e0f4b31ceaae21fe4eea531ec.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-20 09:49:22,942 >> loading file https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/tokenizer_config.json from cache at /root/.cache/huggingface/transformers/1d0cecadbe0c9f16993a436d0ab40b879322ac605869d265787a93ea1e00ec7a.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|modeling_utils.py:1271] 2021-07-20 09:49:23,355 >> loading weights file https://huggingface.co/bertin-project/bertin-base-gaussian-exp-512seqlen/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/c4dacb0e6c084991812c2661ba4b4e6fc953317a1ed82b01bba8d1ceb63b27f9.129191ea8bf16f2e8f2c4683b881711c55eb0c166d86300274c92ee25c49aedc\n", + "[WARNING|modeling_utils.py:1502] 2021-07-20 09:49:24,672 >> Some weights of the model checkpoint at bertin-project/bertin-base-gaussian-exp-512seqlen were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.dense.weight']\n", + "- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "[WARNING|modeling_utils.py:1513] 2021-07-20 09:49:24,673 >> Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at bertin-project/bertin-base-gaussian-exp-512seqlen and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "07/20/2021 09:49:24 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /root/.cache/huggingface/datasets/pawsx/es/1.1.0/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/cache-84d89421334c2350.arrow\n", + "07/20/2021 09:49:24 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /root/.cache/huggingface/datasets/pawsx/es/1.1.0/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/cache-d75a7dc622a49f4d.arrow\n", + "07/20/2021 09:49:24 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /root/.cache/huggingface/datasets/pawsx/es/1.1.0/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/cache-9d49171b30b97c19.arrow\n", + "07/20/2021 09:49:24 - INFO - __main__ - Sample 41905 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'id': 41906, 'input_ids': [548, 1929, 692, 6768, 339, 34513, 1345, 69, 445, 329, 5588, 7357, 16, 298, 12773, 43545, 263, 15438, 405, 4843, 263, 14133, 18, 548, 1929, 692, 6768, 339, 34513, 1345, 69, 445, 329, 5588, 7357, 16, 298, 12773, 43545, 263, 15438, 405, 12137, 263, 15284, 18, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'label': 1, 'sentence1': 'El gobierno fue dirigido por Gordon Coates del Partido Unido, con George Forbes de Reforma como ministro de finanzas.', 'sentence2': 'El gobierno fue dirigido por Gordon Coates del Partido Unido, con George Forbes de Reforma como Ministro de Finanzas.'}.\n", + "07/20/2021 09:49:24 - INFO - __main__ - Sample 7296 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'id': 7297, 'input_ids': [548, 1229, 32484, 88, 291, 280, 477, 23041, 16, 486, 11944, 5436, 443, 552, 302, 41794, 11944, 285, 771, 34831, 686, 3543, 631, 46, 2930, 51, 14260, 6, 879, 75, 11944, 8535, 18, 1131, 879, 75, 11944, 406, 
528, 302, 879, 75, 11944, 14720, 34831, 686, 3543, 25719, 68, 508, 4298, 454, 18295, 1608, 32484, 88, 291, 363, 477, 23041, 1332, 11944, 5436, 443, 8535, 21440, 18, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'label': 0, 'sentence1': 'El gujarat y mumbai, maharashtra son un sanghar en parte hindú también llamado \"JAMOTAR\" Sanghar India.', 'sentence2': \"Los Sanghar (son un Sanghar parcialmente hindú también llamado `` Jamotar '' Gujarat y Mumbai Maharashtra India llamaron.\"}.\n", + "07/20/2021 09:49:24 - INFO - __main__ - Sample 1639 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'id': 1640, 'input_ids': [626, 2472, 285, 47724, 3628, 4361, 285, 21565, 271, 1183, 263, 302, 2889, 1531, 263, 7187, 291, 18433, 7644, 5936, 285, 295, 19819, 263, 10097, 263, 283, 8693, 16, 48831, 18, 626, 2472, 575, 47724, 3628, 4361, 285, 21565, 339, 302, 2889, 1531, 263, 7187, 291, 18433, 7644, 5936, 285, 295, 19819, 263, 10097, 263, 283, 8693, 285, 48831, 18, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'label': 1, 'sentence1': 'La investigación en fisiología militar comenzó en 1950 a través de un pequeño grupo de científicos y fisiólogos médicos en el Laboratorio de Ciencia de la Defensa, Delhi.', 'sentence2': 'La investigación sobre fisiología militar comenzó en 1950 por un pequeño grupo de científicos y fisiólogos médicos en el Laboratorio de Ciencia de la Defensa en Delhi.'}.\n", + "07/20/2021 09:49:25 - INFO - datasets.load - Found main folder for metric https://raw.githubusercontent.com/huggingface/datasets/1.9.0/metrics/accuracy/accuracy.py at /root/.cache/huggingface/modules/datasets_modules/metrics/accuracy\n", + "07/20/2021 09:49:25 - INFO - datasets.load - Found specific version folder for metric https://raw.githubusercontent.com/huggingface/datasets/1.9.0/metrics/accuracy/accuracy.py at /root/.cache/huggingface/modules/datasets_modules/metrics/accuracy/d60e08bd37e7c5a7bcc3620dd0d2788d25d233238ee0bdb3cfabde6c43d60574\n", + "07/20/2021 09:49:25 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/metrics/accuracy/accuracy.py to /root/.cache/huggingface/modules/datasets_modules/metrics/accuracy/d60e08bd37e7c5a7bcc3620dd0d2788d25d233238ee0bdb3cfabde6c43d60574/accuracy.py\n", + "07/20/2021 09:49:25 - INFO - datasets.load - Couldn't find dataset infos file at https://raw.githubusercontent.com/huggingface/datasets/1.9.0/metrics/accuracy/dataset_infos.json\n", + "07/20/2021 09:49:25 - INFO - datasets.load - Found metadata file for metric https://raw.githubusercontent.com/huggingface/datasets/1.9.0/metrics/accuracy/accuracy.py at /root/.cache/huggingface/modules/datasets_modules/metrics/accuracy/d60e08bd37e7c5a7bcc3620dd0d2788d25d233238ee0bdb3cfabde6c43d60574/accuracy.json\n", + "[INFO|trainer.py:522] 2021-07-20 09:49:28,301 >> The following columns in the training set don't have a 
corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: sentence1, id, sentence2.\n", + "[INFO|trainer.py:1162] 2021-07-20 09:49:28,314 >> ***** Running training *****\n", + "[INFO|trainer.py:1163] 2021-07-20 09:49:28,314 >> Num examples = 49401\n", + "[INFO|trainer.py:1164] 2021-07-20 09:49:28,314 >> Num Epochs = 5\n", + "[INFO|trainer.py:1165] 2021-07-20 09:49:28,314 >> Instantaneous batch size per device = 8\n", + "[INFO|trainer.py:1166] 2021-07-20 09:49:28,315 >> Total train batch size (w. parallel, distributed & accumulation) = 8\n", + "[INFO|trainer.py:1167] 2021-07-20 09:49:28,315 >> Gradient Accumulation steps = 1\n", + "[INFO|trainer.py:1168] 2021-07-20 09:49:28,315 >> Total optimization steps = 30880\n", + "[INFO|integrations.py:446] 2021-07-20 09:49:28,326 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mversae\u001b[0m (use `wandb login --relogin` to force relogin)\n", + "2021-07-20 09:49:29.625318: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m./outputs\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval/runs/3kwtbkiz\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in /content/wandb/run-20210720_094928-3kwtbkiz\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run `wandb offline` to turn off syncing.\n", + "\n", + "{'loss': 0.6452, 'learning_rate': 4.919041450777203e-05, 'epoch': 0.08}\n", + " 2% 500/30880 [01:36<2:48:52, 3.00it/s][INFO|trainer.py:1917] 2021-07-20 09:51:06,931 >> Saving model checkpoint to ./outputs/checkpoint-500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 09:51:06,933 >> Configuration saved in ./outputs/checkpoint-500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 09:51:08,483 >> Model weights saved in ./outputs/checkpoint-500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 09:51:08,483 >> tokenizer config file saved in ./outputs/checkpoint-500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 09:51:08,484 >> Special tokens file saved in ./outputs/checkpoint-500/special_tokens_map.json\n", + " {'loss': 0.491, 'learning_rate': 4.8380829015544046e-05, 'epoch': 0.16}\n", + " 3% 1000/30880 [03:18<2:44:01, 3.04it/s][INFO|trainer.py:1917] 2021-07-20 09:52:49,299 >> Saving model checkpoint to ./outputs/checkpoint-1000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 09:52:49,300 >> Configuration saved in ./outputs/checkpoint-1000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 09:52:50,782 >> Model weights saved in ./outputs/checkpoint-1000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 09:52:50,783 >> tokenizer config file saved in ./outputs/checkpoint-1000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 09:52:50,783 >> Special tokens file saved in ./outputs/checkpoint-1000/special_tokens_map.json\n", + "{'loss': 0.4463, 'learning_rate': 4.7571243523316064e-05, 'epoch': 0.24}\n", + " 5% 1500/30880 [05:00<2:42:57, 
3.00it/s][INFO|trainer.py:1917] 2021-07-20 09:54:31,122 >> Saving model checkpoint to ./outputs/checkpoint-1500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 09:54:31,123 >> Configuration saved in ./outputs/checkpoint-1500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 09:54:32,493 >> Model weights saved in ./outputs/checkpoint-1500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 09:54:32,494 >> tokenizer config file saved in ./outputs/checkpoint-1500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 09:54:32,494 >> Special tokens file saved in ./outputs/checkpoint-1500/special_tokens_map.json\n", + " 6% 2000/30880 [06:42<2:40:15, 3.00it/s]{'loss': 0.4246, 'learning_rate': 4.676165803108808e-05, 'epoch': 0.32}\n", + "[INFO|trainer.py:1917] 2021-07-20 09:56:12,981 >> Saving model checkpoint to ./outputs/checkpoint-2000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 09:56:12,982 >> Configuration saved in ./outputs/checkpoint-2000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 09:56:14,367 >> Model weights saved in ./outputs/checkpoint-2000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 09:56:14,368 >> tokenizer config file saved in ./outputs/checkpoint-2000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 09:56:14,368 >> Special tokens file saved in ./outputs/checkpoint-2000/special_tokens_map.json\n", + "{'loss': 0.4079, 'learning_rate': 4.595207253886011e-05, 'epoch': 0.4}\n", + " 8% 2500/30880 [08:23<2:36:12, 3.03it/s][INFO|trainer.py:1917] 2021-07-20 09:57:54,652 >> Saving model checkpoint to ./outputs/checkpoint-2500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 09:57:54,653 >> Configuration saved in ./outputs/checkpoint-2500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 09:57:56,007 >> Model weights saved in ./outputs/checkpoint-2500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 09:57:56,008 >> tokenizer config file saved in ./outputs/checkpoint-2500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 09:57:56,008 >> Special tokens file saved in ./outputs/checkpoint-2500/special_tokens_map.json\n", + "{'loss': 0.3853, 'learning_rate': 4.5142487046632126e-05, 'epoch': 0.49}\n", + " 10% 3000/30880 [10:05<2:30:38, 3.08it/s][INFO|trainer.py:1917] 2021-07-20 09:59:36,412 >> Saving model checkpoint to ./outputs/checkpoint-3000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 09:59:36,415 >> Configuration saved in ./outputs/checkpoint-3000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 09:59:37,812 >> Model weights saved in ./outputs/checkpoint-3000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 09:59:37,813 >> tokenizer config file saved in ./outputs/checkpoint-3000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 09:59:37,813 >> Special tokens file saved in ./outputs/checkpoint-3000/special_tokens_map.json\n", + " 11% 3500/30880 [11:47<2:33:12, 2.98it/s]{'loss': 0.3867, 'learning_rate': 4.433290155440415e-05, 'epoch': 0.57}\n", + "[INFO|trainer.py:1917] 2021-07-20 10:01:18,109 >> Saving model checkpoint to ./outputs/checkpoint-3500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:01:18,110 >> Configuration saved in ./outputs/checkpoint-3500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:01:19,463 >> Model weights saved in ./outputs/checkpoint-3500/pytorch_model.bin\n", + 
"[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:01:19,464 >> tokenizer config file saved in ./outputs/checkpoint-3500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:01:19,464 >> Special tokens file saved in ./outputs/checkpoint-3500/special_tokens_map.json\n", + " 13% 4000/30880 [13:28<2:28:15, 3.02it/s]{'loss': 0.3563, 'learning_rate': 4.352331606217617e-05, 'epoch': 0.65}\n", + "[INFO|trainer.py:1917] 2021-07-20 10:02:59,566 >> Saving model checkpoint to ./outputs/checkpoint-4000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:02:59,567 >> Configuration saved in ./outputs/checkpoint-4000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:03:01,069 >> Model weights saved in ./outputs/checkpoint-4000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:03:01,070 >> tokenizer config file saved in ./outputs/checkpoint-4000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:03:01,071 >> Special tokens file saved in ./outputs/checkpoint-4000/special_tokens_map.json\n", + "{'loss': 0.3608, 'learning_rate': 4.271373056994819e-05, 'epoch': 0.73}\n", + " 15% 4500/30880 [15:10<2:27:49, 2.97it/s][INFO|trainer.py:1917] 2021-07-20 10:04:41,717 >> Saving model checkpoint to ./outputs/checkpoint-4500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:04:41,718 >> Configuration saved in ./outputs/checkpoint-4500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:04:43,227 >> Model weights saved in ./outputs/checkpoint-4500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:04:43,228 >> tokenizer config file saved in ./outputs/checkpoint-4500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:04:43,228 >> Special tokens file saved in ./outputs/checkpoint-4500/special_tokens_map.json\n", + "{'loss': 0.3633, 'learning_rate': 4.190414507772021e-05, 'epoch': 0.81}\n", + " 16% 5000/30880 [16:52<2:24:21, 2.99it/s][INFO|trainer.py:1917] 2021-07-20 10:06:23,477 >> Saving model checkpoint to ./outputs/checkpoint-5000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:06:23,479 >> Configuration saved in ./outputs/checkpoint-5000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:06:24,875 >> Model weights saved in ./outputs/checkpoint-5000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:06:24,876 >> tokenizer config file saved in ./outputs/checkpoint-5000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:06:24,878 >> Special tokens file saved in ./outputs/checkpoint-5000/special_tokens_map.json\n", + " 18% 5500/30880 [18:34<2:20:37, 3.01it/s]{'loss': 0.3467, 'learning_rate': 4.109455958549223e-05, 'epoch': 0.89}\n", + " 18% 5500/30880 [18:34<2:20:37, 3.01it/s][INFO|trainer.py:1917] 2021-07-20 10:08:05,279 >> Saving model checkpoint to ./outputs/checkpoint-5500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:08:05,280 >> Configuration saved in ./outputs/checkpoint-5500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:08:06,822 >> Model weights saved in ./outputs/checkpoint-5500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:08:06,822 >> tokenizer config file saved in ./outputs/checkpoint-5500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:08:06,823 >> Special tokens file saved in ./outputs/checkpoint-5500/special_tokens_map.json\n", + " 19% 6000/30880 [20:16<2:18:01, 
3.00it/s]{'loss': 0.3277, 'learning_rate': 4.028497409326425e-05, 'epoch': 0.97}\n", + " 19% 6000/30880 [20:16<2:18:01, 3.00it/s][INFO|trainer.py:1917] 2021-07-20 10:09:47,508 >> Saving model checkpoint to ./outputs/checkpoint-6000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:09:47,509 >> Configuration saved in ./outputs/checkpoint-6000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:09:48,941 >> Model weights saved in ./outputs/checkpoint-6000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:09:48,942 >> tokenizer config file saved in ./outputs/checkpoint-6000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:09:48,942 >> Special tokens file saved in ./outputs/checkpoint-6000/special_tokens_map.json\n", + "{'loss': 0.3022, 'learning_rate': 3.9475388601036275e-05, 'epoch': 1.05}\n", + " 21% 6500/30880 [21:57<2:13:46, 3.04it/s][INFO|trainer.py:1917] 2021-07-20 10:11:28,697 >> Saving model checkpoint to ./outputs/checkpoint-6500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:11:28,699 >> Configuration saved in ./outputs/checkpoint-6500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:11:30,027 >> Model weights saved in ./outputs/checkpoint-6500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:11:30,028 >> tokenizer config file saved in ./outputs/checkpoint-6500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:11:30,028 >> Special tokens file saved in ./outputs/checkpoint-6500/special_tokens_map.json\n", + "{'loss': 0.2927, 'learning_rate': 3.8665803108808294e-05, 'epoch': 1.13}\n", + " 23% 7000/30880 [23:38<2:11:19, 3.03it/s][INFO|trainer.py:1917] 2021-07-20 10:13:09,240 >> Saving model checkpoint to ./outputs/checkpoint-7000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:13:09,241 >> Configuration saved in ./outputs/checkpoint-7000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:13:10,606 >> Model weights saved in ./outputs/checkpoint-7000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:13:10,607 >> tokenizer config file saved in ./outputs/checkpoint-7000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:13:10,608 >> Special tokens file saved in ./outputs/checkpoint-7000/special_tokens_map.json\n", + "{'loss': 0.2814, 'learning_rate': 3.785621761658031e-05, 'epoch': 1.21}\n", + " 24% 7500/30880 [25:20<2:07:42, 3.05it/s][INFO|trainer.py:1917] 2021-07-20 10:14:50,903 >> Saving model checkpoint to ./outputs/checkpoint-7500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:14:50,904 >> Configuration saved in ./outputs/checkpoint-7500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:14:52,298 >> Model weights saved in ./outputs/checkpoint-7500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:14:52,298 >> tokenizer config file saved in ./outputs/checkpoint-7500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:14:52,299 >> Special tokens file saved in ./outputs/checkpoint-7500/special_tokens_map.json\n", + "{'loss': 0.2721, 'learning_rate': 3.704663212435233e-05, 'epoch': 1.3}\n", + " 26% 8000/30880 [27:00<2:07:24, 2.99it/s][INFO|trainer.py:1917] 2021-07-20 10:16:31,643 >> Saving model checkpoint to ./outputs/checkpoint-8000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:16:31,645 >> Configuration saved in ./outputs/checkpoint-8000/config.json\n", + 
"[INFO|modeling_utils.py:997] 2021-07-20 10:16:33,048 >> Model weights saved in ./outputs/checkpoint-8000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:16:33,049 >> tokenizer config file saved in ./outputs/checkpoint-8000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:16:33,049 >> Special tokens file saved in ./outputs/checkpoint-8000/special_tokens_map.json\n", + "{'loss': 0.2911, 'learning_rate': 3.6237046632124356e-05, 'epoch': 1.38}\n", + " 28% 8500/30880 [28:41<2:05:11, 2.98it/s][INFO|trainer.py:1917] 2021-07-20 10:18:12,737 >> Saving model checkpoint to ./outputs/checkpoint-8500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:18:12,738 >> Configuration saved in ./outputs/checkpoint-8500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:18:14,216 >> Model weights saved in ./outputs/checkpoint-8500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:18:14,217 >> tokenizer config file saved in ./outputs/checkpoint-8500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:18:14,217 >> Special tokens file saved in ./outputs/checkpoint-8500/special_tokens_map.json\n", + " 29% 9000/30880 [30:23<2:01:15, 3.01it/s]{'loss': 0.2841, 'learning_rate': 3.5427461139896374e-05, 'epoch': 1.46}\n", + " 29% 9000/30880 [30:23<2:01:15, 3.01it/s][INFO|trainer.py:1917] 2021-07-20 10:19:53,846 >> Saving model checkpoint to ./outputs/checkpoint-9000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:19:53,847 >> Configuration saved in ./outputs/checkpoint-9000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:19:55,214 >> Model weights saved in ./outputs/checkpoint-9000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:19:55,215 >> tokenizer config file saved in ./outputs/checkpoint-9000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:19:55,215 >> Special tokens file saved in ./outputs/checkpoint-9000/special_tokens_map.json\n", + " 31% 9500/30880 [32:03<2:00:20, 2.96it/s][INFO|trainer.py:1917] 2021-07-20 10:21:34,729 >> Saving model checkpoint to ./outputs/checkpoint-9500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:21:34,731 >> Configuration saved in ./outputs/checkpoint-9500/config.json\n", + "{'loss': 0.2569, 'learning_rate': 3.46178756476684e-05, 'epoch': 1.54}\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:21:36,092 >> Model weights saved in ./outputs/checkpoint-9500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:21:36,093 >> tokenizer config file saved in ./outputs/checkpoint-9500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:21:36,094 >> Special tokens file saved in ./outputs/checkpoint-9500/special_tokens_map.json\n", + " 32% 10000/30880 [33:45<1:58:45, 2.93it/s]{'loss': 0.2648, 'learning_rate': 3.380829015544041e-05, 'epoch': 1.62}\n", + "[INFO|trainer.py:1917] 2021-07-20 10:23:15,925 >> Saving model checkpoint to ./outputs/checkpoint-10000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:23:15,928 >> Configuration saved in ./outputs/checkpoint-10000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:23:17,300 >> Model weights saved in ./outputs/checkpoint-10000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:23:17,301 >> tokenizer config file saved in ./outputs/checkpoint-10000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 
10:23:17,301 >> Special tokens file saved in ./outputs/checkpoint-10000/special_tokens_map.json\n", + "{'loss': 0.2568, 'learning_rate': 3.2998704663212436e-05, 'epoch': 1.7}\n", + " 34% 10500/30880 [35:25<1:51:46, 3.04it/s][INFO|trainer.py:1917] 2021-07-20 10:24:56,564 >> Saving model checkpoint to ./outputs/checkpoint-10500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:24:56,565 >> Configuration saved in ./outputs/checkpoint-10500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:24:57,912 >> Model weights saved in ./outputs/checkpoint-10500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:24:57,913 >> tokenizer config file saved in ./outputs/checkpoint-10500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:24:57,913 >> Special tokens file saved in ./outputs/checkpoint-10500/special_tokens_map.json\n", + "{'loss': 0.2866, 'learning_rate': 3.2189119170984454e-05, 'epoch': 1.78}\n", + " 36% 11000/30880 [37:06<1:52:03, 2.96it/s][INFO|trainer.py:1917] 2021-07-20 10:26:37,552 >> Saving model checkpoint to ./outputs/checkpoint-11000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:26:37,554 >> Configuration saved in ./outputs/checkpoint-11000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:26:38,845 >> Model weights saved in ./outputs/checkpoint-11000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:26:38,846 >> tokenizer config file saved in ./outputs/checkpoint-11000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:26:38,847 >> Special tokens file saved in ./outputs/checkpoint-11000/special_tokens_map.json\n", + " 37% 11500/30880 [38:47<1:48:31, 2.98it/s]{'loss': 0.2592, 'learning_rate': 3.137953367875648e-05, 'epoch': 1.86}\n", + " 37% 11500/30880 [38:47<1:48:31, 2.98it/s][INFO|trainer.py:1917] 2021-07-20 10:28:18,441 >> Saving model checkpoint to ./outputs/checkpoint-11500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:28:18,445 >> Configuration saved in ./outputs/checkpoint-11500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:28:19,791 >> Model weights saved in ./outputs/checkpoint-11500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:28:19,791 >> tokenizer config file saved in ./outputs/checkpoint-11500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:28:19,792 >> Special tokens file saved in ./outputs/checkpoint-11500/special_tokens_map.json\n", + " 39% 12000/30880 [40:28<1:44:47, 3.00it/s]{'loss': 0.2849, 'learning_rate': 3.05699481865285e-05, 'epoch': 1.94}\n", + " 39% 12000/30880 [40:28<1:44:47, 3.00it/s][INFO|trainer.py:1917] 2021-07-20 10:29:59,129 >> Saving model checkpoint to ./outputs/checkpoint-12000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:29:59,130 >> Configuration saved in ./outputs/checkpoint-12000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:30:00,332 >> Model weights saved in ./outputs/checkpoint-12000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:30:00,332 >> tokenizer config file saved in ./outputs/checkpoint-12000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:30:00,333 >> Special tokens file saved in ./outputs/checkpoint-12000/special_tokens_map.json\n", + " 40% 12500/30880 [42:08<1:44:50, 2.92it/s]{'loss': 0.2305, 'learning_rate': 2.976036269430052e-05, 'epoch': 2.02}\n", + " 40% 12500/30880 [42:09<1:44:50, 
2.92it/s][INFO|trainer.py:1917] 2021-07-20 10:31:39,780 >> Saving model checkpoint to ./outputs/checkpoint-12500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:31:39,781 >> Configuration saved in ./outputs/checkpoint-12500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:31:41,032 >> Model weights saved in ./outputs/checkpoint-12500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:31:41,033 >> tokenizer config file saved in ./outputs/checkpoint-12500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:31:41,034 >> Special tokens file saved in ./outputs/checkpoint-12500/special_tokens_map.json\n", + " 42% 13000/30880 [43:49<1:40:14, 2.97it/s]{'loss': 0.1956, 'learning_rate': 2.8950777202072538e-05, 'epoch': 2.1}\n", + " 42% 13000/30880 [43:49<1:40:14, 2.97it/s][INFO|trainer.py:1917] 2021-07-20 10:33:20,176 >> Saving model checkpoint to ./outputs/checkpoint-13000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:33:20,177 >> Configuration saved in ./outputs/checkpoint-13000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:33:21,603 >> Model weights saved in ./outputs/checkpoint-13000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:33:21,604 >> tokenizer config file saved in ./outputs/checkpoint-13000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:33:21,604 >> Special tokens file saved in ./outputs/checkpoint-13000/special_tokens_map.json\n", + "{'loss': 0.2148, 'learning_rate': 2.814119170984456e-05, 'epoch': 2.19}\n", + " 44% 13500/30880 [45:29<1:34:37, 3.06it/s][INFO|trainer.py:1917] 2021-07-20 10:35:00,281 >> Saving model checkpoint to ./outputs/checkpoint-13500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:35:00,282 >> Configuration saved in ./outputs/checkpoint-13500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:35:01,688 >> Model weights saved in ./outputs/checkpoint-13500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:35:01,689 >> tokenizer config file saved in ./outputs/checkpoint-13500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:35:01,689 >> Special tokens file saved in ./outputs/checkpoint-13500/special_tokens_map.json\n", + "{'loss': 0.1991, 'learning_rate': 2.7331606217616585e-05, 'epoch': 2.27}\n", + " 45% 14000/30880 [47:10<1:34:19, 2.98it/s][INFO|trainer.py:1917] 2021-07-20 10:36:40,805 >> Saving model checkpoint to ./outputs/checkpoint-14000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:36:40,806 >> Configuration saved in ./outputs/checkpoint-14000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:36:42,190 >> Model weights saved in ./outputs/checkpoint-14000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:36:42,191 >> tokenizer config file saved in ./outputs/checkpoint-14000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:36:42,191 >> Special tokens file saved in ./outputs/checkpoint-14000/special_tokens_map.json\n", + "{'loss': 0.197, 'learning_rate': 2.6522020725388604e-05, 'epoch': 2.35}\n", + " 47% 14500/30880 [48:50<1:30:21, 3.02it/s][INFO|trainer.py:1917] 2021-07-20 10:38:21,321 >> Saving model checkpoint to ./outputs/checkpoint-14500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:38:21,322 >> Configuration saved in ./outputs/checkpoint-14500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:38:22,767 >> Model 
weights saved in ./outputs/checkpoint-14500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:38:22,767 >> tokenizer config file saved in ./outputs/checkpoint-14500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:38:22,768 >> Special tokens file saved in ./outputs/checkpoint-14500/special_tokens_map.json\n", + " 49% 15000/30880 [50:30<1:27:09, 3.04it/s]{'loss': 0.2066, 'learning_rate': 2.5712435233160625e-05, 'epoch': 2.43}\n", + " 49% 15000/30880 [50:30<1:27:09, 3.04it/s][INFO|trainer.py:1917] 2021-07-20 10:40:01,492 >> Saving model checkpoint to ./outputs/checkpoint-15000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:40:01,493 >> Configuration saved in ./outputs/checkpoint-15000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:40:02,931 >> Model weights saved in ./outputs/checkpoint-15000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:40:02,932 >> tokenizer config file saved in ./outputs/checkpoint-15000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:40:02,932 >> Special tokens file saved in ./outputs/checkpoint-15000/special_tokens_map.json\n", + "{'loss': 0.2204, 'learning_rate': 2.4902849740932644e-05, 'epoch': 2.51}\n", + " 50% 15500/30880 [52:11<1:22:51, 3.09it/s][INFO|trainer.py:1917] 2021-07-20 10:41:42,253 >> Saving model checkpoint to ./outputs/checkpoint-15500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:41:42,254 >> Configuration saved in ./outputs/checkpoint-15500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:41:43,614 >> Model weights saved in ./outputs/checkpoint-15500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:41:43,615 >> tokenizer config file saved in ./outputs/checkpoint-15500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:41:43,615 >> Special tokens file saved in ./outputs/checkpoint-15500/special_tokens_map.json\n", + "{'loss': 0.2226, 'learning_rate': 2.4093264248704665e-05, 'epoch': 2.59}\n", + " 52% 16000/30880 [53:51<1:22:06, 3.02it/s][INFO|trainer.py:1917] 2021-07-20 10:43:22,272 >> Saving model checkpoint to ./outputs/checkpoint-16000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:43:22,273 >> Configuration saved in ./outputs/checkpoint-16000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:43:23,624 >> Model weights saved in ./outputs/checkpoint-16000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:43:23,625 >> tokenizer config file saved in ./outputs/checkpoint-16000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:43:23,625 >> Special tokens file saved in ./outputs/checkpoint-16000/special_tokens_map.json\n", + " 53% 16500/30880 [55:32<1:18:46, 3.04it/s]{'loss': 0.2047, 'learning_rate': 2.3283678756476684e-05, 'epoch': 2.67}\n", + " 53% 16500/30880 [55:32<1:18:46, 3.04it/s][INFO|trainer.py:1917] 2021-07-20 10:45:03,203 >> Saving model checkpoint to ./outputs/checkpoint-16500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:45:03,204 >> Configuration saved in ./outputs/checkpoint-16500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:45:04,557 >> Model weights saved in ./outputs/checkpoint-16500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:45:04,560 >> tokenizer config file saved in ./outputs/checkpoint-16500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 
2021-07-20 10:45:04,560 >> Special tokens file saved in ./outputs/checkpoint-16500/special_tokens_map.json\n", + " 55% 17000/30880 [57:13<1:15:48, 3.05it/s]{'loss': 0.2121, 'learning_rate': 2.2474093264248706e-05, 'epoch': 2.75}\n", + " 55% 17000/30880 [57:13<1:15:48, 3.05it/s][INFO|trainer.py:1917] 2021-07-20 10:46:44,112 >> Saving model checkpoint to ./outputs/checkpoint-17000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:46:44,113 >> Configuration saved in ./outputs/checkpoint-17000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:46:45,478 >> Model weights saved in ./outputs/checkpoint-17000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:46:45,479 >> tokenizer config file saved in ./outputs/checkpoint-17000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:46:45,479 >> Special tokens file saved in ./outputs/checkpoint-17000/special_tokens_map.json\n", + " 57% 17500/30880 [58:53<1:11:40, 3.11it/s]{'loss': 0.2176, 'learning_rate': 2.1664507772020724e-05, 'epoch': 2.83}\n", + "[INFO|trainer.py:1917] 2021-07-20 10:48:24,106 >> Saving model checkpoint to ./outputs/checkpoint-17500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:48:24,107 >> Configuration saved in ./outputs/checkpoint-17500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:48:25,513 >> Model weights saved in ./outputs/checkpoint-17500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:48:25,513 >> tokenizer config file saved in ./outputs/checkpoint-17500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:48:25,514 >> Special tokens file saved in ./outputs/checkpoint-17500/special_tokens_map.json\n", + "{'loss': 0.2093, 'learning_rate': 2.0854922279792746e-05, 'epoch': 2.91}\n", + " 58% 18000/30880 [1:00:34<1:12:22, 2.97it/s][INFO|trainer.py:1917] 2021-07-20 10:50:05,107 >> Saving model checkpoint to ./outputs/checkpoint-18000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:50:05,108 >> Configuration saved in ./outputs/checkpoint-18000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:50:06,489 >> Model weights saved in ./outputs/checkpoint-18000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:50:06,490 >> tokenizer config file saved in ./outputs/checkpoint-18000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:50:06,490 >> Special tokens file saved in ./outputs/checkpoint-18000/special_tokens_map.json\n", + " 60% 18500/30880 [1:02:15<1:07:56, 3.04it/s]{'loss': 0.2007, 'learning_rate': 2.0045336787564768e-05, 'epoch': 3.0}\n", + " 60% 18500/30880 [1:02:15<1:07:56, 3.04it/s][INFO|trainer.py:1917] 2021-07-20 10:51:46,554 >> Saving model checkpoint to ./outputs/checkpoint-18500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:51:46,556 >> Configuration saved in ./outputs/checkpoint-18500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:51:47,880 >> Model weights saved in ./outputs/checkpoint-18500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:51:47,882 >> tokenizer config file saved in ./outputs/checkpoint-18500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:51:47,882 >> Special tokens file saved in ./outputs/checkpoint-18500/special_tokens_map.json\n", + " 62% 19000/30880 [1:03:55<1:05:21, 3.03it/s]{'loss': 0.165, 'learning_rate': 1.9235751295336786e-05, 'epoch': 3.08}\n", + "[INFO|trainer.py:1917] 
2021-07-20 10:53:26,301 >> Saving model checkpoint to ./outputs/checkpoint-19000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:53:26,303 >> Configuration saved in ./outputs/checkpoint-19000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:53:27,751 >> Model weights saved in ./outputs/checkpoint-19000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:53:27,752 >> tokenizer config file saved in ./outputs/checkpoint-19000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:53:27,752 >> Special tokens file saved in ./outputs/checkpoint-19000/special_tokens_map.json\n", + "{'loss': 0.1774, 'learning_rate': 1.8426165803108808e-05, 'epoch': 3.16}\n", + " 63% 19500/30880 [1:05:35<1:02:07, 3.05it/s][INFO|trainer.py:1917] 2021-07-20 10:55:06,699 >> Saving model checkpoint to ./outputs/checkpoint-19500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:55:06,701 >> Configuration saved in ./outputs/checkpoint-19500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:55:07,948 >> Model weights saved in ./outputs/checkpoint-19500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:55:07,949 >> tokenizer config file saved in ./outputs/checkpoint-19500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:55:07,949 >> Special tokens file saved in ./outputs/checkpoint-19500/special_tokens_map.json\n", + " 65% 20000/30880 [1:07:16<59:35, 3.04it/s]{'loss': 0.1746, 'learning_rate': 1.761658031088083e-05, 'epoch': 3.24}\n", + " 65% 20000/30880 [1:07:16<59:35, 3.04it/s][INFO|trainer.py:1917] 2021-07-20 10:56:47,099 >> Saving model checkpoint to ./outputs/checkpoint-20000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:56:47,100 >> Configuration saved in ./outputs/checkpoint-20000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:56:48,483 >> Model weights saved in ./outputs/checkpoint-20000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:56:48,483 >> tokenizer config file saved in ./outputs/checkpoint-20000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:56:48,484 >> Special tokens file saved in ./outputs/checkpoint-20000/special_tokens_map.json\n", + " 66% 20500/30880 [1:08:56<56:53, 3.04it/s]{'loss': 0.1562, 'learning_rate': 1.6806994818652848e-05, 'epoch': 3.32}\n", + "[INFO|trainer.py:1917] 2021-07-20 10:58:27,121 >> Saving model checkpoint to ./outputs/checkpoint-20500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 10:58:27,122 >> Configuration saved in ./outputs/checkpoint-20500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 10:58:28,386 >> Model weights saved in ./outputs/checkpoint-20500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 10:58:28,387 >> tokenizer config file saved in ./outputs/checkpoint-20500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 10:58:28,387 >> Special tokens file saved in ./outputs/checkpoint-20500/special_tokens_map.json\n", + "{'loss': 0.1704, 'learning_rate': 1.5997409326424873e-05, 'epoch': 3.4}\n", + " 68% 21000/30880 [1:10:36<54:01, 3.05it/s][INFO|trainer.py:1917] 2021-07-20 11:00:07,502 >> Saving model checkpoint to ./outputs/checkpoint-21000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:00:07,503 >> Configuration saved in ./outputs/checkpoint-21000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:00:08,828 >> Model weights saved in 
./outputs/checkpoint-21000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:00:08,828 >> tokenizer config file saved in ./outputs/checkpoint-21000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:00:08,829 >> Special tokens file saved in ./outputs/checkpoint-21000/special_tokens_map.json\n", + " 70% 21500/30880 [1:12:17<51:03, 3.06it/s]{'loss': 0.1581, 'learning_rate': 1.5187823834196893e-05, 'epoch': 3.48}\n", + "[INFO|trainer.py:1917] 2021-07-20 11:01:48,047 >> Saving model checkpoint to ./outputs/checkpoint-21500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:01:48,048 >> Configuration saved in ./outputs/checkpoint-21500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:01:49,401 >> Model weights saved in ./outputs/checkpoint-21500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:01:49,402 >> tokenizer config file saved in ./outputs/checkpoint-21500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:01:49,402 >> Special tokens file saved in ./outputs/checkpoint-21500/special_tokens_map.json\n", + " 71% 22000/30880 [1:13:57<48:11, 3.07it/s]{'loss': 0.1737, 'learning_rate': 1.4378238341968913e-05, 'epoch': 3.56}\n", + "[INFO|trainer.py:1917] 2021-07-20 11:03:28,457 >> Saving model checkpoint to ./outputs/checkpoint-22000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:03:28,458 >> Configuration saved in ./outputs/checkpoint-22000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:03:29,825 >> Model weights saved in ./outputs/checkpoint-22000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:03:29,827 >> tokenizer config file saved in ./outputs/checkpoint-22000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:03:29,827 >> Special tokens file saved in ./outputs/checkpoint-22000/special_tokens_map.json\n", + "{'loss': 0.1471, 'learning_rate': 1.3568652849740935e-05, 'epoch': 3.64}\n", + " 73% 22500/30880 [1:15:37<44:21, 3.15it/s][INFO|trainer.py:1917] 2021-07-20 11:05:08,418 >> Saving model checkpoint to ./outputs/checkpoint-22500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:05:08,419 >> Configuration saved in ./outputs/checkpoint-22500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:05:09,758 >> Model weights saved in ./outputs/checkpoint-22500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:05:09,759 >> tokenizer config file saved in ./outputs/checkpoint-22500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:05:09,759 >> Special tokens file saved in ./outputs/checkpoint-22500/special_tokens_map.json\n", + "{'loss': 0.1502, 'learning_rate': 1.2759067357512955e-05, 'epoch': 3.72}\n", + " 74% 23000/30880 [1:17:18<43:25, 3.02it/s][INFO|trainer.py:1917] 2021-07-20 11:06:48,977 >> Saving model checkpoint to ./outputs/checkpoint-23000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:06:48,978 >> Configuration saved in ./outputs/checkpoint-23000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:06:50,327 >> Model weights saved in ./outputs/checkpoint-23000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:06:50,328 >> tokenizer config file saved in ./outputs/checkpoint-23000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:06:50,329 >> Special tokens file saved in ./outputs/checkpoint-23000/special_tokens_map.json\n", 
+ "{'loss': 0.1631, 'learning_rate': 1.1949481865284974e-05, 'epoch': 3.81}\n", + " 76% 23500/30880 [1:18:58<40:34, 3.03it/s][INFO|trainer.py:1917] 2021-07-20 11:08:29,144 >> Saving model checkpoint to ./outputs/checkpoint-23500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:08:29,145 >> Configuration saved in ./outputs/checkpoint-23500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:08:30,568 >> Model weights saved in ./outputs/checkpoint-23500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:08:30,569 >> tokenizer config file saved in ./outputs/checkpoint-23500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:08:30,569 >> Special tokens file saved in ./outputs/checkpoint-23500/special_tokens_map.json\n", + "{'loss': 0.1439, 'learning_rate': 1.1139896373056995e-05, 'epoch': 3.89}\n", + " 78% 24000/30880 [1:20:38<37:35, 3.05it/s][INFO|trainer.py:1917] 2021-07-20 11:10:09,196 >> Saving model checkpoint to ./outputs/checkpoint-24000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:10:09,198 >> Configuration saved in ./outputs/checkpoint-24000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:10:10,576 >> Model weights saved in ./outputs/checkpoint-24000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:10:10,576 >> tokenizer config file saved in ./outputs/checkpoint-24000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:10:10,577 >> Special tokens file saved in ./outputs/checkpoint-24000/special_tokens_map.json\n", + " 79% 24500/30880 [1:22:18<34:19, 3.10it/s][INFO|trainer.py:1917] 2021-07-20 11:11:49,603 >> Saving model checkpoint to ./outputs/checkpoint-24500\n", + "{'loss': 0.1415, 'learning_rate': 1.0330310880829017e-05, 'epoch': 3.97}\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:11:49,606 >> Configuration saved in ./outputs/checkpoint-24500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:11:51,041 >> Model weights saved in ./outputs/checkpoint-24500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:11:51,042 >> tokenizer config file saved in ./outputs/checkpoint-24500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:11:51,042 >> Special tokens file saved in ./outputs/checkpoint-24500/special_tokens_map.json\n", + "{'loss': 0.132, 'learning_rate': 9.520725388601037e-06, 'epoch': 4.05}\n", + " 81% 25000/30880 [1:23:59<32:26, 3.02it/s][INFO|trainer.py:1917] 2021-07-20 11:13:29,797 >> Saving model checkpoint to ./outputs/checkpoint-25000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:13:29,799 >> Configuration saved in ./outputs/checkpoint-25000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:13:31,171 >> Model weights saved in ./outputs/checkpoint-25000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:13:31,172 >> tokenizer config file saved in ./outputs/checkpoint-25000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:13:31,172 >> Special tokens file saved in ./outputs/checkpoint-25000/special_tokens_map.json\n", + "{'loss': 0.1063, 'learning_rate': 8.711139896373057e-06, 'epoch': 4.13}\n", + " 83% 25500/30880 [1:25:38<28:47, 3.11it/s][INFO|trainer.py:1917] 2021-07-20 11:15:09,283 >> Saving model checkpoint to ./outputs/checkpoint-25500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:15:09,284 >> Configuration saved in 
./outputs/checkpoint-25500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:15:10,684 >> Model weights saved in ./outputs/checkpoint-25500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:15:10,685 >> tokenizer config file saved in ./outputs/checkpoint-25500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:15:10,685 >> Special tokens file saved in ./outputs/checkpoint-25500/special_tokens_map.json\n", + "{'loss': 0.0984, 'learning_rate': 7.901554404145079e-06, 'epoch': 4.21}\n", + " 84% 26000/30880 [1:27:18<26:36, 3.06it/s][INFO|trainer.py:1917] 2021-07-20 11:16:49,478 >> Saving model checkpoint to ./outputs/checkpoint-26000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:16:49,479 >> Configuration saved in ./outputs/checkpoint-26000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:16:50,856 >> Model weights saved in ./outputs/checkpoint-26000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:16:50,857 >> tokenizer config file saved in ./outputs/checkpoint-26000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:16:50,857 >> Special tokens file saved in ./outputs/checkpoint-26000/special_tokens_map.json\n", + "{'loss': 0.1332, 'learning_rate': 7.091968911917099e-06, 'epoch': 4.29}\n", + " 86% 26500/30880 [1:28:58<24:15, 3.01it/s][INFO|trainer.py:1917] 2021-07-20 11:18:29,721 >> Saving model checkpoint to ./outputs/checkpoint-26500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:18:29,723 >> Configuration saved in ./outputs/checkpoint-26500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:18:31,148 >> Model weights saved in ./outputs/checkpoint-26500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:18:31,149 >> tokenizer config file saved in ./outputs/checkpoint-26500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:18:31,149 >> Special tokens file saved in ./outputs/checkpoint-26500/special_tokens_map.json\n", + "{'loss': 0.1032, 'learning_rate': 6.282383419689119e-06, 'epoch': 4.37}\n", + " 87% 27000/30880 [1:30:38<21:09, 3.06it/s][INFO|trainer.py:1917] 2021-07-20 11:20:09,194 >> Saving model checkpoint to ./outputs/checkpoint-27000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:20:09,196 >> Configuration saved in ./outputs/checkpoint-27000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:20:10,677 >> Model weights saved in ./outputs/checkpoint-27000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:20:10,677 >> tokenizer config file saved in ./outputs/checkpoint-27000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:20:10,678 >> Special tokens file saved in ./outputs/checkpoint-27000/special_tokens_map.json\n", + "{'loss': 0.1309, 'learning_rate': 5.47279792746114e-06, 'epoch': 4.45}\n", + " 89% 27500/30880 [1:32:18<18:47, 3.00it/s][INFO|trainer.py:1917] 2021-07-20 11:21:49,423 >> Saving model checkpoint to ./outputs/checkpoint-27500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:21:49,434 >> Configuration saved in ./outputs/checkpoint-27500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:21:50,803 >> Model weights saved in ./outputs/checkpoint-27500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:21:50,804 >> tokenizer config file saved in ./outputs/checkpoint-27500/tokenizer_config.json\n", + 
"[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:21:50,804 >> Special tokens file saved in ./outputs/checkpoint-27500/special_tokens_map.json\n", + "{'loss': 0.1148, 'learning_rate': 4.663212435233161e-06, 'epoch': 4.53}\n", + " 91% 28000/30880 [1:33:58<15:41, 3.06it/s][INFO|trainer.py:1917] 2021-07-20 11:23:29,763 >> Saving model checkpoint to ./outputs/checkpoint-28000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:23:29,764 >> Configuration saved in ./outputs/checkpoint-28000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:23:31,124 >> Model weights saved in ./outputs/checkpoint-28000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:23:31,124 >> tokenizer config file saved in ./outputs/checkpoint-28000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:23:31,125 >> Special tokens file saved in ./outputs/checkpoint-28000/special_tokens_map.json\n", + "{'loss': 0.1071, 'learning_rate': 3.853626943005181e-06, 'epoch': 4.61}\n", + " 92% 28500/30880 [1:35:38<12:49, 3.09it/s][INFO|trainer.py:1917] 2021-07-20 11:25:09,353 >> Saving model checkpoint to ./outputs/checkpoint-28500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:25:09,354 >> Configuration saved in ./outputs/checkpoint-28500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:25:10,715 >> Model weights saved in ./outputs/checkpoint-28500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:25:10,716 >> tokenizer config file saved in ./outputs/checkpoint-28500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:25:10,716 >> Special tokens file saved in ./outputs/checkpoint-28500/special_tokens_map.json\n", + "{'loss': 0.1129, 'learning_rate': 3.044041450777202e-06, 'epoch': 4.7}\n", + " 94% 29000/30880 [1:37:18<10:17, 3.04it/s][INFO|trainer.py:1917] 2021-07-20 11:26:49,355 >> Saving model checkpoint to ./outputs/checkpoint-29000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:26:49,356 >> Configuration saved in ./outputs/checkpoint-29000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:26:50,707 >> Model weights saved in ./outputs/checkpoint-29000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:26:50,707 >> tokenizer config file saved in ./outputs/checkpoint-29000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:26:50,708 >> Special tokens file saved in ./outputs/checkpoint-29000/special_tokens_map.json\n", + "{'loss': 0.122, 'learning_rate': 2.234455958549223e-06, 'epoch': 4.78}\n", + " 96% 29500/30880 [1:38:59<07:29, 3.07it/s][INFO|trainer.py:1917] 2021-07-20 11:28:30,049 >> Saving model checkpoint to ./outputs/checkpoint-29500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:28:30,050 >> Configuration saved in ./outputs/checkpoint-29500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:28:31,429 >> Model weights saved in ./outputs/checkpoint-29500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:28:31,430 >> tokenizer config file saved in ./outputs/checkpoint-29500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:28:31,430 >> Special tokens file saved in ./outputs/checkpoint-29500/special_tokens_map.json\n", + " 97% 30000/30880 [1:40:39<04:47, 3.06it/s]{'loss': 0.1079, 'learning_rate': 1.4248704663212437e-06, 'epoch': 4.86}\n", + " 97% 30000/30880 [1:40:39<04:47, 3.06it/s][INFO|trainer.py:1917] 2021-07-20 
11:30:10,669 >> Saving model checkpoint to ./outputs/checkpoint-30000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:30:10,672 >> Configuration saved in ./outputs/checkpoint-30000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:30:12,147 >> Model weights saved in ./outputs/checkpoint-30000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:30:12,148 >> tokenizer config file saved in ./outputs/checkpoint-30000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:30:12,148 >> Special tokens file saved in ./outputs/checkpoint-30000/special_tokens_map.json\n", + " 99% 30500/30880 [1:42:19<02:05, 3.04it/s]{'loss': 0.1135, 'learning_rate': 6.152849740932643e-07, 'epoch': 4.94}\n", + " 99% 30500/30880 [1:42:20<02:05, 3.04it/s][INFO|trainer.py:1917] 2021-07-20 11:31:50,788 >> Saving model checkpoint to ./outputs/checkpoint-30500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:31:50,789 >> Configuration saved in ./outputs/checkpoint-30500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:31:52,164 >> Model weights saved in ./outputs/checkpoint-30500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:31:52,165 >> tokenizer config file saved in ./outputs/checkpoint-30500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:31:52,165 >> Special tokens file saved in ./outputs/checkpoint-30500/special_tokens_map.json\n", + "100% 30880/30880 [1:43:36<00:00, 6.02it/s][INFO|trainer.py:1358] 2021-07-20 11:33:07,657 >> \n", + "\n", + "Training completed. Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n", + "100% 30880/30880 [1:43:36<00:00, 6.02it/s]{'train_runtime': 6219.3427, 'train_samples_per_second': 39.716, 'train_steps_per_second': 4.965, 'train_loss': 0.232939304341924, 'epoch': 5.0}\n", + "100% 30880/30880 [1:43:36<00:00, 4.97it/s]\n", + "[INFO|trainer.py:1917] 2021-07-20 11:33:07,681 >> Saving model checkpoint to ./outputs\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:33:07,682 >> Configuration saved in ./outputs/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:33:09,171 >> Model weights saved in ./outputs/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:33:09,171 >> tokenizer config file saved in ./outputs/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:33:09,172 >> Special tokens file saved in ./outputs/special_tokens_map.json\n", + "***** train metrics *****\n", + " epoch = 5.0\n", + " train_loss = 0.2329\n", + " train_runtime = 1:43:39.34\n", + " train_samples = 49401\n", + " train_samples_per_second = 39.716\n", + " train_steps_per_second = 4.965\n", + "07/20/2021 11:33:09 - INFO - __main__ - *** Evaluate ***\n", + "[INFO|trainer.py:522] 2021-07-20 11:33:09,321 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: sentence1, id, sentence2.\n", + "[INFO|trainer.py:2163] 2021-07-20 11:33:09,340 >> ***** Running Evaluation *****\n", + "[INFO|trainer.py:2165] 2021-07-20 11:33:09,341 >> Num examples = 2000\n", + "[INFO|trainer.py:2168] 2021-07-20 11:33:09,341 >> Batch size = 8\n", + "100% 250/250 [00:12<00:00, 20.40it/s]\n", + "***** eval metrics *****\n", + " epoch = 5.0\n", + " eval_accuracy = 0.8795\n", + " eval_loss = 0.5979\n", + " eval_runtime = 0:00:12.33\n", + " eval_samples = 2000\n", + " eval_samples_per_second = 162.145\n", + " 
eval_steps_per_second = 20.268\n", + "\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish, PID 1526\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Program ended successfully.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find user logs for this run at: /content/wandb/run-20210720_094928-3kwtbkiz/logs/debug.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find internal logs for this run at: /content/wandb/run-20210720_094928-3kwtbkiz/logs/debug-internal.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.1135\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 30880\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime 6233\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp 1626780801\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step 62\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime 6219.3427\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second 39.716\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second 4.965\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos 1.62474365572992e+16\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss 0.23294\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 0.59792\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy 0.8795\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 12.3347\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 162.145\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 20.268\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss █▆▅▅▅▄▄▄▄▃▃▃▃▃▃▃▃▂▂▂▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate ████▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime ▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp ▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step ▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 1 other file(s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced \u001b[33m./outputs\u001b[0m: \u001b[34mhttps://wandb.ai/versae/bertin-eval/runs/3kwtbkiz\u001b[0m\n", + "2021-07-20 11:33:34.803215: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library 
libcudart.so.11.0\n", + "07/20/2021 11:33:38 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", + "07/20/2021 11:33:38 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", + "_n_gpu=1,\n", + "adafactor=False,\n", + "adam_beta1=0.9,\n", + "adam_beta2=0.999,\n", + "adam_epsilon=1e-08,\n", + "dataloader_drop_last=False,\n", + "dataloader_num_workers=0,\n", + "dataloader_pin_memory=True,\n", + "ddp_find_unused_parameters=None,\n", + "debug=[],\n", + "deepspeed=None,\n", + "disable_tqdm=False,\n", + "do_eval=True,\n", + "do_predict=False,\n", + "do_train=True,\n", + "eval_accumulation_steps=None,\n", + "eval_steps=500,\n", + "evaluation_strategy=IntervalStrategy.NO,\n", + "fp16=False,\n", + "fp16_backend=auto,\n", + "fp16_full_eval=False,\n", + "fp16_opt_level=O1,\n", + "gradient_accumulation_steps=1,\n", + "greater_is_better=None,\n", + "group_by_length=False,\n", + "ignore_data_skip=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=5e-05,\n", + "length_column_name=length,\n", + "load_best_model_at_end=False,\n", + "local_rank=-1,\n", + "log_level=-1,\n", + "log_level_replica=-1,\n", + "log_on_each_node=True,\n", + "logging_dir=./outputs/runs/Jul20_11-33-38_48f3f265b421,\n", + "logging_first_step=False,\n", + "logging_steps=500,\n", + "logging_strategy=IntervalStrategy.STEPS,\n", + "lr_scheduler_type=SchedulerType.LINEAR,\n", + "max_grad_norm=1.0,\n", + "max_steps=-1,\n", + "metric_for_best_model=None,\n", + "mp_parameters=,\n", + "no_cuda=False,\n", + "num_train_epochs=5.0,\n", + "output_dir=./outputs,\n", + "overwrite_output_dir=True,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=8,\n", + "per_device_train_batch_size=8,\n", + "prediction_loss_only=False,\n", + "push_to_hub=False,\n", + "push_to_hub_model_id=outputs,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=None,\n", + "remove_unused_columns=True,\n", + "report_to=['tensorboard', 'wandb'],\n", + "resume_from_checkpoint=None,\n", + "run_name=./outputs,\n", + "save_on_each_node=False,\n", + "save_steps=500,\n", + "save_strategy=IntervalStrategy.STEPS,\n", + "save_total_limit=None,\n", + "seed=42,\n", + "sharded_ddp=[],\n", + "skip_memory_metrics=True,\n", + "tpu_metrics_debug=False,\n", + "tpu_num_cores=None,\n", + "use_legacy_prediction_loop=False,\n", + "warmup_ratio=0.0,\n", + "warmup_steps=0,\n", + "weight_decay=0.0,\n", + ")\n", + "07/20/2021 11:33:39 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py at /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x\n", + "07/20/2021 11:33:39 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py at /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af\n", + "07/20/2021 11:33:39 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py to /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/paws-x.py\n", + "07/20/2021 11:33:39 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/dataset_infos.json to 
/root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/dataset_infos.json\n", + "07/20/2021 11:33:39 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py at /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/paws-x.json\n", + "07/20/2021 11:33:39 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py at /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x\n", + "07/20/2021 11:33:39 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py at /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af\n", + "07/20/2021 11:33:39 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py to /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/paws-x.py\n", + "07/20/2021 11:33:39 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/dataset_infos.json\n", + "07/20/2021 11:33:39 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py at /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/paws-x.json\n", + "07/20/2021 11:33:39 - INFO - datasets.info - Loading Dataset Infos from /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af\n", + "07/20/2021 11:33:39 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", + "07/20/2021 11:33:39 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/pawsx/es/1.1.0/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af\n", + "07/20/2021 11:33:39 - WARNING - datasets.builder - Reusing dataset pawsx (/root/.cache/huggingface/datasets/pawsx/es/1.1.0/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af)\n", + "07/20/2021 11:33:39 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/pawsx/es/1.1.0/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af\n", + "100% 3/3 [00:00<00:00, 25.65it/s]\n", + "[INFO|configuration_utils.py:545] 2021-07-20 11:33:40,223 >> loading configuration file https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/a3b98490ab467f825ce932e6e6e7de25a6ea47beeceb6f5cd521b8ee4f61f95e.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|configuration_utils.py:581] 2021-07-20 11:33:40,224 >> Model config RobertaConfig {\n", + " \"architectures\": [\n", + " \"RobertaForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"bos_token_id\": 0,\n", + " \"eos_token_id\": 
2,\n", + " \"finetuning_task\": \"paws-x\",\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"max_position_embeddings\": 514,\n", + " \"model_type\": \"roberta\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 1,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50265\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-20 11:33:42,705 >> loading file https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/vocab.json from cache at /root/.cache/huggingface/transformers/c1ba808baa4a9c0f3062f1881d448087c30a1443644365bd41cf366491ab4063.a80f232f572026f92499b14999a8ed4e044e04cf3d01b9f2be298c98e78e8498\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-20 11:33:42,705 >> loading file https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/merges.txt from cache at /root/.cache/huggingface/transformers/17ffd9604d64364336252e5a3859c3c55be07c457328ab5fc37e4aaf39913d28.a0dfc41f9d0f03a56ba7a5401d770f6e43071045a0bd79073380d408d17a0d92\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-20 11:33:42,705 >> loading file https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/59e3c1ec6ec0fe2653924dcd348a763dd43f51d8eae6ab758e2d962ec7c14d5e.2a5dc806edc00ab3a329cb22b9973596ca75b24ba0e5e4963bf1308de7237a3d\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-20 11:33:42,705 >> loading file https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-20 11:33:42,705 >> loading file https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/special_tokens_map.json from cache at /root/.cache/huggingface/transformers/39ddc268aab2655adb602f93d771480d4db157c1b6fae9a5ae9fc2112c645a69.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-20 11:33:42,705 >> loading file https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/tokenizer_config.json from cache at /root/.cache/huggingface/transformers/a9d7a6740959c8c347993f62fbd5620bffa2d10c35c2e579a2ecec181299c9a1.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|modeling_utils.py:1271] 2021-07-20 11:33:43,126 >> loading weights file https://huggingface.co/bertin-project/bertin-base-random-exp-512seqlen/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/6dd5d03f2c36b42a305cf20636d35935ad2d998d2ab0588b28eeed0fc164db43.5b6b77533b091cc9204533514d844abfe875ebba66e044b251306c4228bd3221\n", + "[WARNING|modeling_utils.py:1502] 2021-07-20 11:33:51,203 >> Some weights of the model checkpoint at bertin-project/bertin-base-random-exp-512seqlen were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.bias']\n", + "- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a 
model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "[WARNING|modeling_utils.py:1513] 2021-07-20 11:33:51,203 >> Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at bertin-project/bertin-base-random-exp-512seqlen and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "07/20/2021 11:33:51 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /root/.cache/huggingface/datasets/pawsx/es/1.1.0/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/cache-201539629a7c3697.arrow\n", + "07/20/2021 11:33:51 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /root/.cache/huggingface/datasets/pawsx/es/1.1.0/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/cache-63872d8033480024.arrow\n", + "Running tokenizer on dataset: 0% 0/2 [00:00> The following columns in the training set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: sentence2, id, sentence1.\n", + "[INFO|trainer.py:1162] 2021-07-20 11:33:56,324 >> ***** Running training *****\n", + "[INFO|trainer.py:1163] 2021-07-20 11:33:56,324 >> Num examples = 49401\n", + "[INFO|trainer.py:1164] 2021-07-20 11:33:56,325 >> Num Epochs = 5\n", + "[INFO|trainer.py:1165] 2021-07-20 11:33:56,325 >> Instantaneous batch size per device = 8\n", + "[INFO|trainer.py:1166] 2021-07-20 11:33:56,325 >> Total train batch size (w. 
parallel, distributed & accumulation) = 8\n", + "[INFO|trainer.py:1167] 2021-07-20 11:33:56,325 >> Gradient Accumulation steps = 1\n", + "[INFO|trainer.py:1168] 2021-07-20 11:33:56,325 >> Total optimization steps = 30880\n", + "[INFO|integrations.py:446] 2021-07-20 11:33:56,340 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mversae\u001b[0m (use `wandb login --relogin` to force relogin)\n", + "2021-07-20 11:33:57.672926: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m./outputs\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval/runs/27wn690d\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in /content/wandb/run-20210720_113356-27wn690d\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run `wandb offline` to turn off syncing.\n", + "\n", + " 2% 500/30880 [01:37<2:54:15, 2.91it/s]{'loss': 0.7114, 'learning_rate': 4.919041450777203e-05, 'epoch': 0.08}\n", + " 2% 500/30880 [01:37<2:54:15, 2.91it/s][INFO|trainer.py:1917] 2021-07-20 11:35:35,937 >> Saving model checkpoint to ./outputs/checkpoint-500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:35:35,938 >> Configuration saved in ./outputs/checkpoint-500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:35:37,403 >> Model weights saved in ./outputs/checkpoint-500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:35:37,403 >> tokenizer config file saved in ./outputs/checkpoint-500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:35:37,404 >> Special tokens file saved in ./outputs/checkpoint-500/special_tokens_map.json\n", + " 3% 1000/30880 [03:19<2:44:28, 3.03it/s]{'loss': 0.7076, 'learning_rate': 4.8380829015544046e-05, 'epoch': 0.16}\n", + " 3% 1000/30880 [03:19<2:44:28, 3.03it/s][INFO|trainer.py:1917] 2021-07-20 11:37:18,583 >> Saving model checkpoint to ./outputs/checkpoint-1000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:37:18,585 >> Configuration saved in ./outputs/checkpoint-1000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:37:19,832 >> Model weights saved in ./outputs/checkpoint-1000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:37:19,832 >> tokenizer config file saved in ./outputs/checkpoint-1000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:37:19,833 >> Special tokens file saved in ./outputs/checkpoint-1000/special_tokens_map.json\n", + " 5% 1500/30880 [05:03<2:45:27, 2.96it/s]{'loss': 0.71, 'learning_rate': 4.7571243523316064e-05, 'epoch': 0.24}\n", + " 5% 1500/30880 [05:03<2:45:27, 2.96it/s][INFO|trainer.py:1917] 2021-07-20 11:39:01,986 >> Saving model checkpoint to ./outputs/checkpoint-1500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:39:01,987 >> Configuration saved in ./outputs/checkpoint-1500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:39:03,402 >> Model weights saved in ./outputs/checkpoint-1500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 
11:39:03,403 >> tokenizer config file saved in ./outputs/checkpoint-1500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:39:03,403 >> Special tokens file saved in ./outputs/checkpoint-1500/special_tokens_map.json\n", + "{'loss': 0.7, 'learning_rate': 4.676165803108808e-05, 'epoch': 0.32}\n", + " 6% 2000/30880 [06:46<2:39:37, 3.02it/s][INFO|trainer.py:1917] 2021-07-20 11:40:44,994 >> Saving model checkpoint to ./outputs/checkpoint-2000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:40:44,996 >> Configuration saved in ./outputs/checkpoint-2000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:40:46,341 >> Model weights saved in ./outputs/checkpoint-2000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:40:46,341 >> tokenizer config file saved in ./outputs/checkpoint-2000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:40:46,342 >> Special tokens file saved in ./outputs/checkpoint-2000/special_tokens_map.json\n", + "{'loss': 0.7043, 'learning_rate': 4.595207253886011e-05, 'epoch': 0.4}\n", + " 8% 2500/30880 [08:29<2:41:01, 2.94it/s][INFO|trainer.py:1917] 2021-07-20 11:42:28,240 >> Saving model checkpoint to ./outputs/checkpoint-2500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:42:28,242 >> Configuration saved in ./outputs/checkpoint-2500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:42:29,666 >> Model weights saved in ./outputs/checkpoint-2500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:42:29,667 >> tokenizer config file saved in ./outputs/checkpoint-2500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:42:29,667 >> Special tokens file saved in ./outputs/checkpoint-2500/special_tokens_map.json\n", + " {'loss': 0.7044, 'learning_rate': 4.5142487046632126e-05, 'epoch': 0.49}\n", + " 10% 3000/30880 [10:12<2:37:59, 2.94it/s][INFO|trainer.py:1917] 2021-07-20 11:44:11,673 >> Saving model checkpoint to ./outputs/checkpoint-3000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:44:11,674 >> Configuration saved in ./outputs/checkpoint-3000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:44:13,163 >> Model weights saved in ./outputs/checkpoint-3000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:44:13,164 >> tokenizer config file saved in ./outputs/checkpoint-3000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:44:13,164 >> Special tokens file saved in ./outputs/checkpoint-3000/special_tokens_map.json\n", + " 11% 3500/30880 [11:55<2:32:12, 3.00it/s]{'loss': 0.699, 'learning_rate': 4.433290155440415e-05, 'epoch': 0.57}\n", + " 11% 3500/30880 [11:55<2:32:12, 3.00it/s][INFO|trainer.py:1917] 2021-07-20 11:45:54,670 >> Saving model checkpoint to ./outputs/checkpoint-3500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:45:54,671 >> Configuration saved in ./outputs/checkpoint-3500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:45:56,108 >> Model weights saved in ./outputs/checkpoint-3500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:45:56,109 >> tokenizer config file saved in ./outputs/checkpoint-3500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:45:56,109 >> Special tokens file saved in ./outputs/checkpoint-3500/special_tokens_map.json\n", + " 13% 4000/30880 [13:39<2:33:58, 2.91it/s]{'loss': 0.7005, 'learning_rate': 
4.352331606217617e-05, 'epoch': 0.65}\n", + "[INFO|trainer.py:1917] 2021-07-20 11:47:38,240 >> Saving model checkpoint to ./outputs/checkpoint-4000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:47:38,241 >> Configuration saved in ./outputs/checkpoint-4000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:47:39,674 >> Model weights saved in ./outputs/checkpoint-4000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:47:39,675 >> tokenizer config file saved in ./outputs/checkpoint-4000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:47:39,675 >> Special tokens file saved in ./outputs/checkpoint-4000/special_tokens_map.json\n", + " 15% 4500/30880 [15:22<2:32:34, 2.88it/s][INFO|trainer.py:1917] 2021-07-20 11:49:21,505 >> Saving model checkpoint to ./outputs/checkpoint-4500\n", + "{'loss': 0.6985, 'learning_rate': 4.271373056994819e-05, 'epoch': 0.73}\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:49:21,507 >> Configuration saved in ./outputs/checkpoint-4500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:49:22,977 >> Model weights saved in ./outputs/checkpoint-4500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:49:22,978 >> tokenizer config file saved in ./outputs/checkpoint-4500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:49:22,979 >> Special tokens file saved in ./outputs/checkpoint-4500/special_tokens_map.json\n", + " 16% 5000/30880 [17:05<2:23:58, 3.00it/s]{'loss': 0.6943, 'learning_rate': 4.190414507772021e-05, 'epoch': 0.81}\n", + " 16% 5000/30880 [17:05<2:23:58, 3.00it/s][INFO|trainer.py:1917] 2021-07-20 11:51:04,064 >> Saving model checkpoint to ./outputs/checkpoint-5000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:51:04,065 >> Configuration saved in ./outputs/checkpoint-5000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:51:05,414 >> Model weights saved in ./outputs/checkpoint-5000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:51:05,415 >> tokenizer config file saved in ./outputs/checkpoint-5000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:51:05,415 >> Special tokens file saved in ./outputs/checkpoint-5000/special_tokens_map.json\n", + "{'loss': 0.7005, 'learning_rate': 4.109455958549223e-05, 'epoch': 0.89}\n", + " 18% 5500/30880 [18:48<2:22:02, 2.98it/s][INFO|trainer.py:1917] 2021-07-20 11:52:47,332 >> Saving model checkpoint to ./outputs/checkpoint-5500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:52:47,333 >> Configuration saved in ./outputs/checkpoint-5500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:52:48,760 >> Model weights saved in ./outputs/checkpoint-5500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:52:48,761 >> tokenizer config file saved in ./outputs/checkpoint-5500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:52:48,761 >> Special tokens file saved in ./outputs/checkpoint-5500/special_tokens_map.json\n", + " 19% 6000/30880 [20:31<2:20:50, 2.94it/s]{'loss': 0.6906, 'learning_rate': 4.028497409326425e-05, 'epoch': 0.97}\n", + "[INFO|trainer.py:1917] 2021-07-20 11:54:30,202 >> Saving model checkpoint to ./outputs/checkpoint-6000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:54:30,208 >> Configuration saved in ./outputs/checkpoint-6000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:54:31,804 >> 
Model weights saved in ./outputs/checkpoint-6000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:54:31,805 >> tokenizer config file saved in ./outputs/checkpoint-6000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:54:31,805 >> Special tokens file saved in ./outputs/checkpoint-6000/special_tokens_map.json\n", + "{'loss': 0.6988, 'learning_rate': 3.9475388601036275e-05, 'epoch': 1.05}\n", + " 21% 6500/30880 [22:15<2:19:33, 2.91it/s][INFO|trainer.py:1917] 2021-07-20 11:56:13,844 >> Saving model checkpoint to ./outputs/checkpoint-6500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:56:13,845 >> Configuration saved in ./outputs/checkpoint-6500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:56:15,246 >> Model weights saved in ./outputs/checkpoint-6500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:56:15,247 >> tokenizer config file saved in ./outputs/checkpoint-6500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:56:15,248 >> Special tokens file saved in ./outputs/checkpoint-6500/special_tokens_map.json\n", + "{'loss': 0.6978, 'learning_rate': 3.8665803108808294e-05, 'epoch': 1.13}\n", + " 23% 7000/30880 [23:58<2:13:16, 2.99it/s][INFO|trainer.py:1917] 2021-07-20 11:57:56,873 >> Saving model checkpoint to ./outputs/checkpoint-7000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:57:56,874 >> Configuration saved in ./outputs/checkpoint-7000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:57:58,371 >> Model weights saved in ./outputs/checkpoint-7000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:57:58,371 >> tokenizer config file saved in ./outputs/checkpoint-7000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:57:58,372 >> Special tokens file saved in ./outputs/checkpoint-7000/special_tokens_map.json\n", + " 24% 7500/30880 [25:41<2:12:52, 2.93it/s]{'loss': 0.6968, 'learning_rate': 3.785621761658031e-05, 'epoch': 1.21}\n", + "[INFO|trainer.py:1917] 2021-07-20 11:59:39,996 >> Saving model checkpoint to ./outputs/checkpoint-7500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 11:59:40,002 >> Configuration saved in ./outputs/checkpoint-7500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 11:59:41,676 >> Model weights saved in ./outputs/checkpoint-7500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 11:59:41,677 >> tokenizer config file saved in ./outputs/checkpoint-7500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 11:59:41,678 >> Special tokens file saved in ./outputs/checkpoint-7500/special_tokens_map.json\n", + " 26% 8000/30880 [27:24<2:10:55, 2.91it/s]{'loss': 0.69, 'learning_rate': 3.704663212435233e-05, 'epoch': 1.3}\n", + " 26% 8000/30880 [27:24<2:10:55, 2.91it/s][INFO|trainer.py:1917] 2021-07-20 12:01:23,354 >> Saving model checkpoint to ./outputs/checkpoint-8000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:01:23,356 >> Configuration saved in ./outputs/checkpoint-8000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:01:24,924 >> Model weights saved in ./outputs/checkpoint-8000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:01:24,925 >> tokenizer config file saved in ./outputs/checkpoint-8000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:01:24,925 >> Special tokens file saved in 
./outputs/checkpoint-8000/special_tokens_map.json\n", + "{'loss': 0.6932, 'learning_rate': 3.6237046632124356e-05, 'epoch': 1.38}\n", + " 28% 8500/30880 [29:07<2:05:11, 2.98it/s][INFO|trainer.py:1917] 2021-07-20 12:03:06,242 >> Saving model checkpoint to ./outputs/checkpoint-8500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:03:06,243 >> Configuration saved in ./outputs/checkpoint-8500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:03:07,715 >> Model weights saved in ./outputs/checkpoint-8500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:03:07,715 >> tokenizer config file saved in ./outputs/checkpoint-8500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:03:07,716 >> Special tokens file saved in ./outputs/checkpoint-8500/special_tokens_map.json\n", + " 29% 9000/30880 [30:50<2:02:52, 2.97it/s]{'loss': 0.6929, 'learning_rate': 3.5427461139896374e-05, 'epoch': 1.46}\n", + "[INFO|trainer.py:1917] 2021-07-20 12:04:49,640 >> Saving model checkpoint to ./outputs/checkpoint-9000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:04:49,641 >> Configuration saved in ./outputs/checkpoint-9000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:04:51,160 >> Model weights saved in ./outputs/checkpoint-9000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:04:51,161 >> tokenizer config file saved in ./outputs/checkpoint-9000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:04:51,161 >> Special tokens file saved in ./outputs/checkpoint-9000/special_tokens_map.json\n", + "{'loss': 0.6971, 'learning_rate': 3.46178756476684e-05, 'epoch': 1.54}\n", + " 31% 9500/30880 [32:34<1:59:31, 2.98it/s][INFO|trainer.py:1917] 2021-07-20 12:06:32,929 >> Saving model checkpoint to ./outputs/checkpoint-9500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:06:32,931 >> Configuration saved in ./outputs/checkpoint-9500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:06:34,280 >> Model weights saved in ./outputs/checkpoint-9500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:06:34,281 >> tokenizer config file saved in ./outputs/checkpoint-9500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:06:34,281 >> Special tokens file saved in ./outputs/checkpoint-9500/special_tokens_map.json\n", + "{'loss': 0.6928, 'learning_rate': 3.380829015544041e-05, 'epoch': 1.62}\n", + " 32% 10000/30880 [34:16<1:58:59, 2.92it/s][INFO|trainer.py:1917] 2021-07-20 12:08:15,623 >> Saving model checkpoint to ./outputs/checkpoint-10000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:08:15,624 >> Configuration saved in ./outputs/checkpoint-10000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:08:17,181 >> Model weights saved in ./outputs/checkpoint-10000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:08:17,182 >> tokenizer config file saved in ./outputs/checkpoint-10000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:08:17,182 >> Special tokens file saved in ./outputs/checkpoint-10000/special_tokens_map.json\n", + "{'loss': 0.6897, 'learning_rate': 3.2998704663212436e-05, 'epoch': 1.7}\n", + " 34% 10500/30880 [35:59<1:54:11, 2.97it/s][INFO|trainer.py:1917] 2021-07-20 12:09:58,789 >> Saving model checkpoint to ./outputs/checkpoint-10500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:09:58,790 >> Configuration saved in 
./outputs/checkpoint-10500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:10:00,339 >> Model weights saved in ./outputs/checkpoint-10500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:10:00,340 >> tokenizer config file saved in ./outputs/checkpoint-10500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:10:00,340 >> Special tokens file saved in ./outputs/checkpoint-10500/special_tokens_map.json\n", + "{'loss': 0.6967, 'learning_rate': 3.2189119170984454e-05, 'epoch': 1.78}\n", + " 36% 11000/30880 [37:43<1:52:14, 2.95it/s][INFO|trainer.py:1917] 2021-07-20 12:11:42,081 >> Saving model checkpoint to ./outputs/checkpoint-11000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:11:42,082 >> Configuration saved in ./outputs/checkpoint-11000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:11:43,497 >> Model weights saved in ./outputs/checkpoint-11000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:11:43,498 >> tokenizer config file saved in ./outputs/checkpoint-11000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:11:43,498 >> Special tokens file saved in ./outputs/checkpoint-11000/special_tokens_map.json\n", + " 37% 11500/30880 [39:26<1:51:41, 2.89it/s][INFO|trainer.py:1917] 2021-07-20 12:13:25,144 >> Saving model checkpoint to ./outputs/checkpoint-11500\n", + "{'loss': 0.6902, 'learning_rate': 3.137953367875648e-05, 'epoch': 1.86}\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:13:25,147 >> Configuration saved in ./outputs/checkpoint-11500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:13:26,571 >> Model weights saved in ./outputs/checkpoint-11500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:13:26,688 >> tokenizer config file saved in ./outputs/checkpoint-11500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:13:26,689 >> Special tokens file saved in ./outputs/checkpoint-11500/special_tokens_map.json\n", + " 39% 12000/30880 [41:09<1:46:53, 2.94it/s]{'loss': 0.6965, 'learning_rate': 3.05699481865285e-05, 'epoch': 1.94}\n", + "[INFO|trainer.py:1917] 2021-07-20 12:15:08,299 >> Saving model checkpoint to ./outputs/checkpoint-12000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:15:08,300 >> Configuration saved in ./outputs/checkpoint-12000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:15:09,864 >> Model weights saved in ./outputs/checkpoint-12000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:15:09,864 >> tokenizer config file saved in ./outputs/checkpoint-12000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:15:09,865 >> Special tokens file saved in ./outputs/checkpoint-12000/special_tokens_map.json\n", + " 40% 12500/30880 [42:52<1:42:30, 2.99it/s]{'loss': 0.6914, 'learning_rate': 2.976036269430052e-05, 'epoch': 2.02}\n", + "[INFO|trainer.py:1917] 2021-07-20 12:16:51,392 >> Saving model checkpoint to ./outputs/checkpoint-12500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:16:51,398 >> Configuration saved in ./outputs/checkpoint-12500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:16:52,834 >> Model weights saved in ./outputs/checkpoint-12500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:16:52,835 >> tokenizer config file saved in ./outputs/checkpoint-12500/tokenizer_config.json\n", + 
"[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:16:52,835 >> Special tokens file saved in ./outputs/checkpoint-12500/special_tokens_map.json\n", + " 42% 13000/30880 [44:35<1:41:23, 2.94it/s]{'loss': 0.6941, 'learning_rate': 2.8950777202072538e-05, 'epoch': 2.1}\n", + " 42% 13000/30880 [44:35<1:41:23, 2.94it/s][INFO|trainer.py:1917] 2021-07-20 12:18:34,614 >> Saving model checkpoint to ./outputs/checkpoint-13000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:18:34,618 >> Configuration saved in ./outputs/checkpoint-13000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:18:36,080 >> Model weights saved in ./outputs/checkpoint-13000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:18:36,080 >> tokenizer config file saved in ./outputs/checkpoint-13000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:18:36,081 >> Special tokens file saved in ./outputs/checkpoint-13000/special_tokens_map.json\n", + "{'loss': 0.6912, 'learning_rate': 2.814119170984456e-05, 'epoch': 2.19}\n", + " 44% 13500/30880 [46:18<1:37:33, 2.97it/s][INFO|trainer.py:1917] 2021-07-20 12:20:17,210 >> Saving model checkpoint to ./outputs/checkpoint-13500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:20:17,211 >> Configuration saved in ./outputs/checkpoint-13500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:20:18,691 >> Model weights saved in ./outputs/checkpoint-13500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:20:18,692 >> tokenizer config file saved in ./outputs/checkpoint-13500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:20:18,692 >> Special tokens file saved in ./outputs/checkpoint-13500/special_tokens_map.json\n", + " 45% 14000/30880 [48:01<1:35:05, 2.96it/s]{'loss': 0.6955, 'learning_rate': 2.7331606217616585e-05, 'epoch': 2.27}\n", + "[INFO|trainer.py:1917] 2021-07-20 12:22:00,730 >> Saving model checkpoint to ./outputs/checkpoint-14000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:22:00,731 >> Configuration saved in ./outputs/checkpoint-14000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:22:02,112 >> Model weights saved in ./outputs/checkpoint-14000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:22:02,112 >> tokenizer config file saved in ./outputs/checkpoint-14000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:22:02,113 >> Special tokens file saved in ./outputs/checkpoint-14000/special_tokens_map.json\n", + " 47% 14500/30880 [49:44<1:31:35, 2.98it/s]{'loss': 0.6928, 'learning_rate': 2.6522020725388604e-05, 'epoch': 2.35}\n", + " 47% 14500/30880 [49:44<1:31:35, 2.98it/s][INFO|trainer.py:1917] 2021-07-20 12:23:43,737 >> Saving model checkpoint to ./outputs/checkpoint-14500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:23:43,739 >> Configuration saved in ./outputs/checkpoint-14500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:23:45,078 >> Model weights saved in ./outputs/checkpoint-14500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:23:45,079 >> tokenizer config file saved in ./outputs/checkpoint-14500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:23:45,080 >> Special tokens file saved in ./outputs/checkpoint-14500/special_tokens_map.json\n", + " 49% 15000/30880 [51:27<1:30:17, 2.93it/s]{'loss': 0.6917, 'learning_rate': 2.5712435233160625e-05, 'epoch': 2.43}\n", 
+ "[INFO|trainer.py:1917] 2021-07-20 12:25:26,725 >> Saving model checkpoint to ./outputs/checkpoint-15000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:25:26,729 >> Configuration saved in ./outputs/checkpoint-15000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:25:28,300 >> Model weights saved in ./outputs/checkpoint-15000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:25:28,301 >> tokenizer config file saved in ./outputs/checkpoint-15000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:25:28,301 >> Special tokens file saved in ./outputs/checkpoint-15000/special_tokens_map.json\n", + "{'loss': 0.6945, 'learning_rate': 2.4902849740932644e-05, 'epoch': 2.51}\n", + " 50% 15500/30880 [53:11<1:27:49, 2.92it/s][INFO|trainer.py:1917] 2021-07-20 12:27:09,833 >> Saving model checkpoint to ./outputs/checkpoint-15500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:27:09,834 >> Configuration saved in ./outputs/checkpoint-15500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:27:11,301 >> Model weights saved in ./outputs/checkpoint-15500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:27:11,301 >> tokenizer config file saved in ./outputs/checkpoint-15500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:27:11,302 >> Special tokens file saved in ./outputs/checkpoint-15500/special_tokens_map.json\n", + " 52% 16000/30880 [54:53<1:22:12, 3.02it/s]{'loss': 0.6881, 'learning_rate': 2.4093264248704665e-05, 'epoch': 2.59}\n", + "[INFO|trainer.py:1917] 2021-07-20 12:28:52,665 >> Saving model checkpoint to ./outputs/checkpoint-16000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:28:52,666 >> Configuration saved in ./outputs/checkpoint-16000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:28:54,135 >> Model weights saved in ./outputs/checkpoint-16000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:28:54,136 >> tokenizer config file saved in ./outputs/checkpoint-16000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:28:54,136 >> Special tokens file saved in ./outputs/checkpoint-16000/special_tokens_map.json\n", + " 53% 16500/30880 [56:37<1:20:48, 2.97it/s][INFO|trainer.py:1917] 2021-07-20 12:30:35,855 >> Saving model checkpoint to ./outputs/checkpoint-16500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:30:35,856 >> Configuration saved in ./outputs/checkpoint-16500/config.json\n", + "{'loss': 0.6896, 'learning_rate': 2.3283678756476684e-05, 'epoch': 2.67}\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:30:37,477 >> Model weights saved in ./outputs/checkpoint-16500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:30:37,478 >> tokenizer config file saved in ./outputs/checkpoint-16500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:30:37,479 >> Special tokens file saved in ./outputs/checkpoint-16500/special_tokens_map.json\n", + "{'loss': 0.6931, 'learning_rate': 2.2474093264248706e-05, 'epoch': 2.75}\n", + " 55% 17000/30880 [58:19<1:17:33, 2.98it/s][INFO|trainer.py:1917] 2021-07-20 12:32:18,657 >> Saving model checkpoint to ./outputs/checkpoint-17000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:32:18,659 >> Configuration saved in ./outputs/checkpoint-17000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:32:19,988 >> Model weights saved in 
./outputs/checkpoint-17000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:32:19,988 >> tokenizer config file saved in ./outputs/checkpoint-17000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:32:19,989 >> Special tokens file saved in ./outputs/checkpoint-17000/special_tokens_map.json\n", + "{'loss': 0.6905, 'learning_rate': 2.1664507772020724e-05, 'epoch': 2.83}\n", + " 57% 17500/30880 [1:00:03<1:14:24, 3.00it/s][INFO|trainer.py:1917] 2021-07-20 12:34:01,907 >> Saving model checkpoint to ./outputs/checkpoint-17500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:34:01,908 >> Configuration saved in ./outputs/checkpoint-17500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:34:03,358 >> Model weights saved in ./outputs/checkpoint-17500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:34:03,388 >> tokenizer config file saved in ./outputs/checkpoint-17500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:34:03,388 >> Special tokens file saved in ./outputs/checkpoint-17500/special_tokens_map.json\n", + "{'loss': 0.6844, 'learning_rate': 2.0854922279792746e-05, 'epoch': 2.91}\n", + " 58% 18000/30880 [1:01:46<1:12:45, 2.95it/s][INFO|trainer.py:1917] 2021-07-20 12:35:45,236 >> Saving model checkpoint to ./outputs/checkpoint-18000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:35:45,238 >> Configuration saved in ./outputs/checkpoint-18000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:35:46,818 >> Model weights saved in ./outputs/checkpoint-18000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:35:46,819 >> tokenizer config file saved in ./outputs/checkpoint-18000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:35:46,819 >> Special tokens file saved in ./outputs/checkpoint-18000/special_tokens_map.json\n", + "{'loss': 0.6943, 'learning_rate': 2.0045336787564768e-05, 'epoch': 3.0}\n", + " 60% 18500/30880 [1:03:29<1:10:36, 2.92it/s][INFO|trainer.py:1917] 2021-07-20 12:37:27,973 >> Saving model checkpoint to ./outputs/checkpoint-18500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:37:27,979 >> Configuration saved in ./outputs/checkpoint-18500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:37:29,463 >> Model weights saved in ./outputs/checkpoint-18500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:37:29,464 >> tokenizer config file saved in ./outputs/checkpoint-18500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:37:29,464 >> Special tokens file saved in ./outputs/checkpoint-18500/special_tokens_map.json\n", + "{'loss': 0.6921, 'learning_rate': 1.9235751295336786e-05, 'epoch': 3.08}\n", + " 62% 19000/30880 [1:05:12<1:07:11, 2.95it/s][INFO|trainer.py:1917] 2021-07-20 12:39:11,273 >> Saving model checkpoint to ./outputs/checkpoint-19000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:39:11,275 >> Configuration saved in ./outputs/checkpoint-19000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:39:12,714 >> Model weights saved in ./outputs/checkpoint-19000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:39:12,715 >> tokenizer config file saved in ./outputs/checkpoint-19000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:39:12,715 >> Special tokens file saved in 
./outputs/checkpoint-19000/special_tokens_map.json\n", + "{'loss': 0.6897, 'learning_rate': 1.8426165803108808e-05, 'epoch': 3.16}\n", + " 63% 19500/30880 [1:06:55<1:03:44, 2.98it/s][INFO|trainer.py:1917] 2021-07-20 12:40:54,038 >> Saving model checkpoint to ./outputs/checkpoint-19500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:40:54,039 >> Configuration saved in ./outputs/checkpoint-19500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:40:55,517 >> Model weights saved in ./outputs/checkpoint-19500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:40:55,518 >> tokenizer config file saved in ./outputs/checkpoint-19500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:40:55,518 >> Special tokens file saved in ./outputs/checkpoint-19500/special_tokens_map.json\n", + "{'loss': 0.6946, 'learning_rate': 1.761658031088083e-05, 'epoch': 3.24}\n", + " 65% 20000/30880 [1:08:38<1:02:04, 2.92it/s][INFO|trainer.py:1917] 2021-07-20 12:42:36,995 >> Saving model checkpoint to ./outputs/checkpoint-20000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:42:36,996 >> Configuration saved in ./outputs/checkpoint-20000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:42:38,398 >> Model weights saved in ./outputs/checkpoint-20000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:42:38,424 >> tokenizer config file saved in ./outputs/checkpoint-20000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:42:38,425 >> Special tokens file saved in ./outputs/checkpoint-20000/special_tokens_map.json\n", + " 66% 20500/30880 [1:10:21<58:48, 2.94it/s]{'loss': 0.6898, 'learning_rate': 1.6806994818652848e-05, 'epoch': 3.32}\n", + " 66% 20500/30880 [1:10:21<58:48, 2.94it/s][INFO|trainer.py:1917] 2021-07-20 12:44:20,114 >> Saving model checkpoint to ./outputs/checkpoint-20500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:44:20,115 >> Configuration saved in ./outputs/checkpoint-20500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:44:21,566 >> Model weights saved in ./outputs/checkpoint-20500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:44:21,567 >> tokenizer config file saved in ./outputs/checkpoint-20500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:44:21,567 >> Special tokens file saved in ./outputs/checkpoint-20500/special_tokens_map.json\n", + " 68% 21000/30880 [1:12:04<54:48, 3.00it/s]{'loss': 0.6916, 'learning_rate': 1.5997409326424873e-05, 'epoch': 3.4}\n", + " 68% 21000/30880 [1:12:04<54:48, 3.00it/s][INFO|trainer.py:1917] 2021-07-20 12:46:03,028 >> Saving model checkpoint to ./outputs/checkpoint-21000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:46:03,029 >> Configuration saved in ./outputs/checkpoint-21000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:46:04,655 >> Model weights saved in ./outputs/checkpoint-21000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:46:04,656 >> tokenizer config file saved in ./outputs/checkpoint-21000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:46:04,656 >> Special tokens file saved in ./outputs/checkpoint-21000/special_tokens_map.json\n", + "{'loss': 0.6878, 'learning_rate': 1.5187823834196893e-05, 'epoch': 3.48}\n", + " 70% 21500/30880 [1:13:47<53:10, 2.94it/s][INFO|trainer.py:1917] 2021-07-20 12:47:46,437 >> Saving model checkpoint to 
./outputs/checkpoint-21500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:47:46,438 >> Configuration saved in ./outputs/checkpoint-21500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:47:47,786 >> Model weights saved in ./outputs/checkpoint-21500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:47:47,787 >> tokenizer config file saved in ./outputs/checkpoint-21500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:47:47,787 >> Special tokens file saved in ./outputs/checkpoint-21500/special_tokens_map.json\n", + "{'loss': 0.6931, 'learning_rate': 1.4378238341968913e-05, 'epoch': 3.56}\n", + " 71% 22000/30880 [1:15:31<49:44, 2.98it/s][INFO|trainer.py:1917] 2021-07-20 12:49:30,292 >> Saving model checkpoint to ./outputs/checkpoint-22000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:49:30,293 >> Configuration saved in ./outputs/checkpoint-22000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:49:31,802 >> Model weights saved in ./outputs/checkpoint-22000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:49:31,803 >> tokenizer config file saved in ./outputs/checkpoint-22000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:49:31,803 >> Special tokens file saved in ./outputs/checkpoint-22000/special_tokens_map.json\n", + "{'loss': 0.6853, 'learning_rate': 1.3568652849740935e-05, 'epoch': 3.64}\n", + " 73% 22500/30880 [1:17:14<48:11, 2.90it/s][INFO|trainer.py:1917] 2021-07-20 12:51:13,628 >> Saving model checkpoint to ./outputs/checkpoint-22500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:51:13,629 >> Configuration saved in ./outputs/checkpoint-22500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:51:15,249 >> Model weights saved in ./outputs/checkpoint-22500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:51:15,249 >> tokenizer config file saved in ./outputs/checkpoint-22500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:51:15,250 >> Special tokens file saved in ./outputs/checkpoint-22500/special_tokens_map.json\n", + " 74% 23000/30880 [1:18:57<44:00, 2.98it/s]{'loss': 0.6911, 'learning_rate': 1.2759067357512955e-05, 'epoch': 3.72}\n", + " 74% 23000/30880 [1:18:57<44:00, 2.98it/s][INFO|trainer.py:1917] 2021-07-20 12:52:56,757 >> Saving model checkpoint to ./outputs/checkpoint-23000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:52:56,758 >> Configuration saved in ./outputs/checkpoint-23000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:52:58,313 >> Model weights saved in ./outputs/checkpoint-23000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:52:58,314 >> tokenizer config file saved in ./outputs/checkpoint-23000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:52:58,315 >> Special tokens file saved in ./outputs/checkpoint-23000/special_tokens_map.json\n", + " 76% 23500/30880 [1:20:41<42:09, 2.92it/s]{'loss': 0.6917, 'learning_rate': 1.1949481865284974e-05, 'epoch': 3.81}\n", + " 76% 23500/30880 [1:20:41<42:09, 2.92it/s][INFO|trainer.py:1917] 2021-07-20 12:54:40,110 >> Saving model checkpoint to ./outputs/checkpoint-23500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:54:40,112 >> Configuration saved in ./outputs/checkpoint-23500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:54:41,583 >> Model weights saved in 
./outputs/checkpoint-23500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:54:41,584 >> tokenizer config file saved in ./outputs/checkpoint-23500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:54:41,585 >> Special tokens file saved in ./outputs/checkpoint-23500/special_tokens_map.json\n", + "{'loss': 0.6904, 'learning_rate': 1.1139896373056995e-05, 'epoch': 3.89}\n", + " 78% 24000/30880 [1:22:24<39:20, 2.91it/s][INFO|trainer.py:1917] 2021-07-20 12:56:23,489 >> Saving model checkpoint to ./outputs/checkpoint-24000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:56:23,491 >> Configuration saved in ./outputs/checkpoint-24000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:56:25,074 >> Model weights saved in ./outputs/checkpoint-24000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:56:25,075 >> tokenizer config file saved in ./outputs/checkpoint-24000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:56:25,076 >> Special tokens file saved in ./outputs/checkpoint-24000/special_tokens_map.json\n", + " 79% 24500/30880 [1:24:08<35:56, 2.96it/s]{'loss': 0.6884, 'learning_rate': 1.0330310880829017e-05, 'epoch': 3.97}\n", + "[INFO|trainer.py:1917] 2021-07-20 12:58:06,872 >> Saving model checkpoint to ./outputs/checkpoint-24500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:58:06,873 >> Configuration saved in ./outputs/checkpoint-24500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:58:08,346 >> Model weights saved in ./outputs/checkpoint-24500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:58:08,347 >> tokenizer config file saved in ./outputs/checkpoint-24500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:58:08,347 >> Special tokens file saved in ./outputs/checkpoint-24500/special_tokens_map.json\n", + " 81% 25000/30880 [1:25:51<33:26, 2.93it/s]{'loss': 0.6919, 'learning_rate': 9.520725388601037e-06, 'epoch': 4.05}\n", + " 81% 25000/30880 [1:25:51<33:26, 2.93it/s][INFO|trainer.py:1917] 2021-07-20 12:59:50,700 >> Saving model checkpoint to ./outputs/checkpoint-25000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 12:59:50,701 >> Configuration saved in ./outputs/checkpoint-25000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 12:59:52,196 >> Model weights saved in ./outputs/checkpoint-25000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 12:59:52,197 >> tokenizer config file saved in ./outputs/checkpoint-25000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 12:59:52,197 >> Special tokens file saved in ./outputs/checkpoint-25000/special_tokens_map.json\n", + " 83% 25500/30880 [1:27:35<30:17, 2.96it/s]{'loss': 0.6866, 'learning_rate': 8.711139896373057e-06, 'epoch': 4.13}\n", + "[INFO|trainer.py:1917] 2021-07-20 13:01:34,651 >> Saving model checkpoint to ./outputs/checkpoint-25500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:01:34,653 >> Configuration saved in ./outputs/checkpoint-25500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:01:36,166 >> Model weights saved in ./outputs/checkpoint-25500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:01:36,167 >> tokenizer config file saved in ./outputs/checkpoint-25500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:01:36,167 >> Special tokens file saved in 
./outputs/checkpoint-25500/special_tokens_map.json\n", + " 84% 26000/30880 [1:29:19<27:31, 2.95it/s]{'loss': 0.6898, 'learning_rate': 7.901554404145079e-06, 'epoch': 4.21}\n", + " 84% 26000/30880 [1:29:19<27:31, 2.95it/s][INFO|trainer.py:1917] 2021-07-20 13:03:17,880 >> Saving model checkpoint to ./outputs/checkpoint-26000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:03:17,882 >> Configuration saved in ./outputs/checkpoint-26000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:03:19,302 >> Model weights saved in ./outputs/checkpoint-26000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:03:19,302 >> tokenizer config file saved in ./outputs/checkpoint-26000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:03:19,303 >> Special tokens file saved in ./outputs/checkpoint-26000/special_tokens_map.json\n", + " 86% 26500/30880 [1:31:02<25:17, 2.89it/s]{'loss': 0.6898, 'learning_rate': 7.091968911917099e-06, 'epoch': 4.29}\n", + " 86% 26500/30880 [1:31:02<25:17, 2.89it/s][INFO|trainer.py:1917] 2021-07-20 13:05:01,244 >> Saving model checkpoint to ./outputs/checkpoint-26500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:05:01,245 >> Configuration saved in ./outputs/checkpoint-26500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:05:02,779 >> Model weights saved in ./outputs/checkpoint-26500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:05:02,780 >> tokenizer config file saved in ./outputs/checkpoint-26500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:05:02,780 >> Special tokens file saved in ./outputs/checkpoint-26500/special_tokens_map.json\n", + " 87% 27000/30880 [1:32:45<21:55, 2.95it/s]{'loss': 0.6904, 'learning_rate': 6.282383419689119e-06, 'epoch': 4.37}\n", + "[INFO|trainer.py:1917] 2021-07-20 13:06:44,730 >> Saving model checkpoint to ./outputs/checkpoint-27000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:06:44,731 >> Configuration saved in ./outputs/checkpoint-27000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:06:46,145 >> Model weights saved in ./outputs/checkpoint-27000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:06:46,146 >> tokenizer config file saved in ./outputs/checkpoint-27000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:06:46,146 >> Special tokens file saved in ./outputs/checkpoint-27000/special_tokens_map.json\n", + "{'loss': 0.6882, 'learning_rate': 5.47279792746114e-06, 'epoch': 4.45}\n", + " 89% 27500/30880 [1:34:29<19:17, 2.92it/s][INFO|trainer.py:1917] 2021-07-20 13:08:28,182 >> Saving model checkpoint to ./outputs/checkpoint-27500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:08:28,183 >> Configuration saved in ./outputs/checkpoint-27500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:08:29,645 >> Model weights saved in ./outputs/checkpoint-27500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:08:29,646 >> tokenizer config file saved in ./outputs/checkpoint-27500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:08:29,646 >> Special tokens file saved in ./outputs/checkpoint-27500/special_tokens_map.json\n", + " 91% 28000/30880 [1:36:12<16:12, 2.96it/s]{'loss': 0.6899, 'learning_rate': 4.663212435233161e-06, 'epoch': 4.53}\n", + "[INFO|trainer.py:1917] 2021-07-20 13:10:11,478 >> Saving model checkpoint to 
./outputs/checkpoint-28000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:10:11,479 >> Configuration saved in ./outputs/checkpoint-28000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:10:12,808 >> Model weights saved in ./outputs/checkpoint-28000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:10:12,809 >> tokenizer config file saved in ./outputs/checkpoint-28000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:10:12,809 >> Special tokens file saved in ./outputs/checkpoint-28000/special_tokens_map.json\n", + " 92% 28500/30880 [1:37:56<13:24, 2.96it/s]{'loss': 0.6897, 'learning_rate': 3.853626943005181e-06, 'epoch': 4.61}\n", + "[INFO|trainer.py:1917] 2021-07-20 13:11:55,543 >> Saving model checkpoint to ./outputs/checkpoint-28500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:11:55,544 >> Configuration saved in ./outputs/checkpoint-28500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:11:57,071 >> Model weights saved in ./outputs/checkpoint-28500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:11:57,134 >> tokenizer config file saved in ./outputs/checkpoint-28500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:11:57,134 >> Special tokens file saved in ./outputs/checkpoint-28500/special_tokens_map.json\n", + " 94% 29000/30880 [1:39:40<10:30, 2.98it/s]{'loss': 0.69, 'learning_rate': 3.044041450777202e-06, 'epoch': 4.7}\n", + "[INFO|trainer.py:1917] 2021-07-20 13:13:39,086 >> Saving model checkpoint to ./outputs/checkpoint-29000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:13:39,087 >> Configuration saved in ./outputs/checkpoint-29000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:13:40,501 >> Model weights saved in ./outputs/checkpoint-29000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:13:40,505 >> tokenizer config file saved in ./outputs/checkpoint-29000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:13:40,505 >> Special tokens file saved in ./outputs/checkpoint-29000/special_tokens_map.json\n", + " 96% 29500/30880 [1:41:23<07:49, 2.94it/s]{'loss': 0.687, 'learning_rate': 2.234455958549223e-06, 'epoch': 4.78}\n", + "[INFO|trainer.py:1917] 2021-07-20 13:15:22,276 >> Saving model checkpoint to ./outputs/checkpoint-29500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:15:22,282 >> Configuration saved in ./outputs/checkpoint-29500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:15:23,660 >> Model weights saved in ./outputs/checkpoint-29500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:15:23,661 >> tokenizer config file saved in ./outputs/checkpoint-29500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:15:23,661 >> Special tokens file saved in ./outputs/checkpoint-29500/special_tokens_map.json\n", + " 97% 30000/30880 [1:43:07<05:01, 2.92it/s]{'loss': 0.6893, 'learning_rate': 1.4248704663212437e-06, 'epoch': 4.86}\n", + "[INFO|trainer.py:1917] 2021-07-20 13:17:06,113 >> Saving model checkpoint to ./outputs/checkpoint-30000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:17:06,114 >> Configuration saved in ./outputs/checkpoint-30000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:17:07,719 >> Model weights saved in ./outputs/checkpoint-30000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:17:07,720 >> 
tokenizer config file saved in ./outputs/checkpoint-30000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:17:07,721 >> Special tokens file saved in ./outputs/checkpoint-30000/special_tokens_map.json\n", + " 99% 30500/30880 [1:44:50<02:08, 2.96it/s]{'loss': 0.6909, 'learning_rate': 6.152849740932643e-07, 'epoch': 4.94}\n", + " 99% 30500/30880 [1:44:50<02:08, 2.96it/s][INFO|trainer.py:1917] 2021-07-20 13:18:49,734 >> Saving model checkpoint to ./outputs/checkpoint-30500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:18:49,735 >> Configuration saved in ./outputs/checkpoint-30500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:18:51,079 >> Model weights saved in ./outputs/checkpoint-30500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:18:51,079 >> tokenizer config file saved in ./outputs/checkpoint-30500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:18:51,080 >> Special tokens file saved in ./outputs/checkpoint-30500/special_tokens_map.json\n", + "100% 30880/30880 [1:46:10<00:00, 5.89it/s][INFO|trainer.py:1358] 2021-07-20 13:20:09,340 >> \n", + "\n", + "Training completed. Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n", + "{'train_runtime': 6373.0159, 'train_samples_per_second': 38.758, 'train_steps_per_second': 4.845, 'train_loss': 0.6934825669916183, 'epoch': 5.0}\n", + "100% 30880/30880 [1:46:10<00:00, 4.85it/s]\n", + "[INFO|trainer.py:1917] 2021-07-20 13:20:09,352 >> Saving model checkpoint to ./outputs\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:20:09,353 >> Configuration saved in ./outputs/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:20:11,595 >> Model weights saved in ./outputs/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:20:11,596 >> tokenizer config file saved in ./outputs/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:20:11,596 >> Special tokens file saved in ./outputs/special_tokens_map.json\n", + "***** train metrics *****\n", + " epoch = 5.0\n", + " train_loss = 0.6935\n", + " train_runtime = 1:46:13.01\n", + " train_samples = 49401\n", + " train_samples_per_second = 38.758\n", + " train_steps_per_second = 4.845\n", + "07/20/2021 13:20:11 - INFO - __main__ - *** Evaluate ***\n", + "[INFO|trainer.py:522] 2021-07-20 13:20:11,706 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: sentence2, id, sentence1.\n", + "[INFO|trainer.py:2163] 2021-07-20 13:20:11,715 >> ***** Running Evaluation *****\n", + "[INFO|trainer.py:2165] 2021-07-20 13:20:11,715 >> Num examples = 2000\n", + "[INFO|trainer.py:2168] 2021-07-20 13:20:11,715 >> Batch size = 8\n", + " 99% 248/250 [00:11<00:00, 20.55it/s]***** eval metrics *****\n", + "100% 250/250 [00:12<00:00, 20.81it/s] epoch = 5.0\n", + " eval_accuracy = 0.5765\n", + " eval_loss = 0.6818\n", + "\n", + " eval_runtime = 0:00:12.07\n", + " eval_samples = 2000\n", + " eval_samples_per_second = 165.685\n", + " eval_steps_per_second = 20.711\n", + "\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish, PID 1591\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Program ended successfully.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find user logs for this run at: /content/wandb/run-20210720_113356-27wn690d/logs/debug.log\n", + 
"\u001b[34m\u001b[1mwandb\u001b[0m: Find internal logs for this run at: /content/wandb/run-20210720_113356-27wn690d/logs/debug-internal.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.6909\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 30880\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime 6387\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp 1626787223\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step 62\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime 6373.0159\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second 38.758\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second 4.845\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos 1.62474365572992e+16\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss 0.69348\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 0.68176\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy 0.5765\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 12.0711\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 165.685\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 20.711\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss █▇▅▆▅▅▄▅▅▄▂▃▄▂▄▄▃▃▄▃▄▂▃▁▄▂▂▃▃▁▃▃▃▂▂▃▂▂▂▃\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate ████▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime ▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp ▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step ▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 1 other file(s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced \u001b[33m./outputs\u001b[0m: \u001b[34mhttps://wandb.ai/versae/bertin-eval/runs/27wn690d\u001b[0m\n", + "2021-07-20 13:20:34.224882: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "07/20/2021 13:20:36 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", + "07/20/2021 13:20:36 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", + "_n_gpu=1,\n", + "adafactor=False,\n", + "adam_beta1=0.9,\n", + "adam_beta2=0.999,\n", + "adam_epsilon=1e-08,\n", + 
"dataloader_drop_last=False,\n", + "dataloader_num_workers=0,\n", + "dataloader_pin_memory=True,\n", + "ddp_find_unused_parameters=None,\n", + "debug=[],\n", + "deepspeed=None,\n", + "disable_tqdm=False,\n", + "do_eval=True,\n", + "do_predict=False,\n", + "do_train=True,\n", + "eval_accumulation_steps=None,\n", + "eval_steps=500,\n", + "evaluation_strategy=IntervalStrategy.NO,\n", + "fp16=False,\n", + "fp16_backend=auto,\n", + "fp16_full_eval=False,\n", + "fp16_opt_level=O1,\n", + "gradient_accumulation_steps=1,\n", + "greater_is_better=None,\n", + "group_by_length=False,\n", + "ignore_data_skip=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=5e-05,\n", + "length_column_name=length,\n", + "load_best_model_at_end=False,\n", + "local_rank=-1,\n", + "log_level=-1,\n", + "log_level_replica=-1,\n", + "log_on_each_node=True,\n", + "logging_dir=./outputs/runs/Jul20_13-20-36_48f3f265b421,\n", + "logging_first_step=False,\n", + "logging_steps=500,\n", + "logging_strategy=IntervalStrategy.STEPS,\n", + "lr_scheduler_type=SchedulerType.LINEAR,\n", + "max_grad_norm=1.0,\n", + "max_steps=-1,\n", + "metric_for_best_model=None,\n", + "mp_parameters=,\n", + "no_cuda=False,\n", + "num_train_epochs=5.0,\n", + "output_dir=./outputs,\n", + "overwrite_output_dir=True,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=8,\n", + "per_device_train_batch_size=8,\n", + "prediction_loss_only=False,\n", + "push_to_hub=False,\n", + "push_to_hub_model_id=outputs,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=None,\n", + "remove_unused_columns=True,\n", + "report_to=['tensorboard', 'wandb'],\n", + "resume_from_checkpoint=None,\n", + "run_name=./outputs,\n", + "save_on_each_node=False,\n", + "save_steps=500,\n", + "save_strategy=IntervalStrategy.STEPS,\n", + "save_total_limit=None,\n", + "seed=42,\n", + "sharded_ddp=[],\n", + "skip_memory_metrics=True,\n", + "tpu_metrics_debug=False,\n", + "tpu_num_cores=None,\n", + "use_legacy_prediction_loop=False,\n", + "warmup_ratio=0.0,\n", + "warmup_steps=0,\n", + "weight_decay=0.0,\n", + ")\n", + "07/20/2021 13:20:37 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py at /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x\n", + "07/20/2021 13:20:37 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py at /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af\n", + "07/20/2021 13:20:37 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py to /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/paws-x.py\n", + "07/20/2021 13:20:37 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/dataset_infos.json\n", + "07/20/2021 13:20:37 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py at 
/root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/paws-x.json\n", + "07/20/2021 13:20:37 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py at /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x\n", + "07/20/2021 13:20:37 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py at /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af\n", + "07/20/2021 13:20:37 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py to /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/paws-x.py\n", + "07/20/2021 13:20:37 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/dataset_infos.json\n", + "07/20/2021 13:20:37 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py at /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/paws-x.json\n", + "07/20/2021 13:20:37 - INFO - datasets.info - Loading Dataset Infos from /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af\n", + "07/20/2021 13:20:37 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", + "07/20/2021 13:20:37 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/pawsx/es/1.1.0/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af\n", + "07/20/2021 13:20:37 - WARNING - datasets.builder - Reusing dataset pawsx (/root/.cache/huggingface/datasets/pawsx/es/1.1.0/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af)\n", + "07/20/2021 13:20:37 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/pawsx/es/1.1.0/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af\n", + "100% 3/3 [00:00<00:00, 26.02it/s]\n", + "[INFO|configuration_utils.py:545] 2021-07-20 13:20:38,216 >> loading configuration file https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/cbd56d68ce5dcd2626aba4c4b188db63f2ba2c49a604b36e7cdc6e52578ee306.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|configuration_utils.py:581] 2021-07-20 13:20:38,217 >> Model config RobertaConfig {\n", + " \"architectures\": [\n", + " \"RobertaForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"bos_token_id\": 0,\n", + " \"eos_token_id\": 2,\n", + " \"finetuning_task\": \"paws-x\",\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"max_position_embeddings\": 
514,\n", + " \"model_type\": \"roberta\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 1,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50265\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-20 13:20:40,662 >> loading file https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/vocab.json from cache at /root/.cache/huggingface/transformers/dae6454603d0b1d10a2446ffc1a21ccd636b0ca6a4c77a79fb9dfde03f4a51b8.a80f232f572026f92499b14999a8ed4e044e04cf3d01b9f2be298c98e78e8498\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-20 13:20:40,662 >> loading file https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/merges.txt from cache at /root/.cache/huggingface/transformers/2e77e6f778d3fd8875349675d408521ca20c1f1acac2fd57d60ca945d82b926e.a0dfc41f9d0f03a56ba7a5401d770f6e43071045a0bd79073380d408d17a0d92\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-20 13:20:40,662 >> loading file https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/98a6808b3aa08b6d84e8b30dfa6892d15e9e631eebff8652b37ab29d75a0b98a.2a5dc806edc00ab3a329cb22b9973596ca75b24ba0e5e4963bf1308de7237a3d\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-20 13:20:40,662 >> loading file https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-20 13:20:40,662 >> loading file https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/special_tokens_map.json from cache at /root/.cache/huggingface/transformers/1f5a4bde3e85f3c7b914d0e6b43b2f72d6b3b2f9ddbec7be9e4b0521a429f67f.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-20 13:20:40,662 >> loading file https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/tokenizer_config.json from cache at /root/.cache/huggingface/transformers/7a419a17bf4372869932365630632f434d402e93dd7e609e73607cf71ec1bdf7.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|modeling_utils.py:1271] 2021-07-20 13:20:41,084 >> loading weights file https://huggingface.co/bertin-project/bertin-base-gaussian/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/d61c66f6163d7933bcffb5de3a666094c2c7c8d54145ec0cea640f72204427e0.50b5552cf09535e0a4b85cc39c83be233674d4cab0836dd8fedc97aa778c802c\n", + "[WARNING|modeling_utils.py:1502] 2021-07-20 13:20:48,909 >> Some weights of the model checkpoint at bertin-project/bertin-base-gaussian were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.bias']\n", + "- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "[WARNING|modeling_utils.py:1513] 2021-07-20 13:20:48,910 >> Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at bertin-project/bertin-base-gaussian and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "07/20/2021 13:20:48 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /root/.cache/huggingface/datasets/pawsx/es/1.1.0/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/cache-54fbfa10643bca55.arrow\n", + "07/20/2021 13:20:49 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /root/.cache/huggingface/datasets/pawsx/es/1.1.0/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/cache-68c99004674c0acc.arrow\n", + "Running tokenizer on dataset: 0% 0/2 [00:00> The following columns in the training set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: sentence2, id, sentence1.\n", + "[INFO|trainer.py:1162] 2021-07-20 13:20:54,594 >> ***** Running training *****\n", + "[INFO|trainer.py:1163] 2021-07-20 13:20:54,594 >> Num examples = 49401\n", + "[INFO|trainer.py:1164] 2021-07-20 13:20:54,595 >> Num Epochs = 5\n", + "[INFO|trainer.py:1165] 2021-07-20 13:20:54,595 >> Instantaneous batch size per device = 8\n", + "[INFO|trainer.py:1166] 2021-07-20 13:20:54,595 >> Total train batch size (w. 
parallel, distributed & accumulation) = 8\n", + "[INFO|trainer.py:1167] 2021-07-20 13:20:54,595 >> Gradient Accumulation steps = 1\n", + "[INFO|trainer.py:1168] 2021-07-20 13:20:54,595 >> Total optimization steps = 30880\n", + "[INFO|integrations.py:446] 2021-07-20 13:20:54,613 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mversae\u001b[0m (use `wandb login --relogin` to force relogin)\n", + "2021-07-20 13:20:56.005653: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m./outputs\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval/runs/20701jiq\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in /content/wandb/run-20210720_132054-20701jiq\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run `wandb offline` to turn off syncing.\n", + "\n", + " 2% 500/30880 [01:38<2:48:30, 3.00it/s]{'loss': 0.6445, 'learning_rate': 4.919041450777203e-05, 'epoch': 0.08}\n", + " 2% 500/30880 [01:38<2:48:30, 3.00it/s][INFO|trainer.py:1917] 2021-07-20 13:22:35,366 >> Saving model checkpoint to ./outputs/checkpoint-500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:22:35,367 >> Configuration saved in ./outputs/checkpoint-500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:22:36,740 >> Model weights saved in ./outputs/checkpoint-500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:22:36,740 >> tokenizer config file saved in ./outputs/checkpoint-500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:22:36,741 >> Special tokens file saved in ./outputs/checkpoint-500/special_tokens_map.json\n", + " {'loss': 0.4639, 'learning_rate': 4.8380829015544046e-05, 'epoch': 0.16}\n", + " 3% 1000/30880 [03:21<2:47:36, 2.97it/s][INFO|trainer.py:1917] 2021-07-20 13:24:19,145 >> Saving model checkpoint to ./outputs/checkpoint-1000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:24:19,146 >> Configuration saved in ./outputs/checkpoint-1000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:24:20,479 >> Model weights saved in ./outputs/checkpoint-1000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:24:20,480 >> tokenizer config file saved in ./outputs/checkpoint-1000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:24:20,480 >> Special tokens file saved in ./outputs/checkpoint-1000/special_tokens_map.json\n", + "{'loss': 0.4439, 'learning_rate': 4.7571243523316064e-05, 'epoch': 0.24}\n", + " 5% 1500/30880 [05:05<2:42:55, 3.01it/s][INFO|trainer.py:1917] 2021-07-20 13:26:02,950 >> Saving model checkpoint to ./outputs/checkpoint-1500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:26:02,951 >> Configuration saved in ./outputs/checkpoint-1500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:26:04,453 >> Model weights saved in ./outputs/checkpoint-1500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:26:04,454 >> tokenizer config file saved in 
./outputs/checkpoint-1500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:26:04,454 >> Special tokens file saved in ./outputs/checkpoint-1500/special_tokens_map.json\n", + "{'loss': 0.4076, 'learning_rate': 4.676165803108808e-05, 'epoch': 0.32}\n", + " 6% 2000/30880 [06:50<2:42:07, 2.97it/s][INFO|trainer.py:1917] 2021-07-20 13:27:47,569 >> Saving model checkpoint to ./outputs/checkpoint-2000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:27:47,571 >> Configuration saved in ./outputs/checkpoint-2000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:27:49,056 >> Model weights saved in ./outputs/checkpoint-2000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:27:49,057 >> tokenizer config file saved in ./outputs/checkpoint-2000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:27:49,057 >> Special tokens file saved in ./outputs/checkpoint-2000/special_tokens_map.json\n", + "{'loss': 0.4025, 'learning_rate': 4.595207253886011e-05, 'epoch': 0.4}\n", + " 8% 2500/30880 [08:34<2:42:19, 2.91it/s][INFO|trainer.py:1917] 2021-07-20 13:29:31,608 >> Saving model checkpoint to ./outputs/checkpoint-2500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:29:31,609 >> Configuration saved in ./outputs/checkpoint-2500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:29:32,947 >> Model weights saved in ./outputs/checkpoint-2500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:29:32,948 >> tokenizer config file saved in ./outputs/checkpoint-2500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:29:32,948 >> Special tokens file saved in ./outputs/checkpoint-2500/special_tokens_map.json\n", + "{'loss': 0.397, 'learning_rate': 4.5142487046632126e-05, 'epoch': 0.49}\n", + " 10% 3000/30880 [10:18<2:57:21, 2.62it/s][INFO|trainer.py:1917] 2021-07-20 13:31:15,457 >> Saving model checkpoint to ./outputs/checkpoint-3000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:31:15,459 >> Configuration saved in ./outputs/checkpoint-3000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:31:17,087 >> Model weights saved in ./outputs/checkpoint-3000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:31:17,087 >> tokenizer config file saved in ./outputs/checkpoint-3000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:31:17,088 >> Special tokens file saved in ./outputs/checkpoint-3000/special_tokens_map.json\n", + "{'loss': 0.3652, 'learning_rate': 4.433290155440415e-05, 'epoch': 0.57}\n", + " 11% 3500/30880 [12:02<2:30:37, 3.03it/s][INFO|trainer.py:1917] 2021-07-20 13:32:59,762 >> Saving model checkpoint to ./outputs/checkpoint-3500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:32:59,764 >> Configuration saved in ./outputs/checkpoint-3500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:33:01,187 >> Model weights saved in ./outputs/checkpoint-3500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:33:01,188 >> tokenizer config file saved in ./outputs/checkpoint-3500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:33:01,188 >> Special tokens file saved in ./outputs/checkpoint-3500/special_tokens_map.json\n", + " 13% 4000/30880 [13:46<2:27:43, 3.03it/s]{'loss': 0.3801, 'learning_rate': 4.352331606217617e-05, 'epoch': 0.65}\n", + "[INFO|trainer.py:1917] 2021-07-20 13:34:43,538 >> Saving 
model checkpoint to ./outputs/checkpoint-4000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:34:43,539 >> Configuration saved in ./outputs/checkpoint-4000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:34:44,988 >> Model weights saved in ./outputs/checkpoint-4000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:34:45,015 >> tokenizer config file saved in ./outputs/checkpoint-4000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:34:45,015 >> Special tokens file saved in ./outputs/checkpoint-4000/special_tokens_map.json\n", + " 15% 4500/30880 [15:29<2:29:33, 2.94it/s][INFO|trainer.py:1917] 2021-07-20 13:36:26,541 >> Saving model checkpoint to ./outputs/checkpoint-4500\n", + "{'loss': 0.3775, 'learning_rate': 4.271373056994819e-05, 'epoch': 0.73}\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:36:26,543 >> Configuration saved in ./outputs/checkpoint-4500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:36:28,160 >> Model weights saved in ./outputs/checkpoint-4500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:36:28,161 >> tokenizer config file saved in ./outputs/checkpoint-4500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:36:28,161 >> Special tokens file saved in ./outputs/checkpoint-4500/special_tokens_map.json\n", + "{'loss': 0.3487, 'learning_rate': 4.190414507772021e-05, 'epoch': 0.81}\n", + " 16% 5000/30880 [17:13<2:26:17, 2.95it/s][INFO|trainer.py:1917] 2021-07-20 13:38:10,599 >> Saving model checkpoint to ./outputs/checkpoint-5000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:38:10,600 >> Configuration saved in ./outputs/checkpoint-5000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:38:12,020 >> Model weights saved in ./outputs/checkpoint-5000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:38:12,021 >> tokenizer config file saved in ./outputs/checkpoint-5000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:38:12,021 >> Special tokens file saved in ./outputs/checkpoint-5000/special_tokens_map.json\n", + " 18% 5500/30880 [18:56<2:21:05, 3.00it/s]{'loss': 0.352, 'learning_rate': 4.109455958549223e-05, 'epoch': 0.89}\n", + "[INFO|trainer.py:1917] 2021-07-20 13:39:54,062 >> Saving model checkpoint to ./outputs/checkpoint-5500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:39:54,068 >> Configuration saved in ./outputs/checkpoint-5500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:39:55,470 >> Model weights saved in ./outputs/checkpoint-5500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:39:55,471 >> tokenizer config file saved in ./outputs/checkpoint-5500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:39:55,471 >> Special tokens file saved in ./outputs/checkpoint-5500/special_tokens_map.json\n", + " 19% 6000/30880 [20:40<2:18:54, 2.99it/s]{'loss': 0.3634, 'learning_rate': 4.028497409326425e-05, 'epoch': 0.97}\n", + " 19% 6000/30880 [20:40<2:18:54, 2.99it/s][INFO|trainer.py:1917] 2021-07-20 13:41:37,622 >> Saving model checkpoint to ./outputs/checkpoint-6000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:41:37,624 >> Configuration saved in ./outputs/checkpoint-6000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:41:39,087 >> Model weights saved in ./outputs/checkpoint-6000/pytorch_model.bin\n", + 
"[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:41:39,088 >> tokenizer config file saved in ./outputs/checkpoint-6000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:41:39,088 >> Special tokens file saved in ./outputs/checkpoint-6000/special_tokens_map.json\n", + "{'loss': 0.3221, 'learning_rate': 3.9475388601036275e-05, 'epoch': 1.05}\n", + " 21% 6500/30880 [22:23<2:17:55, 2.95it/s][INFO|trainer.py:1917] 2021-07-20 13:43:20,501 >> Saving model checkpoint to ./outputs/checkpoint-6500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:43:20,502 >> Configuration saved in ./outputs/checkpoint-6500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:43:22,034 >> Model weights saved in ./outputs/checkpoint-6500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:43:22,035 >> tokenizer config file saved in ./outputs/checkpoint-6500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:43:22,035 >> Special tokens file saved in ./outputs/checkpoint-6500/special_tokens_map.json\n", + " 23% 7000/30880 [24:06<2:12:01, 3.01it/s]{'loss': 0.3012, 'learning_rate': 3.8665803108808294e-05, 'epoch': 1.13}\n", + " 23% 7000/30880 [24:06<2:12:01, 3.01it/s][INFO|trainer.py:1917] 2021-07-20 13:45:03,740 >> Saving model checkpoint to ./outputs/checkpoint-7000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:45:03,746 >> Configuration saved in ./outputs/checkpoint-7000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:45:05,234 >> Model weights saved in ./outputs/checkpoint-7000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:45:05,235 >> tokenizer config file saved in ./outputs/checkpoint-7000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:45:05,235 >> Special tokens file saved in ./outputs/checkpoint-7000/special_tokens_map.json\n", + " 24% 7500/30880 [25:49<2:10:29, 2.99it/s][INFO|trainer.py:1917] 2021-07-20 13:46:46,770 >> Saving model checkpoint to ./outputs/checkpoint-7500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:46:46,771 >> Configuration saved in ./outputs/checkpoint-7500/config.json\n", + "{'loss': 0.2896, 'learning_rate': 3.785621761658031e-05, 'epoch': 1.21}\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:46:48,276 >> Model weights saved in ./outputs/checkpoint-7500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:46:48,277 >> tokenizer config file saved in ./outputs/checkpoint-7500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:46:48,277 >> Special tokens file saved in ./outputs/checkpoint-7500/special_tokens_map.json\n", + " 26% 8000/30880 [27:32<2:07:45, 2.98it/s]{'loss': 0.293, 'learning_rate': 3.704663212435233e-05, 'epoch': 1.3}\n", + "[INFO|trainer.py:1917] 2021-07-20 13:48:29,524 >> Saving model checkpoint to ./outputs/checkpoint-8000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:48:29,525 >> Configuration saved in ./outputs/checkpoint-8000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:48:30,956 >> Model weights saved in ./outputs/checkpoint-8000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:48:30,957 >> tokenizer config file saved in ./outputs/checkpoint-8000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:48:30,957 >> Special tokens file saved in ./outputs/checkpoint-8000/special_tokens_map.json\n", + "{'loss': 0.3197, 'learning_rate': 
3.6237046632124356e-05, 'epoch': 1.38}\n", + " 28% 8500/30880 [29:15<2:07:02, 2.94it/s][INFO|trainer.py:1917] 2021-07-20 13:50:12,950 >> Saving model checkpoint to ./outputs/checkpoint-8500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:50:12,951 >> Configuration saved in ./outputs/checkpoint-8500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:50:14,392 >> Model weights saved in ./outputs/checkpoint-8500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:50:14,420 >> tokenizer config file saved in ./outputs/checkpoint-8500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:50:14,421 >> Special tokens file saved in ./outputs/checkpoint-8500/special_tokens_map.json\n", + "{'loss': 0.2864, 'learning_rate': 3.5427461139896374e-05, 'epoch': 1.46}\n", + " 29% 9000/30880 [30:58<1:59:32, 3.05it/s][INFO|trainer.py:1917] 2021-07-20 13:51:56,132 >> Saving model checkpoint to ./outputs/checkpoint-9000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:51:56,133 >> Configuration saved in ./outputs/checkpoint-9000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:51:57,657 >> Model weights saved in ./outputs/checkpoint-9000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:51:57,658 >> tokenizer config file saved in ./outputs/checkpoint-9000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:51:57,658 >> Special tokens file saved in ./outputs/checkpoint-9000/special_tokens_map.json\n", + "{'loss': 0.2849, 'learning_rate': 3.46178756476684e-05, 'epoch': 1.54}\n", + " 31% 9500/30880 [32:41<1:58:07, 3.02it/s][INFO|trainer.py:1917] 2021-07-20 13:53:38,772 >> Saving model checkpoint to ./outputs/checkpoint-9500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:53:38,773 >> Configuration saved in ./outputs/checkpoint-9500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:53:40,184 >> Model weights saved in ./outputs/checkpoint-9500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:53:40,184 >> tokenizer config file saved in ./outputs/checkpoint-9500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:53:40,185 >> Special tokens file saved in ./outputs/checkpoint-9500/special_tokens_map.json\n", + "{'loss': 0.3262, 'learning_rate': 3.380829015544041e-05, 'epoch': 1.62}\n", + " 32% 10000/30880 [34:25<1:56:35, 2.98it/s][INFO|trainer.py:1917] 2021-07-20 13:55:22,359 >> Saving model checkpoint to ./outputs/checkpoint-10000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:55:22,360 >> Configuration saved in ./outputs/checkpoint-10000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:55:23,768 >> Model weights saved in ./outputs/checkpoint-10000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:55:23,769 >> tokenizer config file saved in ./outputs/checkpoint-10000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:55:23,770 >> Special tokens file saved in ./outputs/checkpoint-10000/special_tokens_map.json\n", + "{'loss': 0.3097, 'learning_rate': 3.2998704663212436e-05, 'epoch': 1.7}\n", + " 34% 10500/30880 [36:07<1:52:23, 3.02it/s][INFO|trainer.py:1917] 2021-07-20 13:57:04,709 >> Saving model checkpoint to ./outputs/checkpoint-10500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:57:04,710 >> Configuration saved in ./outputs/checkpoint-10500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 
13:57:06,218 >> Model weights saved in ./outputs/checkpoint-10500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:57:06,219 >> tokenizer config file saved in ./outputs/checkpoint-10500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:57:06,219 >> Special tokens file saved in ./outputs/checkpoint-10500/special_tokens_map.json\n", + "{'loss': 0.3176, 'learning_rate': 3.2189119170984454e-05, 'epoch': 1.78}\n", + " 36% 11000/30880 [37:51<1:50:40, 2.99it/s][INFO|trainer.py:1917] 2021-07-20 13:58:48,236 >> Saving model checkpoint to ./outputs/checkpoint-11000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 13:58:48,237 >> Configuration saved in ./outputs/checkpoint-11000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 13:58:49,696 >> Model weights saved in ./outputs/checkpoint-11000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 13:58:49,697 >> tokenizer config file saved in ./outputs/checkpoint-11000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 13:58:49,697 >> Special tokens file saved in ./outputs/checkpoint-11000/special_tokens_map.json\n", + " 37% 11500/30880 [39:34<1:45:08, 3.07it/s]{'loss': 0.3081, 'learning_rate': 3.137953367875648e-05, 'epoch': 1.86}\n", + " 37% 11500/30880 [39:34<1:45:08, 3.07it/s][INFO|trainer.py:1917] 2021-07-20 14:00:31,780 >> Saving model checkpoint to ./outputs/checkpoint-11500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:00:31,781 >> Configuration saved in ./outputs/checkpoint-11500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:00:33,282 >> Model weights saved in ./outputs/checkpoint-11500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:00:33,283 >> tokenizer config file saved in ./outputs/checkpoint-11500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:00:33,283 >> Special tokens file saved in ./outputs/checkpoint-11500/special_tokens_map.json\n", + "{'loss': 0.3089, 'learning_rate': 3.05699481865285e-05, 'epoch': 1.94}\n", + " 39% 12000/30880 [41:17<1:45:38, 2.98it/s][INFO|trainer.py:1917] 2021-07-20 14:02:14,754 >> Saving model checkpoint to ./outputs/checkpoint-12000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:02:14,756 >> Configuration saved in ./outputs/checkpoint-12000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:02:16,320 >> Model weights saved in ./outputs/checkpoint-12000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:02:16,321 >> tokenizer config file saved in ./outputs/checkpoint-12000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:02:16,322 >> Special tokens file saved in ./outputs/checkpoint-12000/special_tokens_map.json\n", + " 40% 12500/30880 [43:00<1:40:30, 3.05it/s]{'loss': 0.3404, 'learning_rate': 2.976036269430052e-05, 'epoch': 2.02}\n", + " 40% 12500/30880 [43:00<1:40:30, 3.05it/s][INFO|trainer.py:1917] 2021-07-20 14:03:57,999 >> Saving model checkpoint to ./outputs/checkpoint-12500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:03:58,000 >> Configuration saved in ./outputs/checkpoint-12500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:03:59,481 >> Model weights saved in ./outputs/checkpoint-12500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:03:59,481 >> tokenizer config file saved in ./outputs/checkpoint-12500/tokenizer_config.json\n", + 
"[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:03:59,482 >> Special tokens file saved in ./outputs/checkpoint-12500/special_tokens_map.json\n", + " 42% 13000/30880 [44:44<1:38:16, 3.03it/s]{'loss': 0.2694, 'learning_rate': 2.8950777202072538e-05, 'epoch': 2.1}\n", + " 42% 13000/30880 [44:44<1:38:16, 3.03it/s][INFO|trainer.py:1917] 2021-07-20 14:05:41,200 >> Saving model checkpoint to ./outputs/checkpoint-13000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:05:41,202 >> Configuration saved in ./outputs/checkpoint-13000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:05:42,662 >> Model weights saved in ./outputs/checkpoint-13000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:05:42,663 >> tokenizer config file saved in ./outputs/checkpoint-13000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:05:42,663 >> Special tokens file saved in ./outputs/checkpoint-13000/special_tokens_map.json\n", + " 44% 13500/30880 [46:26<1:36:37, 3.00it/s]{'loss': 0.2582, 'learning_rate': 2.814119170984456e-05, 'epoch': 2.19}\n", + "[INFO|trainer.py:1917] 2021-07-20 14:07:23,996 >> Saving model checkpoint to ./outputs/checkpoint-13500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:07:23,997 >> Configuration saved in ./outputs/checkpoint-13500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:07:25,507 >> Model weights saved in ./outputs/checkpoint-13500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:07:25,508 >> tokenizer config file saved in ./outputs/checkpoint-13500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:07:25,508 >> Special tokens file saved in ./outputs/checkpoint-13500/special_tokens_map.json\n", + " 45% 14000/30880 [48:09<1:32:30, 3.04it/s]{'loss': 0.2567, 'learning_rate': 2.7331606217616585e-05, 'epoch': 2.27}\n", + "[INFO|trainer.py:1917] 2021-07-20 14:09:06,466 >> Saving model checkpoint to ./outputs/checkpoint-14000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:09:06,468 >> Configuration saved in ./outputs/checkpoint-14000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:09:07,854 >> Model weights saved in ./outputs/checkpoint-14000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:09:07,854 >> tokenizer config file saved in ./outputs/checkpoint-14000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:09:07,912 >> Special tokens file saved in ./outputs/checkpoint-14000/special_tokens_map.json\n", + " 47% 14500/30880 [49:52<1:31:47, 2.97it/s]{'loss': 0.2521, 'learning_rate': 2.6522020725388604e-05, 'epoch': 2.35}\n", + "[INFO|trainer.py:1917] 2021-07-20 14:10:49,314 >> Saving model checkpoint to ./outputs/checkpoint-14500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:10:49,315 >> Configuration saved in ./outputs/checkpoint-14500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:10:50,881 >> Model weights saved in ./outputs/checkpoint-14500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:10:50,882 >> tokenizer config file saved in ./outputs/checkpoint-14500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:10:50,882 >> Special tokens file saved in ./outputs/checkpoint-14500/special_tokens_map.json\n", + " 49% 15000/30880 [51:35<1:27:25, 3.03it/s][INFO|trainer.py:1917] 2021-07-20 14:12:32,636 >> Saving model checkpoint to ./outputs/checkpoint-15000\n", + 
"[INFO|configuration_utils.py:379] 2021-07-20 14:12:32,638 >> Configuration saved in ./outputs/checkpoint-15000/config.json\n", + "{'loss': 0.2523, 'learning_rate': 2.5712435233160625e-05, 'epoch': 2.43}\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:12:34,018 >> Model weights saved in ./outputs/checkpoint-15000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:12:34,019 >> tokenizer config file saved in ./outputs/checkpoint-15000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:12:34,019 >> Special tokens file saved in ./outputs/checkpoint-15000/special_tokens_map.json\n", + "{'loss': 0.2615, 'learning_rate': 2.4902849740932644e-05, 'epoch': 2.51}\n", + " 50% 15500/30880 [53:17<1:23:48, 3.06it/s][INFO|trainer.py:1917] 2021-07-20 14:14:15,110 >> Saving model checkpoint to ./outputs/checkpoint-15500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:14:15,111 >> Configuration saved in ./outputs/checkpoint-15500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:14:16,610 >> Model weights saved in ./outputs/checkpoint-15500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:14:16,611 >> tokenizer config file saved in ./outputs/checkpoint-15500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:14:16,611 >> Special tokens file saved in ./outputs/checkpoint-15500/special_tokens_map.json\n", + "{'loss': 0.2558, 'learning_rate': 2.4093264248704665e-05, 'epoch': 2.59}\n", + " 52% 16000/30880 [55:00<1:22:31, 3.01it/s][INFO|trainer.py:1917] 2021-07-20 14:15:58,112 >> Saving model checkpoint to ./outputs/checkpoint-16000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:15:58,113 >> Configuration saved in ./outputs/checkpoint-16000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:15:59,661 >> Model weights saved in ./outputs/checkpoint-16000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:15:59,661 >> tokenizer config file saved in ./outputs/checkpoint-16000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:15:59,662 >> Special tokens file saved in ./outputs/checkpoint-16000/special_tokens_map.json\n", + " 53% 16500/30880 [56:43<1:16:50, 3.12it/s]{'loss': 0.2322, 'learning_rate': 2.3283678756476684e-05, 'epoch': 2.67}\n", + "[INFO|trainer.py:1917] 2021-07-20 14:17:40,627 >> Saving model checkpoint to ./outputs/checkpoint-16500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:17:40,628 >> Configuration saved in ./outputs/checkpoint-16500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:17:42,033 >> Model weights saved in ./outputs/checkpoint-16500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:17:42,034 >> tokenizer config file saved in ./outputs/checkpoint-16500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:17:42,034 >> Special tokens file saved in ./outputs/checkpoint-16500/special_tokens_map.json\n", + "{'loss': 0.2407, 'learning_rate': 2.2474093264248706e-05, 'epoch': 2.75}\n", + " 55% 17000/30880 [58:25<1:16:33, 3.02it/s][INFO|trainer.py:1917] 2021-07-20 14:19:22,910 >> Saving model checkpoint to ./outputs/checkpoint-17000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:19:22,911 >> Configuration saved in ./outputs/checkpoint-17000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:19:24,335 >> Model weights saved in ./outputs/checkpoint-17000/pytorch_model.bin\n", + 
"[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:19:24,336 >> tokenizer config file saved in ./outputs/checkpoint-17000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:19:24,336 >> Special tokens file saved in ./outputs/checkpoint-17000/special_tokens_map.json\n", + " 57% 17500/30880 [1:00:08<1:12:01, 3.10it/s]{'loss': 0.2554, 'learning_rate': 2.1664507772020724e-05, 'epoch': 2.83}\n", + "[INFO|trainer.py:1917] 2021-07-20 14:21:05,299 >> Saving model checkpoint to ./outputs/checkpoint-17500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:21:05,306 >> Configuration saved in ./outputs/checkpoint-17500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:21:06,788 >> Model weights saved in ./outputs/checkpoint-17500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:21:06,789 >> tokenizer config file saved in ./outputs/checkpoint-17500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:21:06,789 >> Special tokens file saved in ./outputs/checkpoint-17500/special_tokens_map.json\n", + "{'loss': 0.2291, 'learning_rate': 2.0854922279792746e-05, 'epoch': 2.91}\n", + " 58% 18000/30880 [1:01:51<1:11:33, 3.00it/s][INFO|trainer.py:1917] 2021-07-20 14:22:48,225 >> Saving model checkpoint to ./outputs/checkpoint-18000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:22:48,226 >> Configuration saved in ./outputs/checkpoint-18000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:22:49,636 >> Model weights saved in ./outputs/checkpoint-18000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:22:49,637 >> tokenizer config file saved in ./outputs/checkpoint-18000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:22:49,637 >> Special tokens file saved in ./outputs/checkpoint-18000/special_tokens_map.json\n", + "{'loss': 0.2321, 'learning_rate': 2.0045336787564768e-05, 'epoch': 3.0}\n", + " 60% 18500/30880 [1:03:33<1:09:18, 2.98it/s][INFO|trainer.py:1917] 2021-07-20 14:24:31,106 >> Saving model checkpoint to ./outputs/checkpoint-18500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:24:31,107 >> Configuration saved in ./outputs/checkpoint-18500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:24:32,639 >> Model weights saved in ./outputs/checkpoint-18500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:24:32,640 >> tokenizer config file saved in ./outputs/checkpoint-18500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:24:32,640 >> Special tokens file saved in ./outputs/checkpoint-18500/special_tokens_map.json\n", + "{'loss': 0.1643, 'learning_rate': 1.9235751295336786e-05, 'epoch': 3.08}\n", + " 62% 19000/30880 [1:05:15<1:05:06, 3.04it/s][INFO|trainer.py:1917] 2021-07-20 14:26:12,887 >> Saving model checkpoint to ./outputs/checkpoint-19000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:26:12,888 >> Configuration saved in ./outputs/checkpoint-19000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:26:14,308 >> Model weights saved in ./outputs/checkpoint-19000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:26:14,308 >> tokenizer config file saved in ./outputs/checkpoint-19000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:26:14,309 >> Special tokens file saved in ./outputs/checkpoint-19000/special_tokens_map.json\n", + "{'loss': 0.1882, 'learning_rate': 
1.8426165803108808e-05, 'epoch': 3.16}\n", + " 63% 19500/30880 [1:06:58<1:03:32, 2.98it/s][INFO|trainer.py:1917] 2021-07-20 14:27:55,385 >> Saving model checkpoint to ./outputs/checkpoint-19500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:27:55,386 >> Configuration saved in ./outputs/checkpoint-19500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:27:56,995 >> Model weights saved in ./outputs/checkpoint-19500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:27:56,996 >> tokenizer config file saved in ./outputs/checkpoint-19500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:27:56,996 >> Special tokens file saved in ./outputs/checkpoint-19500/special_tokens_map.json\n", + " 65% 20000/30880 [1:08:41<58:41, 3.09it/s]{'loss': 0.199, 'learning_rate': 1.761658031088083e-05, 'epoch': 3.24}\n", + " 65% 20000/30880 [1:08:41<58:41, 3.09it/s][INFO|trainer.py:1917] 2021-07-20 14:29:38,312 >> Saving model checkpoint to ./outputs/checkpoint-20000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:29:38,313 >> Configuration saved in ./outputs/checkpoint-20000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:29:39,750 >> Model weights saved in ./outputs/checkpoint-20000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:29:39,751 >> tokenizer config file saved in ./outputs/checkpoint-20000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:29:39,751 >> Special tokens file saved in ./outputs/checkpoint-20000/special_tokens_map.json\n", + " 66% 20500/30880 [1:10:22<56:24, 3.07it/s]{'loss': 0.1814, 'learning_rate': 1.6806994818652848e-05, 'epoch': 3.32}\n", + "[INFO|trainer.py:1917] 2021-07-20 14:31:19,805 >> Saving model checkpoint to ./outputs/checkpoint-20500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:31:19,806 >> Configuration saved in ./outputs/checkpoint-20500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:31:21,215 >> Model weights saved in ./outputs/checkpoint-20500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:31:21,216 >> tokenizer config file saved in ./outputs/checkpoint-20500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:31:21,216 >> Special tokens file saved in ./outputs/checkpoint-20500/special_tokens_map.json\n", + " 68% 21000/30880 [1:12:04<54:20, 3.03it/s]{'loss': 0.1822, 'learning_rate': 1.5997409326424873e-05, 'epoch': 3.4}\n", + " 68% 21000/30880 [1:12:05<54:20, 3.03it/s][INFO|trainer.py:1917] 2021-07-20 14:33:02,168 >> Saving model checkpoint to ./outputs/checkpoint-21000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:33:02,169 >> Configuration saved in ./outputs/checkpoint-21000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:33:03,697 >> Model weights saved in ./outputs/checkpoint-21000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:33:03,698 >> tokenizer config file saved in ./outputs/checkpoint-21000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:33:03,698 >> Special tokens file saved in ./outputs/checkpoint-21000/special_tokens_map.json\n", + "{'loss': 0.1819, 'learning_rate': 1.5187823834196893e-05, 'epoch': 3.48}\n", + " 70% 21500/30880 [1:13:47<51:12, 3.05it/s][INFO|trainer.py:1917] 2021-07-20 14:34:44,955 >> Saving model checkpoint to ./outputs/checkpoint-21500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:34:44,956 >> 
Configuration saved in ./outputs/checkpoint-21500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:34:46,429 >> Model weights saved in ./outputs/checkpoint-21500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:34:46,429 >> tokenizer config file saved in ./outputs/checkpoint-21500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:34:46,430 >> Special tokens file saved in ./outputs/checkpoint-21500/special_tokens_map.json\n", + " 71% 22000/30880 [1:15:30<49:14, 3.01it/s]{'loss': 0.1754, 'learning_rate': 1.4378238341968913e-05, 'epoch': 3.56}\n", + "[INFO|trainer.py:1917] 2021-07-20 14:36:27,178 >> Saving model checkpoint to ./outputs/checkpoint-22000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:36:27,179 >> Configuration saved in ./outputs/checkpoint-22000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:36:29,394 >> Model weights saved in ./outputs/checkpoint-22000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:36:29,395 >> tokenizer config file saved in ./outputs/checkpoint-22000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:36:29,396 >> Special tokens file saved in ./outputs/checkpoint-22000/special_tokens_map.json\n", + " 73% 22500/30880 [1:17:12<47:52, 2.92it/s]{'loss': 0.1672, 'learning_rate': 1.3568652849740935e-05, 'epoch': 3.64}\n", + " 73% 22500/30880 [1:17:12<47:52, 2.92it/s][INFO|trainer.py:1917] 2021-07-20 14:38:09,777 >> Saving model checkpoint to ./outputs/checkpoint-22500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:38:09,778 >> Configuration saved in ./outputs/checkpoint-22500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:38:11,228 >> Model weights saved in ./outputs/checkpoint-22500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:38:11,228 >> tokenizer config file saved in ./outputs/checkpoint-22500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:38:11,229 >> Special tokens file saved in ./outputs/checkpoint-22500/special_tokens_map.json\n", + "{'loss': 0.1615, 'learning_rate': 1.2759067357512955e-05, 'epoch': 3.72}\n", + " 74% 23000/30880 [1:18:55<43:14, 3.04it/s][INFO|trainer.py:1917] 2021-07-20 14:39:52,200 >> Saving model checkpoint to ./outputs/checkpoint-23000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:39:52,201 >> Configuration saved in ./outputs/checkpoint-23000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:39:53,606 >> Model weights saved in ./outputs/checkpoint-23000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:39:53,607 >> tokenizer config file saved in ./outputs/checkpoint-23000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:39:53,607 >> Special tokens file saved in ./outputs/checkpoint-23000/special_tokens_map.json\n", + "{'loss': 0.1797, 'learning_rate': 1.1949481865284974e-05, 'epoch': 3.81}\n", + " 76% 23500/30880 [1:20:37<41:24, 2.97it/s][INFO|trainer.py:1917] 2021-07-20 14:41:35,005 >> Saving model checkpoint to ./outputs/checkpoint-23500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:41:35,006 >> Configuration saved in ./outputs/checkpoint-23500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:41:36,515 >> Model weights saved in ./outputs/checkpoint-23500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:41:36,516 >> tokenizer config file saved in 
./outputs/checkpoint-23500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:41:36,517 >> Special tokens file saved in ./outputs/checkpoint-23500/special_tokens_map.json\n", + "{'loss': 0.151, 'learning_rate': 1.1139896373056995e-05, 'epoch': 3.89}\n", + " 78% 24000/30880 [1:22:20<37:27, 3.06it/s][INFO|trainer.py:1917] 2021-07-20 14:43:17,276 >> Saving model checkpoint to ./outputs/checkpoint-24000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:43:17,277 >> Configuration saved in ./outputs/checkpoint-24000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:43:18,798 >> Model weights saved in ./outputs/checkpoint-24000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:43:18,799 >> tokenizer config file saved in ./outputs/checkpoint-24000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:43:18,817 >> Special tokens file saved in ./outputs/checkpoint-24000/special_tokens_map.json\n", + "{'loss': 0.1575, 'learning_rate': 1.0330310880829017e-05, 'epoch': 3.97}\n", + " 79% 24500/30880 [1:24:03<35:05, 3.03it/s][INFO|trainer.py:1917] 2021-07-20 14:45:00,282 >> Saving model checkpoint to ./outputs/checkpoint-24500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:45:00,284 >> Configuration saved in ./outputs/checkpoint-24500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:45:01,706 >> Model weights saved in ./outputs/checkpoint-24500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:45:01,707 >> tokenizer config file saved in ./outputs/checkpoint-24500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:45:01,708 >> Special tokens file saved in ./outputs/checkpoint-24500/special_tokens_map.json\n", + "{'loss': 0.1398, 'learning_rate': 9.520725388601037e-06, 'epoch': 4.05}\n", + " 81% 25000/30880 [1:25:45<31:38, 3.10it/s][INFO|trainer.py:1917] 2021-07-20 14:46:42,259 >> Saving model checkpoint to ./outputs/checkpoint-25000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:46:42,260 >> Configuration saved in ./outputs/checkpoint-25000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:46:43,605 >> Model weights saved in ./outputs/checkpoint-25000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:46:43,606 >> tokenizer config file saved in ./outputs/checkpoint-25000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:46:43,606 >> Special tokens file saved in ./outputs/checkpoint-25000/special_tokens_map.json\n", + "{'loss': 0.1246, 'learning_rate': 8.711139896373057e-06, 'epoch': 4.13}\n", + " 83% 25500/30880 [1:27:26<29:37, 3.03it/s][INFO|trainer.py:1917] 2021-07-20 14:48:24,031 >> Saving model checkpoint to ./outputs/checkpoint-25500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:48:24,032 >> Configuration saved in ./outputs/checkpoint-25500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:48:25,668 >> Model weights saved in ./outputs/checkpoint-25500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:48:25,669 >> tokenizer config file saved in ./outputs/checkpoint-25500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:48:25,670 >> Special tokens file saved in ./outputs/checkpoint-25500/special_tokens_map.json\n", + " 84% 26000/30880 [1:29:09<27:14, 2.99it/s]{'loss': 0.1393, 'learning_rate': 7.901554404145079e-06, 'epoch': 4.21}\n", + "[INFO|trainer.py:1917] 
2021-07-20 14:50:06,537 >> Saving model checkpoint to ./outputs/checkpoint-26000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:50:06,539 >> Configuration saved in ./outputs/checkpoint-26000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:50:08,081 >> Model weights saved in ./outputs/checkpoint-26000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:50:08,082 >> tokenizer config file saved in ./outputs/checkpoint-26000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:50:08,082 >> Special tokens file saved in ./outputs/checkpoint-26000/special_tokens_map.json\n", + " 86% 26500/30880 [1:30:54<24:08, 3.02it/s][INFO|trainer.py:1917] 2021-07-20 14:51:51,373 >> Saving model checkpoint to ./outputs/checkpoint-26500\n", + "{'loss': 0.142, 'learning_rate': 7.091968911917099e-06, 'epoch': 4.29}\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:51:51,379 >> Configuration saved in ./outputs/checkpoint-26500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:51:52,813 >> Model weights saved in ./outputs/checkpoint-26500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:51:52,813 >> tokenizer config file saved in ./outputs/checkpoint-26500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:51:52,814 >> Special tokens file saved in ./outputs/checkpoint-26500/special_tokens_map.json\n", + "{'loss': 0.1281, 'learning_rate': 6.282383419689119e-06, 'epoch': 4.37}\n", + " 87% 27000/30880 [1:32:37<21:37, 2.99it/s][INFO|trainer.py:1917] 2021-07-20 14:53:34,522 >> Saving model checkpoint to ./outputs/checkpoint-27000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:53:34,523 >> Configuration saved in ./outputs/checkpoint-27000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:53:36,177 >> Model weights saved in ./outputs/checkpoint-27000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:53:36,178 >> tokenizer config file saved in ./outputs/checkpoint-27000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:53:36,179 >> Special tokens file saved in ./outputs/checkpoint-27000/special_tokens_map.json\n", + " 89% 27500/30880 [1:34:18<18:45, 3.00it/s]{'loss': 0.1299, 'learning_rate': 5.47279792746114e-06, 'epoch': 4.45}\n", + "[INFO|trainer.py:1917] 2021-07-20 14:55:15,878 >> Saving model checkpoint to ./outputs/checkpoint-27500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:55:15,879 >> Configuration saved in ./outputs/checkpoint-27500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:55:17,410 >> Model weights saved in ./outputs/checkpoint-27500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:55:17,411 >> tokenizer config file saved in ./outputs/checkpoint-27500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:55:17,411 >> Special tokens file saved in ./outputs/checkpoint-27500/special_tokens_map.json\n", + " {'loss': 0.1355, 'learning_rate': 4.663212435233161e-06, 'epoch': 4.53}\n", + " 91% 28000/30880 [1:35:59<15:22, 3.12it/s][INFO|trainer.py:1917] 2021-07-20 14:56:56,957 >> Saving model checkpoint to ./outputs/checkpoint-28000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:56:56,958 >> Configuration saved in ./outputs/checkpoint-28000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:56:58,302 >> Model weights saved in ./outputs/checkpoint-28000/pytorch_model.bin\n", + 
"[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:56:58,302 >> tokenizer config file saved in ./outputs/checkpoint-28000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:56:58,303 >> Special tokens file saved in ./outputs/checkpoint-28000/special_tokens_map.json\n", + " 92% 28500/30880 [1:37:41<13:05, 3.03it/s]{'loss': 0.1165, 'learning_rate': 3.853626943005181e-06, 'epoch': 4.61}\n", + "[INFO|trainer.py:1917] 2021-07-20 14:58:39,000 >> Saving model checkpoint to ./outputs/checkpoint-28500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 14:58:39,002 >> Configuration saved in ./outputs/checkpoint-28500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 14:58:40,562 >> Model weights saved in ./outputs/checkpoint-28500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 14:58:40,563 >> tokenizer config file saved in ./outputs/checkpoint-28500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 14:58:40,564 >> Special tokens file saved in ./outputs/checkpoint-28500/special_tokens_map.json\n", + " 94% 29000/30880 [1:39:23<10:27, 2.99it/s]{'loss': 0.1292, 'learning_rate': 3.044041450777202e-06, 'epoch': 4.7}\n", + " 94% 29000/30880 [1:39:23<10:27, 2.99it/s][INFO|trainer.py:1917] 2021-07-20 15:00:20,895 >> Saving model checkpoint to ./outputs/checkpoint-29000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 15:00:20,896 >> Configuration saved in ./outputs/checkpoint-29000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 15:00:23,040 >> Model weights saved in ./outputs/checkpoint-29000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 15:00:23,041 >> tokenizer config file saved in ./outputs/checkpoint-29000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 15:00:23,041 >> Special tokens file saved in ./outputs/checkpoint-29000/special_tokens_map.json\n", + " 96% 29500/30880 [1:41:05<07:26, 3.09it/s]{'loss': 0.13, 'learning_rate': 2.234455958549223e-06, 'epoch': 4.78}\n", + "[INFO|trainer.py:1917] 2021-07-20 15:02:02,753 >> Saving model checkpoint to ./outputs/checkpoint-29500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 15:02:02,754 >> Configuration saved in ./outputs/checkpoint-29500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 15:02:04,090 >> Model weights saved in ./outputs/checkpoint-29500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 15:02:04,091 >> tokenizer config file saved in ./outputs/checkpoint-29500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 15:02:04,091 >> Special tokens file saved in ./outputs/checkpoint-29500/special_tokens_map.json\n", + " 97% 30000/30880 [1:42:47<04:48, 3.05it/s]{'loss': 0.1135, 'learning_rate': 1.4248704663212437e-06, 'epoch': 4.86}\n", + "[INFO|trainer.py:1917] 2021-07-20 15:03:44,201 >> Saving model checkpoint to ./outputs/checkpoint-30000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 15:03:44,203 >> Configuration saved in ./outputs/checkpoint-30000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 15:03:45,654 >> Model weights saved in ./outputs/checkpoint-30000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 15:03:45,655 >> tokenizer config file saved in ./outputs/checkpoint-30000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 15:03:45,655 >> Special tokens file saved in ./outputs/checkpoint-30000/special_tokens_map.json\n", + "{'loss': 
0.1377, 'learning_rate': 6.152849740932643e-07, 'epoch': 4.94}\n", + " 99% 30500/30880 [1:44:29<02:06, 3.00it/s][INFO|trainer.py:1917] 2021-07-20 15:05:26,961 >> Saving model checkpoint to ./outputs/checkpoint-30500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 15:05:26,962 >> Configuration saved in ./outputs/checkpoint-30500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 15:05:28,421 >> Model weights saved in ./outputs/checkpoint-30500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 15:05:28,422 >> tokenizer config file saved in ./outputs/checkpoint-30500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 15:05:28,422 >> Special tokens file saved in ./outputs/checkpoint-30500/special_tokens_map.json\n", + "100% 30880/30880 [1:45:48<00:00, 6.01it/s][INFO|trainer.py:1358] 2021-07-20 15:06:45,231 >> \n", + "\n", + "Training completed. Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n", + "100% 30880/30880 [1:45:48<00:00, 6.01it/s]{'train_runtime': 6350.6363, 'train_samples_per_second': 38.895, 'train_steps_per_second': 4.863, 'train_loss': 0.25439529233645897, 'epoch': 5.0}\n", + "100% 30880/30880 [1:45:48<00:00, 4.86it/s]\n", + "[INFO|trainer.py:1917] 2021-07-20 15:06:45,258 >> Saving model checkpoint to ./outputs\n", + "[INFO|configuration_utils.py:379] 2021-07-20 15:06:45,259 >> Configuration saved in ./outputs/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 15:06:46,739 >> Model weights saved in ./outputs/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 15:06:46,740 >> tokenizer config file saved in ./outputs/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 15:06:46,740 >> Special tokens file saved in ./outputs/special_tokens_map.json\n", + "***** train metrics *****\n", + " epoch = 5.0\n", + " train_loss = 0.2544\n", + " train_runtime = 1:45:50.63\n", + " train_samples = 49401\n", + " train_samples_per_second = 38.895\n", + " train_steps_per_second = 4.863\n", + "07/20/2021 15:06:47 - INFO - __main__ - *** Evaluate ***\n", + "[INFO|trainer.py:522] 2021-07-20 15:06:47,602 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: sentence2, id, sentence1.\n", + "[INFO|trainer.py:2163] 2021-07-20 15:06:47,628 >> ***** Running Evaluation *****\n", + "[INFO|trainer.py:2165] 2021-07-20 15:06:47,628 >> Num examples = 2000\n", + "[INFO|trainer.py:2168] 2021-07-20 15:06:47,628 >> Batch size = 8\n", + "100% 250/250 [00:12<00:00, 20.77it/s]\n", + "***** eval metrics *****\n", + " epoch = 5.0\n", + " eval_accuracy = 0.8845\n", + " eval_loss = 0.553\n", + " eval_runtime = 0:00:12.08\n", + " eval_samples = 2000\n", + " eval_samples_per_second = 165.47\n", + " eval_steps_per_second = 20.684\n", + "\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish, PID 1654\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Program ended successfully.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find user logs for this run at: /content/wandb/run-20210720_132054-20701jiq/logs/debug.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find internal logs for this run at: /content/wandb/run-20210720_132054-20701jiq/logs/debug-internal.log\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.1377\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 
0.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 30880\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime 6365\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp 1626793619\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step 62\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime 6350.6363\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second 38.895\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second 4.863\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos 1.62474365572992e+16\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss 0.2544\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 0.55297\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy 0.8845\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 12.0868\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 165.47\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 20.684\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss █▆▅▅▄▄▄▄▄▃▃▄▃▄▄▄▄▃▃▃▃▃▃▂▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate ████▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _runtime ▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _timestamp ▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: _step ▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/total_flos ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: train/train_loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 1 other file(s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced \u001b[33m./outputs\u001b[0m: \u001b[34mhttps://wandb.ai/versae/bertin-eval/runs/20701jiq\u001b[0m\n", + "2021-07-20 15:07:10.673127: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "07/20/2021 15:07:12 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", + "07/20/2021 15:07:12 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", + "_n_gpu=1,\n", + "adafactor=False,\n", + "adam_beta1=0.9,\n", + "adam_beta2=0.999,\n", + "adam_epsilon=1e-08,\n", + "dataloader_drop_last=False,\n", + "dataloader_num_workers=0,\n", + "dataloader_pin_memory=True,\n", + "ddp_find_unused_parameters=None,\n", + "debug=[],\n", + "deepspeed=None,\n", + "disable_tqdm=False,\n", + "do_eval=True,\n", + "do_predict=False,\n", + "do_train=True,\n", + "eval_accumulation_steps=None,\n", + 
"eval_steps=500,\n", + "evaluation_strategy=IntervalStrategy.NO,\n", + "fp16=False,\n", + "fp16_backend=auto,\n", + "fp16_full_eval=False,\n", + "fp16_opt_level=O1,\n", + "gradient_accumulation_steps=1,\n", + "greater_is_better=None,\n", + "group_by_length=False,\n", + "ignore_data_skip=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=5e-05,\n", + "length_column_name=length,\n", + "load_best_model_at_end=False,\n", + "local_rank=-1,\n", + "log_level=-1,\n", + "log_level_replica=-1,\n", + "log_on_each_node=True,\n", + "logging_dir=./outputs/runs/Jul20_15-07-12_48f3f265b421,\n", + "logging_first_step=False,\n", + "logging_steps=500,\n", + "logging_strategy=IntervalStrategy.STEPS,\n", + "lr_scheduler_type=SchedulerType.LINEAR,\n", + "max_grad_norm=1.0,\n", + "max_steps=-1,\n", + "metric_for_best_model=None,\n", + "mp_parameters=,\n", + "no_cuda=False,\n", + "num_train_epochs=5.0,\n", + "output_dir=./outputs,\n", + "overwrite_output_dir=True,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=8,\n", + "per_device_train_batch_size=8,\n", + "prediction_loss_only=False,\n", + "push_to_hub=False,\n", + "push_to_hub_model_id=outputs,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=None,\n", + "remove_unused_columns=True,\n", + "report_to=['tensorboard', 'wandb'],\n", + "resume_from_checkpoint=None,\n", + "run_name=./outputs,\n", + "save_on_each_node=False,\n", + "save_steps=500,\n", + "save_strategy=IntervalStrategy.STEPS,\n", + "save_total_limit=None,\n", + "seed=42,\n", + "sharded_ddp=[],\n", + "skip_memory_metrics=True,\n", + "tpu_metrics_debug=False,\n", + "tpu_num_cores=None,\n", + "use_legacy_prediction_loop=False,\n", + "warmup_ratio=0.0,\n", + "warmup_steps=0,\n", + "weight_decay=0.0,\n", + ")\n", + "07/20/2021 15:07:13 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py at /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x\n", + "07/20/2021 15:07:13 - INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py at /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af\n", + "07/20/2021 15:07:13 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py to /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/paws-x.py\n", + "07/20/2021 15:07:13 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/dataset_infos.json\n", + "07/20/2021 15:07:13 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py at /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/paws-x.json\n", + "07/20/2021 15:07:14 - INFO - datasets.load - Found main folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py at /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x\n", + "07/20/2021 15:07:14 - 
INFO - datasets.load - Found specific version folder for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py at /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af\n", + "07/20/2021 15:07:14 - INFO - datasets.load - Found script file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py to /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/paws-x.py\n", + "07/20/2021 15:07:14 - INFO - datasets.load - Found dataset infos file from https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/dataset_infos.json to /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/dataset_infos.json\n", + "07/20/2021 15:07:14 - INFO - datasets.load - Found metadata file for dataset https://raw.githubusercontent.com/huggingface/datasets/1.9.0/datasets/paws-x/paws-x.py at /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/paws-x.json\n", + "07/20/2021 15:07:14 - INFO - datasets.info - Loading Dataset Infos from /root/.cache/huggingface/modules/datasets_modules/datasets/paws-x/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af\n", + "07/20/2021 15:07:14 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", + "07/20/2021 15:07:14 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/pawsx/es/1.1.0/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af\n", + "07/20/2021 15:07:14 - WARNING - datasets.builder - Reusing dataset pawsx (/root/.cache/huggingface/datasets/pawsx/es/1.1.0/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af)\n", + "07/20/2021 15:07:14 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/pawsx/es/1.1.0/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af\n", + "100% 3/3 [00:00<00:00, 23.03it/s]\n", + "[INFO|configuration_utils.py:545] 2021-07-20 15:07:14,735 >> loading configuration file https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/367bb30bd1ae06268e9d1c64ae1fb923fc9931913fa478dfa01d79a4c7086238.08210214ecaf5cd53c702826a604e564d8af557125033eef720f4f9f2af1fc44\n", + "[INFO|configuration_utils.py:581] 2021-07-20 15:07:14,735 >> Model config RobertaConfig {\n", + " \"architectures\": [\n", + " \"RobertaForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"bos_token_id\": 0,\n", + " \"eos_token_id\": 2,\n", + " \"finetuning_task\": \"paws-x\",\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"max_position_embeddings\": 514,\n", + " \"model_type\": \"roberta\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.9.0.dev0\",\n", + " \"type_vocab_size\": 1,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50265\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-20 
15:07:17,210 >> loading file https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/vocab.json from cache at /root/.cache/huggingface/transformers/dc7971c78d10d920138338883fd23b96f3994bce40018345ab1ba2ba8c8f6bdd.a80f232f572026f92499b14999a8ed4e044e04cf3d01b9f2be298c98e78e8498\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-20 15:07:17,210 >> loading file https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/merges.txt from cache at /root/.cache/huggingface/transformers/5f573076405f6fab314615142ba9deec180a84917e495fecbf81f61afb2965cb.a0dfc41f9d0f03a56ba7a5401d770f6e43071045a0bd79073380d408d17a0d92\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-20 15:07:17,210 >> loading file https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/9a541e4855ef267ea4879cc9c2277f67dd5569f68cc688d212822bed1ca8755f.2a5dc806edc00ab3a329cb22b9973596ca75b24ba0e5e4963bf1308de7237a3d\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-20 15:07:17,210 >> loading file https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-20 15:07:17,210 >> loading file https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/special_tokens_map.json from cache at /root/.cache/huggingface/transformers/8e21c2757a0c3938b80989bb3dabd355f9221ed98847fb957a5c4e9a86209c03.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "[INFO|tokenization_utils_base.py:1722] 2021-07-20 15:07:17,210 >> loading file https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/tokenizer_config.json from cache at /root/.cache/huggingface/transformers/e577044edb84fa4a576105e2202c62cf62f831634f9581da80435c97b8034fba.9f7ac4246b492dfa7c21beb5233deec8aef46153ec25ac5258ac5e2ae82dfa89\n", + "[INFO|modeling_utils.py:1271] 2021-07-20 15:07:17,636 >> loading weights file https://huggingface.co/bertin-project/bertin-base-stepwise/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/4832a73c0f4a13adaab71151bf2413717416da487cdb2a79247f10198c6421f8.aebba6b503a22a0c70b362f8b026aa0f030aae594f7580f0164a0a73fb0001af\n", + "[WARNING|modeling_utils.py:1502] 2021-07-20 15:07:25,547 >> Some weights of the model checkpoint at bertin-project/bertin-base-stepwise were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias']\n", + "- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "[WARNING|modeling_utils.py:1513] 2021-07-20 15:07:25,547 >> Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at bertin-project/bertin-base-stepwise and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "07/20/2021 15:07:25 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /root/.cache/huggingface/datasets/pawsx/es/1.1.0/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/cache-76ca7b71f1e87df9.arrow\n", + "07/20/2021 15:07:26 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /root/.cache/huggingface/datasets/pawsx/es/1.1.0/a5033b43902a02a4ba2ee469c1dd22af3e6a4a247ac47fa1af9835d0e734e2af/cache-db0d8830efc25d6e.arrow\n", + "Running tokenizer on dataset: 0% 0/2 [00:00> The following columns in the training set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: sentence1, sentence2, id.\n", + "[INFO|trainer.py:1162] 2021-07-20 15:07:31,426 >> ***** Running training *****\n", + "[INFO|trainer.py:1163] 2021-07-20 15:07:31,426 >> Num examples = 49401\n", + "[INFO|trainer.py:1164] 2021-07-20 15:07:31,426 >> Num Epochs = 5\n", + "[INFO|trainer.py:1165] 2021-07-20 15:07:31,426 >> Instantaneous batch size per device = 8\n", + "[INFO|trainer.py:1166] 2021-07-20 15:07:31,426 >> Total train batch size (w. 
parallel, distributed & accumulation) = 8\n", + "[INFO|trainer.py:1167] 2021-07-20 15:07:31,426 >> Gradient Accumulation steps = 1\n", + "[INFO|trainer.py:1168] 2021-07-20 15:07:31,426 >> Total optimization steps = 30880\n", + "[INFO|integrations.py:446] 2021-07-20 15:07:31,446 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mversae\u001b[0m (use `wandb login --relogin` to force relogin)\n", + "2021-07-20 15:07:32.876281: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.11.0\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m./outputs\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/versae/bertin-eval/runs/1zzsesw7\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in /content/wandb/run-20210720_150731-1zzsesw7\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run `wandb offline` to turn off syncing.\n", + "\n", + " {'loss': 0.7073, 'learning_rate': 4.919041450777203e-05, 'epoch': 0.08}\n", + " 2% 500/30880 [01:37<2:53:12, 2.92it/s][INFO|trainer.py:1917] 2021-07-20 15:09:11,219 >> Saving model checkpoint to ./outputs/checkpoint-500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 15:09:11,221 >> Configuration saved in ./outputs/checkpoint-500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 15:09:12,592 >> Model weights saved in ./outputs/checkpoint-500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 15:09:12,593 >> tokenizer config file saved in ./outputs/checkpoint-500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 15:09:12,594 >> Special tokens file saved in ./outputs/checkpoint-500/special_tokens_map.json\n", + "{'loss': 0.592, 'learning_rate': 4.8380829015544046e-05, 'epoch': 0.16}\n", + " 3% 1000/30880 [03:20<2:46:17, 2.99it/s][INFO|trainer.py:1917] 2021-07-20 15:10:54,974 >> Saving model checkpoint to ./outputs/checkpoint-1000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 15:10:54,975 >> Configuration saved in ./outputs/checkpoint-1000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 15:10:56,282 >> Model weights saved in ./outputs/checkpoint-1000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 15:10:56,283 >> tokenizer config file saved in ./outputs/checkpoint-1000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 15:10:56,283 >> Special tokens file saved in ./outputs/checkpoint-1000/special_tokens_map.json\n", + "{'loss': 0.5125, 'learning_rate': 4.7571243523316064e-05, 'epoch': 0.24}\n", + " 5% 1500/30880 [05:03<2:41:55, 3.02it/s][INFO|trainer.py:1917] 2021-07-20 15:12:37,612 >> Saving model checkpoint to ./outputs/checkpoint-1500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 15:12:37,613 >> Configuration saved in ./outputs/checkpoint-1500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 15:12:38,967 >> Model weights saved in ./outputs/checkpoint-1500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 15:12:38,968 >> tokenizer config file saved in ./outputs/checkpoint-1500/tokenizer_config.json\n", + 
"[INFO|tokenization_utils_base.py:2004] 2021-07-20 15:12:38,968 >> Special tokens file saved in ./outputs/checkpoint-1500/special_tokens_map.json\n", + " 6% 2000/30880 [06:45<2:42:42, 2.96it/s][INFO|trainer.py:1917] 2021-07-20 15:14:19,715 >> Saving model checkpoint to ./outputs/checkpoint-2000\n", + "{'loss': 0.4902, 'learning_rate': 4.676165803108808e-05, 'epoch': 0.32}\n", + "[INFO|configuration_utils.py:379] 2021-07-20 15:14:19,721 >> Configuration saved in ./outputs/checkpoint-2000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 15:14:21,076 >> Model weights saved in ./outputs/checkpoint-2000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 15:14:21,077 >> tokenizer config file saved in ./outputs/checkpoint-2000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 15:14:21,077 >> Special tokens file saved in ./outputs/checkpoint-2000/special_tokens_map.json\n", + " 8% 2500/30880 [08:29<2:41:20, 2.93it/s][INFO|trainer.py:1917] 2021-07-20 15:16:03,536 >> Saving model checkpoint to ./outputs/checkpoint-2500\n", + "{'loss': 0.4658, 'learning_rate': 4.595207253886011e-05, 'epoch': 0.4}\n", + "[INFO|configuration_utils.py:379] 2021-07-20 15:16:03,543 >> Configuration saved in ./outputs/checkpoint-2500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 15:16:04,986 >> Model weights saved in ./outputs/checkpoint-2500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 15:16:04,987 >> tokenizer config file saved in ./outputs/checkpoint-2500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 15:16:04,987 >> Special tokens file saved in ./outputs/checkpoint-2500/special_tokens_map.json\n", + " 10% 3000/30880 [10:13<2:55:59, 2.64it/s]{'loss': 0.4675, 'learning_rate': 4.5142487046632126e-05, 'epoch': 0.49}\n", + " 10% 3000/30880 [10:13<2:55:59, 2.64it/s][INFO|trainer.py:1917] 2021-07-20 15:17:47,698 >> Saving model checkpoint to ./outputs/checkpoint-3000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 15:17:47,699 >> Configuration saved in ./outputs/checkpoint-3000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 15:17:49,080 >> Model weights saved in ./outputs/checkpoint-3000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 15:17:49,080 >> tokenizer config file saved in ./outputs/checkpoint-3000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 15:17:49,081 >> Special tokens file saved in ./outputs/checkpoint-3000/special_tokens_map.json\n", + " 11% 3500/30880 [11:55<2:32:42, 2.99it/s]{'loss': 0.4143, 'learning_rate': 4.433290155440415e-05, 'epoch': 0.57}\n", + "[INFO|trainer.py:1917] 2021-07-20 15:19:30,049 >> Saving model checkpoint to ./outputs/checkpoint-3500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 15:19:30,053 >> Configuration saved in ./outputs/checkpoint-3500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 15:19:31,476 >> Model weights saved in ./outputs/checkpoint-3500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 15:19:31,477 >> tokenizer config file saved in ./outputs/checkpoint-3500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 15:19:31,477 >> Special tokens file saved in ./outputs/checkpoint-3500/special_tokens_map.json\n", + "{'loss': 0.4291, 'learning_rate': 4.352331606217617e-05, 'epoch': 0.65}\n", + " 13% 4000/30880 [13:38<2:31:23, 2.96it/s][INFO|trainer.py:1917] 2021-07-20 15:21:12,358 >> Saving model 
checkpoint to ./outputs/checkpoint-4000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 15:21:12,359 >> Configuration saved in ./outputs/checkpoint-4000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 15:21:13,816 >> Model weights saved in ./outputs/checkpoint-4000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 15:21:13,817 >> tokenizer config file saved in ./outputs/checkpoint-4000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 15:21:13,817 >> Special tokens file saved in ./outputs/checkpoint-4000/special_tokens_map.json\n", + " 15% 4500/30880 [15:20<2:25:40, 3.02it/s][INFO|trainer.py:1917] 2021-07-20 15:22:54,963 >> Saving model checkpoint to ./outputs/checkpoint-4500\n", + "{'loss': 0.4058, 'learning_rate': 4.271373056994819e-05, 'epoch': 0.73}\n", + "[INFO|configuration_utils.py:379] 2021-07-20 15:22:54,964 >> Configuration saved in ./outputs/checkpoint-4500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 15:22:56,401 >> Model weights saved in ./outputs/checkpoint-4500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 15:22:56,402 >> tokenizer config file saved in ./outputs/checkpoint-4500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 15:22:56,403 >> Special tokens file saved in ./outputs/checkpoint-4500/special_tokens_map.json\n", + "{'loss': 0.3937, 'learning_rate': 4.190414507772021e-05, 'epoch': 0.81}\n", + " 16% 5000/30880 [17:02<2:26:31, 2.94it/s][INFO|trainer.py:1917] 2021-07-20 15:24:36,868 >> Saving model checkpoint to ./outputs/checkpoint-5000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 15:24:36,869 >> Configuration saved in ./outputs/checkpoint-5000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 15:24:38,356 >> Model weights saved in ./outputs/checkpoint-5000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 15:24:38,357 >> tokenizer config file saved in ./outputs/checkpoint-5000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 15:24:38,357 >> Special tokens file saved in ./outputs/checkpoint-5000/special_tokens_map.json\n", + "{'loss': 0.4076, 'learning_rate': 4.109455958549223e-05, 'epoch': 0.89}\n", + " 18% 5500/30880 [18:44<2:20:21, 3.01it/s][INFO|trainer.py:1917] 2021-07-20 15:26:18,777 >> Saving model checkpoint to ./outputs/checkpoint-5500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 15:26:18,778 >> Configuration saved in ./outputs/checkpoint-5500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 15:26:20,151 >> Model weights saved in ./outputs/checkpoint-5500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 15:26:20,180 >> tokenizer config file saved in ./outputs/checkpoint-5500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 15:26:20,180 >> Special tokens file saved in ./outputs/checkpoint-5500/special_tokens_map.json\n", + "{'loss': 0.3757, 'learning_rate': 4.028497409326425e-05, 'epoch': 0.97}\n", + " 19% 6000/30880 [20:26<2:17:39, 3.01it/s][INFO|trainer.py:1917] 2021-07-20 15:28:00,979 >> Saving model checkpoint to ./outputs/checkpoint-6000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 15:28:00,980 >> Configuration saved in ./outputs/checkpoint-6000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 15:28:02,413 >> Model weights saved in ./outputs/checkpoint-6000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 15:28:02,414 >> tokenizer 
config file saved in ./outputs/checkpoint-6000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 15:28:02,414 >> Special tokens file saved in ./outputs/checkpoint-6000/special_tokens_map.json\n", + "{'loss': 0.3788, 'learning_rate': 3.9475388601036275e-05, 'epoch': 1.05}\n", + " 21% 6500/30880 [22:08<2:15:45, 2.99it/s][INFO|trainer.py:1917] 2021-07-20 15:29:42,929 >> Saving model checkpoint to ./outputs/checkpoint-6500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 15:29:42,930 >> Configuration saved in ./outputs/checkpoint-6500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 15:29:44,394 >> Model weights saved in ./outputs/checkpoint-6500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 15:29:44,395 >> tokenizer config file saved in ./outputs/checkpoint-6500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 15:29:44,395 >> Special tokens file saved in ./outputs/checkpoint-6500/special_tokens_map.json\n", + "{'loss': 0.3526, 'learning_rate': 3.8665803108808294e-05, 'epoch': 1.13}\n", + " 23% 7000/30880 [23:50<2:15:14, 2.94it/s][INFO|trainer.py:1917] 2021-07-20 15:31:24,408 >> Saving model checkpoint to ./outputs/checkpoint-7000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 15:31:24,409 >> Configuration saved in ./outputs/checkpoint-7000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 15:31:25,828 >> Model weights saved in ./outputs/checkpoint-7000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 15:31:25,828 >> tokenizer config file saved in ./outputs/checkpoint-7000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 15:31:25,829 >> Special tokens file saved in ./outputs/checkpoint-7000/special_tokens_map.json\n", + "{'loss': 0.3525, 'learning_rate': 3.785621761658031e-05, 'epoch': 1.21}\n", + " 24% 7500/30880 [25:31<2:11:11, 2.97it/s][INFO|trainer.py:1917] 2021-07-20 15:33:06,085 >> Saving model checkpoint to ./outputs/checkpoint-7500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 15:33:06,086 >> Configuration saved in ./outputs/checkpoint-7500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 15:33:07,561 >> Model weights saved in ./outputs/checkpoint-7500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 15:33:07,562 >> tokenizer config file saved in ./outputs/checkpoint-7500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 15:33:07,562 >> Special tokens file saved in ./outputs/checkpoint-7500/special_tokens_map.json\n", + " 26% 8000/30880 [27:13<2:05:27, 3.04it/s]{'loss': 0.3367, 'learning_rate': 3.704663212435233e-05, 'epoch': 1.3}\n", + "[INFO|trainer.py:1917] 2021-07-20 15:34:47,724 >> Saving model checkpoint to ./outputs/checkpoint-8000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 15:34:47,725 >> Configuration saved in ./outputs/checkpoint-8000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 15:34:49,236 >> Model weights saved in ./outputs/checkpoint-8000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 15:34:49,236 >> tokenizer config file saved in ./outputs/checkpoint-8000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 15:34:49,237 >> Special tokens file saved in ./outputs/checkpoint-8000/special_tokens_map.json\n", + " 28% 8500/30880 [28:55<2:06:03, 2.96it/s]{'loss': 0.3467, 'learning_rate': 3.6237046632124356e-05, 'epoch': 1.38}\n", + "[INFO|trainer.py:1917] 
2021-07-20 15:36:30,045 >> Saving model checkpoint to ./outputs/checkpoint-8500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 15:36:30,046 >> Configuration saved in ./outputs/checkpoint-8500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 15:36:31,513 >> Model weights saved in ./outputs/checkpoint-8500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 15:36:31,514 >> tokenizer config file saved in ./outputs/checkpoint-8500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 15:36:31,514 >> Special tokens file saved in ./outputs/checkpoint-8500/special_tokens_map.json\n", + " 29% 9000/30880 [30:38<2:02:02, 2.99it/s]{'loss': 0.3406, 'learning_rate': 3.5427461139896374e-05, 'epoch': 1.46}\n", + "[INFO|trainer.py:1917] 2021-07-20 15:38:12,287 >> Saving model checkpoint to ./outputs/checkpoint-9000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 15:38:12,289 >> Configuration saved in ./outputs/checkpoint-9000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 15:38:13,681 >> Model weights saved in ./outputs/checkpoint-9000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 15:38:13,682 >> tokenizer config file saved in ./outputs/checkpoint-9000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 15:38:13,683 >> Special tokens file saved in ./outputs/checkpoint-9000/special_tokens_map.json\n", + " 31% 9500/30880 [32:19<1:57:28, 3.03it/s]{'loss': 0.3246, 'learning_rate': 3.46178756476684e-05, 'epoch': 1.54}\n", + " 31% 9500/30880 [32:19<1:57:28, 3.03it/s][INFO|trainer.py:1917] 2021-07-20 15:39:53,791 >> Saving model checkpoint to ./outputs/checkpoint-9500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 15:39:53,792 >> Configuration saved in ./outputs/checkpoint-9500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 15:39:55,125 >> Model weights saved in ./outputs/checkpoint-9500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 15:39:55,125 >> tokenizer config file saved in ./outputs/checkpoint-9500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 15:39:55,126 >> Special tokens file saved in ./outputs/checkpoint-9500/special_tokens_map.json\n", + " 32% 10000/30880 [34:00<1:56:14, 2.99it/s]{'loss': 0.3081, 'learning_rate': 3.380829015544041e-05, 'epoch': 1.62}\n", + " 32% 10000/30880 [34:00<1:56:14, 2.99it/s][INFO|trainer.py:1917] 2021-07-20 15:41:34,974 >> Saving model checkpoint to ./outputs/checkpoint-10000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 15:41:34,975 >> Configuration saved in ./outputs/checkpoint-10000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 15:41:36,429 >> Model weights saved in ./outputs/checkpoint-10000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 15:41:36,430 >> tokenizer config file saved in ./outputs/checkpoint-10000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 15:41:36,430 >> Special tokens file saved in ./outputs/checkpoint-10000/special_tokens_map.json\n", + "{'loss': 0.3241, 'learning_rate': 3.2998704663212436e-05, 'epoch': 1.7}\n", + " 34% 10500/30880 [35:42<1:55:38, 2.94it/s][INFO|trainer.py:1917] 2021-07-20 15:43:16,682 >> Saving model checkpoint to ./outputs/checkpoint-10500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 15:43:16,684 >> Configuration saved in ./outputs/checkpoint-10500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 15:43:18,052 >> Model weights 
saved in ./outputs/checkpoint-10500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 15:43:18,053 >> tokenizer config file saved in ./outputs/checkpoint-10500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 15:43:18,055 >> Special tokens file saved in ./outputs/checkpoint-10500/special_tokens_map.json\n", + "{'loss': 0.3209, 'learning_rate': 3.2189119170984454e-05, 'epoch': 1.78}\n", + " 36% 11000/30880 [37:23<1:48:09, 3.06it/s][INFO|trainer.py:1917] 2021-07-20 15:44:57,615 >> Saving model checkpoint to ./outputs/checkpoint-11000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 15:44:57,616 >> Configuration saved in ./outputs/checkpoint-11000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 15:44:59,438 >> Model weights saved in ./outputs/checkpoint-11000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 15:44:59,439 >> tokenizer config file saved in ./outputs/checkpoint-11000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 15:44:59,443 >> Special tokens file saved in ./outputs/checkpoint-11000/special_tokens_map.json\n", + "{'loss': 0.3148, 'learning_rate': 3.137953367875648e-05, 'epoch': 1.86}\n", + " 37% 11500/30880 [39:05<1:48:16, 2.98it/s][INFO|trainer.py:1917] 2021-07-20 15:46:39,293 >> Saving model checkpoint to ./outputs/checkpoint-11500\n", + "[INFO|configuration_utils.py:379] 2021-07-20 15:46:39,295 >> Configuration saved in ./outputs/checkpoint-11500/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 15:46:40,782 >> Model weights saved in ./outputs/checkpoint-11500/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 15:46:40,782 >> tokenizer config file saved in ./outputs/checkpoint-11500/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 15:46:40,809 >> Special tokens file saved in ./outputs/checkpoint-11500/special_tokens_map.json\n", + "{'loss': 0.3225, 'learning_rate': 3.05699481865285e-05, 'epoch': 1.94}\n", + " 39% 12000/30880 [40:46<1:43:31, 3.04it/s][INFO|trainer.py:1917] 2021-07-20 15:48:20,223 >> Saving model checkpoint to ./outputs/checkpoint-12000\n", + "[INFO|configuration_utils.py:379] 2021-07-20 15:48:20,224 >> Configuration saved in ./outputs/checkpoint-12000/config.json\n", + "[INFO|modeling_utils.py:997] 2021-07-20 15:48:21,700 >> Model weights saved in ./outputs/checkpoint-12000/pytorch_model.bin\n", + "[INFO|tokenization_utils_base.py:1998] 2021-07-20 15:48:21,701 >> tokenizer config file saved in ./outputs/checkpoint-12000/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2004] 2021-07-20 15:48:21,702 >> Special tokens file saved in ./outputs/checkpoint-12000/special_tokens_map.json\n", + " 40% 12284/30880 [41:45<59:16, 5.23it/s]" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 364, + "referenced_widgets": [ + "3375353ee2ea43d28775f62c49ee0538", + "b57b116c89594c558fb17ee835cec7ae", + "2de683c0aad84a33b5d74c338151cb11", + "2d6e2ae6f5e24092bda544ced04abab4", + "ce5c0860d88b4ae594ff9c4f97bc998b", + "e908ba524a584e1c82cb2a1e0e48d7d6", + "5196bc6355b9487aadc2ac77b84f4c0c", + "76697b9a49db4f6c9a43f8a102118a45" + ] + }, + "id": "pqN4wpd7SZQN", + "outputId": "0ffb4081-1722-4e19-9399-a3946a208a12" + }, + "source": [ + "from datasets import load_metric\n", + "\n", + "load_metric(\"accuracy\")" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + 
"data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "3375353ee2ea43d28775f62c49ee0538", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1362.0, style=ProgressStyle(description…" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "stream", + "text": [ + "\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Metric(name: \"accuracy\", features: {'predictions': Value(dtype='int32', id=None), 'references': Value(dtype='int32', id=None)}, usage: \"\"\"\n", + "Args:\n", + " predictions: Predicted labels, as returned by a model.\n", + " references: Ground truth labels.\n", + " normalize: If False, return the number of correctly classified samples.\n", + " Otherwise, return the fraction of correctly classified samples.\n", + " sample_weight: Sample weights.\n", + "Returns:\n", + " accuracy: Accuracy score.\n", + "Examples:\n", + "\n", + " >>> accuracy_metric = datasets.load_metric(\"accuracy\")\n", + " >>> results = accuracy_metric.compute(references=[0, 1], predictions=[0, 1])\n", + " >>> print(results)\n", + " {'accuracy': 1.0}\n", + "\"\"\", stored examples: 0)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 13 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "2UoX0t5AFxvM" + }, + "source": [ + "# !wget https://raw.githubusercontent.com/huggingface/transformers/master/examples/pytorch/token-classification/run_ner.py\n", + "for model in models:\n", + " !WANDB_PROJECT=bertin-eval TOKENIZERS_PARALLELISM=false CUDA_LAUNCH_BLOCKING=1 python run_glue.py \\\n", + " --model_name_or_path $model \\\n", + " --dataset_name \"amazon_reviews_multi\" \\\n", + " --dataset_config_name \"es\" \\\n", + " --task_name \"amazon_reviews_multi\" \\\n", + " --output_dir ./outputs \\\n", + " --overwrite_output_dir \\\n", + " --pad_to_max_length \\\n", + " --num_train_epochs 5 \\\n", + " --do_train \\\n", + " --do_eval" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file