Training in progress, epoch 1

Browse files

Files changed (8) hide show

config.json +143 -0
merges.txt +0 -0
pytorch_model.bin +3 -0
special_tokens_map.json +51 -0
tokenizer.json +0 -0
tokenizer_config.json +65 -0
training_args.bin +3 -0
vocab.json +0 -0

config.json ADDED Viewed

	@@ -0,0 +1,143 @@

+{
+  "_name_or_path": "PlanTL-GOB-ES/roberta-base-bne",
+  "architectures": [
+    "RobertaForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.0,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "Administraci\u00f3n Central",
+    "1": "Asuntos de la mujer",
+    "2": "Banco de la Rep\u00fablica",
+    "3": "Bienes nacionales",
+    "4": "Calamidades p\u00fablicas",
+    "5": "Comercio Exterior",
+    "6": "Constituci\u00f3n",
+    "7": "Control fiscal",
+    "8": "Corporaciones Aut\u00f3nomas Regionales",
+    "9": "Cultura",
+    "10": "Defensa Nacional",
+    "11": "Diplomacia",
+    "12": "Ecolog\u00eda",
+    "13": "Educaci\u00f3n",
+    "14": "Energ\u00eda",
+    "15": "Entidades Financieras",
+    "16": "Espacio A\u00e9reo",
+    "17": "Fondos de prestaciones",
+    "18": "Fronteras",
+    "19": "Fuerza P\u00fablica",
+    "20": "Hacienda",
+    "21": "Impuestos",
+    "22": "Investigaci\u00f3n cient\u00edfica",
+    "23": "Legislaci\u00f3n administrativa",
+    "24": "Leyes",
+    "25": "Medio ambiente",
+    "26": "Medios de comunicaci\u00f3n",
+    "27": "Mercado de Valores",
+    "28": "Miner\u00eda",
+    "29": "Monopolios",
+    "30": "Monumentos p\u00fablicos",
+    "31": "Nacionalidad",
+    "32": "Organismos de Control",
+    "33": "Organizaciones sindicales",
+    "34": "Organizaci\u00f3n Territorial",
+    "35": "Planeaci\u00f3n nacional",
+    "36": "Pol\u00edtica Internacional",
+    "37": "Pol\u00edtica portuaria",
+    "38": "Pol\u00edticas migratorias",
+    "39": "Prestaciones Servidores P\u00fablicos",
+    "40": "Presupuesto",
+    "41": "Propiedad intelectual",
+    "42": "Recreaci\u00f3n",
+    "43": "Recursos ictiol\u00f3gicos",
+    "44": "Regulaci\u00f3n econ\u00f3mica",
+    "45": "Reubicaci\u00f3n de entidades",
+    "46": "R\u00e9gimen Agropecuario",
+    "47": "Salud",
+    "48": "Seguridad Social",
+    "49": "Servicios p\u00fablicos",
+    "50": "Sin clasificaci\u00f3n",
+    "51": "Tierras",
+    "52": "Transporte",
+    "53": "Turismo",
+    "54": "Vivienda"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "Administraci\u00f3n Central": 0,
+    "Asuntos de la mujer": 1,
+    "Banco de la Rep\u00fablica": 2,
+    "Bienes nacionales": 3,
+    "Calamidades p\u00fablicas": 4,
+    "Comercio Exterior": 5,
+    "Constituci\u00f3n": 6,
+    "Control fiscal": 7,
+    "Corporaciones Aut\u00f3nomas Regionales": 8,
+    "Cultura": 9,
+    "Defensa Nacional": 10,
+    "Diplomacia": 11,
+    "Ecolog\u00eda": 12,
+    "Educaci\u00f3n": 13,
+    "Energ\u00eda": 14,
+    "Entidades Financieras": 15,
+    "Espacio A\u00e9reo": 16,
+    "Fondos de prestaciones": 17,
+    "Fronteras": 18,
+    "Fuerza P\u00fablica": 19,
+    "Hacienda": 20,
+    "Impuestos": 21,
+    "Investigaci\u00f3n cient\u00edfica": 22,
+    "Legislaci\u00f3n administrativa": 23,
+    "Leyes": 24,
+    "Medio ambiente": 25,
+    "Medios de comunicaci\u00f3n": 26,
+    "Mercado de Valores": 27,
+    "Miner\u00eda": 28,
+    "Monopolios": 29,
+    "Monumentos p\u00fablicos": 30,
+    "Nacionalidad": 31,
+    "Organismos de Control": 32,
+    "Organizaciones sindicales": 33,
+    "Organizaci\u00f3n Territorial": 34,
+    "Planeaci\u00f3n nacional": 35,
+    "Pol\u00edtica Internacional": 36,
+    "Pol\u00edtica portuaria": 37,
+    "Pol\u00edticas migratorias": 38,
+    "Prestaciones Servidores P\u00fablicos": 39,
+    "Presupuesto": 40,
+    "Propiedad intelectual": 41,
+    "Recreaci\u00f3n": 42,
+    "Recursos ictiol\u00f3gicos": 43,
+    "Regulaci\u00f3n econ\u00f3mica": 44,
+    "Reubicaci\u00f3n de entidades": 45,
+    "R\u00e9gimen Agropecuario": 46,
+    "Salud": 47,
+    "Seguridad Social": 48,
+    "Servicios p\u00fablicos": 49,
+    "Sin clasificaci\u00f3n": 50,
+    "Tierras": 51,
+    "Transporte": 52,
+    "Turismo": 53,
+    "Vivienda": 54
+  },
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.33.1",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 50262
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f55d478730061260b9474027abe1a41bb81cf14439c0bbe0a37b348b7ea76bf7
+size 498811441

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,65 @@

+{
+  "add_prefix_space": false,
+  "bos_token": {
+    "__type": "AddedToken",
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": {
+    "__type": "AddedToken",
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "__type": "AddedToken",
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "errors": "replace",
+  "mask_token": {
+    "__type": "AddedToken",
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "max_len": 512,
+  "model_max_length": 512,
+  "pad_token": {
+    "__type": "AddedToken",
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "__type": "AddedToken",
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "tokenizer_class": "RobertaTokenizer",
+  "trim_offsets": true,
+  "unk_token": {
+    "__type": "AddedToken",
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3b18a05250b9792068036c2adf5e9e92e5b98c7c02a7dd79723f5744abe352c0
+size 4027

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff