mnavas committed on
Commit
d40170e
1 Parent(s): e6cd5a8
app.py CHANGED
@@ -4,8 +4,8 @@ import torch
4
  import numpy as np
5
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
6
 
7
- tokenizer = AutoTokenizer.from_pretrained("oeg/roberta-finetuned-CPV_Spanish")
8
- model = AutoModelForSequenceClassification.from_pretrained("oeg/roberta-finetuned-CPV_Spanish")
9
 
10
  cpv = pd.read_csv("cpv.csv")
11
  df = pd.read_csv("code-desc.csv")
 
4
  import numpy as np
5
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
6
 
7
+ tokenizer = AutoTokenizer.from_pretrained("model")
8
+ model = AutoModelForSequenceClassification.from_pretrained("model")
9
 
10
  cpv = pd.read_csv("cpv.csv")
11
  df = pd.read_csv("code-desc.csv")
model/config.json ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "PlanTL-GOB-ES/roberta-base-bne",
3
+ "architectures": [
4
+ "RobertaForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "gradient_checkpointing": false,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.0,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "03",
16
+ "1": "09",
17
+ "2": "14",
18
+ "3": "15",
19
+ "4": "16",
20
+ "5": "18",
21
+ "6": "19",
22
+ "7": "22",
23
+ "8": "24",
24
+ "9": "30",
25
+ "10": "31",
26
+ "11": "32",
27
+ "12": "33",
28
+ "13": "34",
29
+ "14": "35",
30
+ "15": "37",
31
+ "16": "38",
32
+ "17": "39",
33
+ "18": "41",
34
+ "19": "42",
35
+ "20": "43",
36
+ "21": "44",
37
+ "22": "45",
38
+ "23": "48",
39
+ "24": "50",
40
+ "25": "51",
41
+ "26": "55",
42
+ "27": "60",
43
+ "28": "63",
44
+ "29": "64",
45
+ "30": "65",
46
+ "31": "66",
47
+ "32": "70",
48
+ "33": "71",
49
+ "34": "72",
50
+ "35": "73",
51
+ "36": "75",
52
+ "37": "76",
53
+ "38": "77",
54
+ "39": "79",
55
+ "40": "80",
56
+ "41": "85",
57
+ "42": "90",
58
+ "43": "92",
59
+ "44": "98"
60
+ },
61
+ "initializer_range": 0.02,
62
+ "intermediate_size": 3072,
63
+ "label2id": {
64
+ "03": 0,
65
+ "09": 1,
66
+ "14": 2,
67
+ "15": 3,
68
+ "16": 4,
69
+ "18": 5,
70
+ "19": 6,
71
+ "22": 7,
72
+ "24": 8,
73
+ "30": 9,
74
+ "31": 10,
75
+ "32": 11,
76
+ "33": 12,
77
+ "34": 13,
78
+ "35": 14,
79
+ "37": 15,
80
+ "38": 16,
81
+ "39": 17,
82
+ "41": 18,
83
+ "42": 19,
84
+ "43": 20,
85
+ "44": 21,
86
+ "45": 22,
87
+ "48": 23,
88
+ "50": 24,
89
+ "51": 25,
90
+ "55": 26,
91
+ "60": 27,
92
+ "63": 28,
93
+ "64": 29,
94
+ "65": 30,
95
+ "66": 31,
96
+ "70": 32,
97
+ "71": 33,
98
+ "72": 34,
99
+ "73": 35,
100
+ "75": 36,
101
+ "76": 37,
102
+ "77": 38,
103
+ "79": 39,
104
+ "80": 40,
105
+ "85": 41,
106
+ "90": 42,
107
+ "92": 43,
108
+ "98": 44
109
+ },
110
+ "layer_norm_eps": 1e-05,
111
+ "max_position_embeddings": 514,
112
+ "model_type": "roberta",
113
+ "num_attention_heads": 12,
114
+ "num_hidden_layers": 12,
115
+ "pad_token_id": 1,
116
+ "position_embedding_type": "absolute",
117
+ "problem_type": "multi_label_classification",
118
+ "torch_dtype": "float32",
119
+ "transformers_version": "4.16.2",
120
+ "type_vocab_size": 1,
121
+ "use_cache": true,
122
+ "vocab_size": 50262
123
+ }
model/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec3b704a4ba9c6b5a13fd4000da3684ea15e1d9f446da3b7a008ab1e702ed3fd
3
+ size 498797101
model/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}}
model/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
model/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "errors": "replace", "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "trim_offsets": true, "max_len": 512, "special_tokens_map_file": null, "name_or_path": "PlanTL-GOB-ES/roberta-base-bne", "tokenizer_class": "RobertaTokenizer"}
model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de5181bca1f3a92e7db43208603a46f084990047a8236f7ddce2a6fa818dd85b
3
+ size 3055
model/vocab.json ADDED
The diff for this file is too large to render. See raw diff