arthurmluz committed on
Commit
592f067
1 Parent(s): 5ccddd2

End of training

Browse files
README.md ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ base_model: unicamp-dl/ptt5-base-portuguese-vocab
4
+ tags:
5
+ - generated_from_trainer
6
+ metrics:
7
+ - rouge
8
+ model-index:
9
+ - name: ptt5-wikilingua-30epochs
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ # ptt5-wikilingua-30epochs
17
+
18
+ This model is a fine-tuned version of [unicamp-dl/ptt5-base-portuguese-vocab](https://huggingface.co/unicamp-dl/ptt5-base-portuguese-vocab). The training dataset was not recorded by the Trainer; the model name suggests WikiLingua, but this is unconfirmed.
19
+ It achieves the following results on the evaluation set:
20
+ - Loss: 1.9063
21
+ - Rouge1: 0.2604
22
+ - Rouge2: 0.1127
23
+ - Rougel: 0.2222
24
+ - Rougelsum: 0.2541
25
+ - Gen Len: 18.4528
26
+
27
+ ## Model description
28
+
29
+ More information needed
30
+
31
+ ## Intended uses & limitations
32
+
33
+ More information needed
34
+
35
+ ## Training and evaluation data
36
+
37
+ More information needed
38
+
39
+ ## Training procedure
40
+
41
+ ### Training hyperparameters
42
+
43
+ The following hyperparameters were used during training:
44
+ - learning_rate: 2e-05
45
+ - train_batch_size: 2
46
+ - eval_batch_size: 2
47
+ - seed: 42
48
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
49
+ - lr_scheduler_type: linear
50
+ - num_epochs: 30
51
+
52
+ ### Training results
53
+
54
+ | Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum | Gen Len |
55
+ |:-------------:|:-----:|:------:|:---------------:|:------:|:------:|:------:|:---------:|:-------:|
56
+ | 2.1668 | 1.0 | 28580 | 2.0384 | 0.2366 | 0.0935 | 0.2034 | 0.2311 | 18.2195 |
57
+ | 2.0348 | 2.0 | 57160 | 1.9725 | 0.2448 | 0.0998 | 0.2098 | 0.2391 | 18.3898 |
58
+ | 2.0152 | 3.0 | 85740 | 1.9346 | 0.2469 | 0.1024 | 0.2122 | 0.2414 | 18.2427 |
59
+ | 1.9769 | 4.0 | 114320 | 1.9096 | 0.2503 | 0.1047 | 0.2147 | 0.2446 | 18.2773 |
60
+ | 1.8471 | 5.0 | 142900 | 1.8957 | 0.253 | 0.1076 | 0.2169 | 0.2473 | 18.2612 |
61
+ | 1.8504 | 6.0 | 171480 | 1.8840 | 0.2541 | 0.1084 | 0.2179 | 0.2483 | 18.3317 |
62
+ | 1.7456 | 7.0 | 200060 | 1.8768 | 0.2547 | 0.1084 | 0.2183 | 0.2488 | 18.3634 |
63
+ | 1.7254 | 8.0 | 228640 | 1.8747 | 0.2563 | 0.1099 | 0.2196 | 0.2505 | 18.3577 |
64
+ | 1.7742 | 9.0 | 257220 | 1.8739 | 0.2562 | 0.11 | 0.2194 | 0.2504 | 18.3904 |
65
+ | 1.7211 | 10.0 | 285800 | 1.8667 | 0.2572 | 0.1109 | 0.2205 | 0.2513 | 18.3616 |
66
+ | 1.696 | 11.0 | 314380 | 1.8677 | 0.2568 | 0.1112 | 0.2204 | 0.251 | 18.349 |
67
+ | 1.6762 | 12.0 | 342960 | 1.8695 | 0.2571 | 0.1108 | 0.2202 | 0.2513 | 18.3528 |
68
+ | 1.6404 | 13.0 | 371540 | 1.8738 | 0.2582 | 0.1115 | 0.2208 | 0.2523 | 18.3909 |
69
+ | 1.6523 | 14.0 | 400120 | 1.8727 | 0.259 | 0.1118 | 0.2215 | 0.253 | 18.4077 |
70
+ | 1.626 | 15.0 | 428700 | 1.8736 | 0.2596 | 0.1124 | 0.2222 | 0.2537 | 18.4245 |
71
+ | 1.5922 | 16.0 | 457280 | 1.8750 | 0.259 | 0.1123 | 0.2215 | 0.253 | 18.4125 |
72
+ | 1.5345 | 17.0 | 485860 | 1.8783 | 0.2591 | 0.112 | 0.2214 | 0.2529 | 18.4013 |
73
+ | 1.5785 | 18.0 | 514440 | 1.8797 | 0.2588 | 0.112 | 0.2212 | 0.2527 | 18.3965 |
74
+ | 1.5097 | 19.0 | 543020 | 1.8868 | 0.2592 | 0.1115 | 0.221 | 0.2531 | 18.4567 |
75
+ | 1.5091 | 20.0 | 571600 | 1.8851 | 0.2593 | 0.1124 | 0.2216 | 0.2533 | 18.397 |
76
+ | 1.5116 | 21.0 | 600180 | 1.8895 | 0.2599 | 0.1124 | 0.2219 | 0.2537 | 18.4505 |
77
+ | 1.5351 | 22.0 | 628760 | 1.8901 | 0.2606 | 0.113 | 0.2225 | 0.2544 | 18.4369 |
78
+ | 1.5125 | 23.0 | 657340 | 1.8953 | 0.2598 | 0.1125 | 0.2218 | 0.2535 | 18.4273 |
79
+ | 1.5246 | 24.0 | 685920 | 1.8980 | 0.2609 | 0.1129 | 0.2226 | 0.2544 | 18.4464 |
80
+ | 1.5113 | 25.0 | 714500 | 1.8990 | 0.2604 | 0.1127 | 0.2221 | 0.2542 | 18.4562 |
81
+ | 1.4814 | 26.0 | 743080 | 1.9029 | 0.261 | 0.1133 | 0.223 | 0.2547 | 18.4634 |
82
+ | 1.5212 | 27.0 | 771660 | 1.9014 | 0.2606 | 0.1129 | 0.2226 | 0.2544 | 18.4458 |
83
+ | 1.4469 | 28.0 | 800240 | 1.9032 | 0.2609 | 0.1129 | 0.2226 | 0.2546 | 18.4577 |
84
+ | 1.4844 | 29.0 | 828820 | 1.9050 | 0.2602 | 0.1125 | 0.2221 | 0.2539 | 18.4553 |
85
+ | 1.4561 | 30.0 | 857400 | 1.9063 | 0.2604 | 0.1127 | 0.2222 | 0.2541 | 18.4528 |
86
+
87
+
88
+ ### Framework versions
89
+
90
+ - Transformers 4.33.2
91
+ - Pytorch 2.0.1+cu117
92
+ - Datasets 2.14.5
93
+ - Tokenizers 0.13.3
config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "unicamp-dl/ptt5-base-portuguese-vocab",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "classifier_dropout": 0.0,
7
+ "d_ff": 3072,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "relu",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "relu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": false,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "model_type": "t5",
20
+ "n_positions": 512,
21
+ "num_decoder_layers": 12,
22
+ "num_heads": 12,
23
+ "num_layers": 12,
24
+ "output_past": true,
25
+ "pad_token_id": 0,
26
+ "relative_attention_max_distance": 128,
27
+ "relative_attention_num_buckets": 32,
28
+ "torch_dtype": "float32",
29
+ "transformers_version": "4.33.2",
30
+ "use_cache": true,
31
+ "vocab_size": 32128
32
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "decoder_start_token_id": 0,
3
+ "eos_token_id": 1,
4
+ "pad_token_id": 0,
5
+ "transformers_version": "4.33.2"
6
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e76a8d1dcac02617cebf6eda6f372bff1ede7e83e51eeb85e182e517c8dbb5e
3
+ size 891702929
special_tokens_map.json ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": "</s>",
105
+ "pad_token": "<pad>",
106
+ "unk_token": "<unk>"
107
+ }
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcb25b1d67f04fce0e710d58430b606b4ee9887144fa2da22d123c44061cc62e
3
+ size 755569
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "clean_up_tokenization_spaces": true,
105
+ "eos_token": "</s>",
106
+ "extra_ids": 100,
107
+ "legacy": true,
108
+ "model_max_length": 1000000000000000019884624838656,
109
+ "pad_token": "<pad>",
110
+ "sp_model_kwargs": {},
111
+ "tokenizer_class": "T5Tokenizer",
112
+ "unk_token": "<unk>"
113
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9e58f9cc74aede6b1552f5be944b43bb34ae9c4fca232278f26c059da57ead8
3
+ size 4219