GuyCalledMav committed on
Commit
9c4c6ae
1 Parent(s): 188078c

Upload 12 files

Browse files
added_tokens.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "[CLS]": 30002,
3
+ "[MASK]": 30003,
4
+ "[PAD]": 30001,
5
+ "[SEP]": 30000
6
+ }
config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "RobertaForMaskedLM"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "roberta",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 1,
20
+ "position_embedding_type": "absolute",
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.24.0",
23
+ "type_vocab_size": 2,
24
+ "use_cache": true,
25
+ "vocab_size": 30004
26
+ }
eval_results.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ eval_loss = 3.955014076278108
2
+ perplexity = tensor(52.1964)
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model_args.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"adafactor_beta1": null, "adafactor_clip_threshold": 1.0, "adafactor_decay_rate": -0.8, "adafactor_eps": [1e-30, 0.001], "adafactor_relative_step": true, "adafactor_scale_parameter": true, "adafactor_warmup_init": true, "adam_betas": [0.9, 0.999], "adam_epsilon": 1e-08, "best_model_dir": "RoBERTa-ceb/best_model", "cache_dir": "cache_dir/", "config": {}, "cosine_schedule_num_cycles": 0.5, "custom_layer_parameters": [], "custom_parameter_groups": [], "dataloader_num_workers": 0, "do_lower_case": false, "dynamic_quantize": false, "early_stopping_consider_epochs": false, "early_stopping_delta": 0, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 3, "encoding": null, "eval_batch_size": 160, "evaluate_during_training": true, "evaluate_during_training_silent": true, "evaluate_during_training_steps": 20000, "evaluate_during_training_verbose": false, "evaluate_each_epoch": true, "fp16": true, "gradient_accumulation_steps": 1, "learning_rate": 4e-05, "local_rank": -1, "logging_steps": 50, "loss_type": null, "loss_args": {}, "manual_seed": null, "max_grad_norm": 1.0, "max_seq_length": 128, "model_name": null, "model_type": "roberta", "multiprocessing_chunksize": -1, "n_gpu": 1, "no_cache": false, "no_save": false, "not_saved_args": [], "num_train_epochs": 100, "optimizer": "AdamW", "output_dir": "BERT-ceb", "overwrite_output_dir": true, "polynomial_decay_schedule_lr_end": 1e-07, "polynomial_decay_schedule_power": 1.0, "process_count": 38, "quantized_model": false, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": true, "save_model_every_epoch": true, "save_optimizer_and_scheduler": true, "save_steps": 5000, "scheduler": "linear_schedule_with_warmup", "silent": false, "skip_special_tokens": true, "tensorboard_dir": null, "thread_count": null, "tokenizer_name": "BERT-ceb", "tokenizer_type": null, "train_batch_size": 160, "train_custom_parameters_only": false, "use_cached_eval_features": 
false, "use_early_stopping": false, "use_hf_datasets": false, "use_multiprocessing": true, "use_multiprocessing_for_evaluation": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 5040, "weight_decay": 0.0, "model_class": "LanguageModelingModel", "block_size": 128, "config_name": null, "dataset_class": null, "dataset_type": "simple", "discriminator_config": {}, "discriminator_loss_weight": 50.0, "generator_config": {}, "max_steps": -1, "min_frequency": 2, "mlm": true, "mlm_probability": 0.15, "sliding_window": false, "special_tokens": ["<s>", "<pad>", "</s>", "<unk>", "<mask>"], "stride": 0.8, "tie_generator_and_discriminator_embeddings": true, "vocab_size": 30000, "clean_text": true, "handle_chinese_chars": true, "special_tokens_list": [], "strip_accents": true}
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4cd7710a78d7e0382d1fa8328b2d917b051725cf3566e916c9bfe39ef344de4
3
+ size 436537529
special_tokens_map.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": "[CLS]",
10
+ "eos_token": {
11
+ "content": "</s>",
12
+ "lstrip": false,
13
+ "normalized": true,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ "mask_token": "[MASK]",
18
+ "pad_token": "[PAD]",
19
+ "sep_token": "[SEP]",
20
+ "unk_token": "[UNK]"
21
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": {
4
+ "__type": "AddedToken",
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false
10
+ },
11
+ "cls_token": {
12
+ "__type": "AddedToken",
13
+ "content": "[CLS]",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false
18
+ },
19
+ "do_basic_tokenize": true,
20
+ "do_lower_case": true,
21
+ "eos_token": {
22
+ "__type": "AddedToken",
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": true,
26
+ "rstrip": false,
27
+ "single_word": false
28
+ },
29
+ "errors": "replace",
30
+ "mask_token": {
31
+ "__type": "AddedToken",
32
+ "content": "[MASK]",
33
+ "lstrip": true,
34
+ "normalized": true,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ },
38
+ "name_or_path": "BERT-ceb",
39
+ "never_split": null,
40
+ "pad_token": {
41
+ "__type": "AddedToken",
42
+ "content": "[PAD]",
43
+ "lstrip": false,
44
+ "normalized": true,
45
+ "rstrip": false,
46
+ "single_word": false
47
+ },
48
+ "sep_token": {
49
+ "__type": "AddedToken",
50
+ "content": "[SEP]",
51
+ "lstrip": false,
52
+ "normalized": true,
53
+ "rstrip": false,
54
+ "single_word": false
55
+ },
56
+ "special_tokens_map_file": null,
57
+ "strip_accents": null,
58
+ "tokenize_chinese_chars": true,
59
+ "tokenizer_class": "RobertaTokenizer",
60
+ "unk_token": {
61
+ "__type": "AddedToken",
62
+ "content": "[UNK]",
63
+ "lstrip": false,
64
+ "normalized": true,
65
+ "rstrip": false,
66
+ "single_word": false
67
+ }
68
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d36f08f3d1d6f259142f076ba71f24ef1883c5f06bb4f50e40b965b7e9476161
3
+ size 3387
training_progress_scores.csv ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ global_step,perplexity,eval_loss,train_loss
2
+ 840,tensor(5513.1270),8.614887404780816,8.615015983581543
3
+ 1680,tensor(2866.9822),7.961015423327261,7.8731489181518555
4
+ 2520,tensor(2383.4500),7.776304355729812,7.76918888092041
5
+ 3360,tensor(2167.2241),7.681202540465441,7.873549938201904
6
+ 4200,tensor(2011.2488),7.606511034671729,7.7114129066467285
7
+ 5040,tensor(1925.7876),7.563090380899149,7.557165145874023
8
+ 5880,tensor(1873.8759),7.535764445625775,7.542567253112793
9
+ 6720,tensor(1828.3774),7.511184253963814,7.672656536102295
10
+ 7560,tensor(1783.2251),7.4861786467204166,7.458953857421875
11
+ 8400,tensor(1500.5310),7.313574164964576,7.289700508117676
12
+ 9240,tensor(1025.8199),6.933247593341846,7.138363361358643
13
+ 10080,tensor(850.7896),6.74616496370867,6.698551177978516
14
+ 10920,tensor(718.7275),6.577482442720242,6.610429763793945
15
+ 11760,tensor(591.0831),6.381956342272284,6.376274585723877
16
+ 12600,tensor(477.7185),6.16902169458109,6.171719551086426
17
+ 13440,tensor(357.5952),5.879401738044775,6.041259765625
18
+ 14280,tensor(300.5546),5.705629161183868,5.919500827789307
19
+ 15120,tensor(259.2738),5.557884844558499,5.610713958740234
20
+ 15960,tensor(236.5213),5.46603816380433,5.657792091369629
21
+ 16800,tensor(216.0584),5.375549020360431,5.34259033203125
22
+ 17640,tensor(199.4774),5.295700963639535,5.249312400817871
23
+ 18480,tensor(186.8399),5.230252159715263,5.466444492340088
24
+ 19320,tensor(174.4325),5.161537850637571,5.218142032623291
25
+ 20000,tensor(165.4119),5.108438900861695,5.233940601348877
26
+ 20160,tensor(164.5391),5.103147904454814,5.230566024780273
27
+ 21000,tensor(154.7863),5.042045109644885,5.188614845275879
28
+ 21840,tensor(146.6164),4.9878196422522665,5.2135701179504395
29
+ 22680,tensor(140.0771),4.942193148825406,4.814875602722168
30
+ 23520,tensor(134.7099),4.903123423951497,4.779435157775879
31
+ 24360,tensor(127.4303),4.8475694950158,4.873952388763428
32
+ 25200,tensor(122.9376),4.811676481888758,4.701533317565918
33
+ 26040,tensor(118.4770),4.77471930381811,4.734566688537598
34
+ 26880,tensor(113.0559),4.727882242880726,4.733781814575195
35
+ 27720,tensor(108.0131),4.682252737018169,4.784864902496338
36
+ 28560,tensor(105.1070),4.654978862870926,4.570435523986816
37
+ 29400,tensor(102.5748),4.6305917653992275,4.797003746032715
38
+ 30240,tensor(98.3004),4.588028168791278,4.692539691925049
39
+ 31080,tensor(95.5100),4.559231265461276,4.472110271453857
40
+ 31920,tensor(92.8367),4.530842494060643,4.457094192504883
41
+ 32760,tensor(91.0269),4.511155435824281,4.4847493171691895
42
+ 33600,tensor(89.1943),4.490817592042317,4.467089653015137
43
+ 34440,tensor(85.9484),4.453746852151591,4.714705467224121
44
+ 35280,tensor(84.4949),4.436690606212164,4.459835529327393
45
+ 36120,tensor(83.1834),4.4210476332931155,4.568218231201172
46
+ 36960,tensor(81.0199),4.394695270682963,4.207172870635986
47
+ 37800,tensor(78.6906),4.3655232479222015,4.555455207824707
48
+ 38640,tensor(77.7475),4.353466031675655,4.264974594116211
49
+ 39480,tensor(75.5957),4.32539894456547,4.3341593742370605
50
+ 40000,tensor(76.1601),4.3328377375670515,4.343103408813477
51
+ 40320,tensor(75.1305),4.3192271779498785,4.320957183837891
52
+ 41160,tensor(73.8844),4.302502227620491,4.186777591705322
53
+ 42000,tensor(72.7903),4.287583556785402,4.321813583374023
54
+ 42840,tensor(71.3143),4.2670973850087535,4.38191556930542
55
+ 43680,tensor(70.2743),4.2524054344231486,4.150886535644531
56
+ 44520,tensor(69.5957),4.242702348537355,4.198707580566406
57
+ 45360,tensor(67.9366),4.218575100198176,4.0208282470703125
58
+ 46200,tensor(68.2866),4.223713122272944,4.201685428619385
59
+ 47040,tensor(66.6248),4.1990775677830126,4.237384796142578
60
+ 47880,tensor(66.2193),4.192972441985144,4.080409049987793
61
+ 48720,tensor(65.3626),4.179950529930151,4.410434722900391
62
+ 49560,tensor(64.7271),4.170180269892182,4.0652947425842285
63
+ 50400,tensor(64.2023),4.162038403099747,4.079613208770752
64
+ 51240,tensor(62.7907),4.139807715800136,4.082955360412598
65
+ 52080,tensor(62.3912),4.133424125011499,4.0272297859191895
66
+ 52920,tensor(62.1102),4.12891009633575,4.157703399658203
67
+ 53760,tensor(61.3228),4.116151399522031,4.108161926269531
68
+ 54600,tensor(60.7504),4.106774397935912,4.0041704177856445
69
+ 55440,tensor(60.3719),4.1005239068614365,4.1067047119140625
70
+ 56280,tensor(59.6579),4.08862609094918,4.0335893630981445
71
+ 57120,tensor(59.8366),4.091618014737893,4.1296000480651855
72
+ 57960,tensor(59.0609),4.078569598672514,4.050683498382568
73
+ 58800,tensor(58.5802),4.070397336336109,4.029040813446045
74
+ 59640,tensor(58.5353),4.0696296974381,4.057146072387695
75
+ 60000,tensor(58.2795),4.065249836275363,4.020354747772217
76
+ 60480,tensor(58.3460),4.066390033016837,3.941168785095215
77
+ 61320,tensor(57.4409),4.050756845429046,4.060215950012207
78
+ 62160,tensor(56.9148),4.041554759463993,3.8870997428894043
79
+ 63000,tensor(56.7257),4.038228142318002,4.010282039642334
80
+ 63840,tensor(56.4911),4.034082424019186,3.8918683528900146
81
+ 64680,tensor(55.8974),4.023517806383106,3.913522481918335
82
+ 65520,tensor(55.8921),4.023423032173048,3.9048819541931152
83
+ 66360,tensor(55.6332),4.018779180626169,4.0070977210998535
84
+ 67200,tensor(55.3677),4.013996507319229,3.8241348266601562
85
+ 68040,tensor(55.1016),4.009178212468658,3.8613393306732178
86
+ 68880,tensor(54.4008),3.9963787630271006,3.984696388244629
87
+ 69720,tensor(54.6007),4.000046700662911,3.971323251724243
88
+ 70560,tensor(54.7101),4.002047452881438,3.8017985820770264
89
+ 71400,tensor(54.2510),3.993622047641266,4.045064926147461
90
+ 72240,tensor(54.2700),3.9939723986584994,3.7902510166168213
91
+ 73080,tensor(53.8975),3.987083983082342,4.033565998077393
92
+ 73920,tensor(53.4055),3.97791349718356,3.8635339736938477
93
+ 74760,tensor(53.2877),3.975705491423042,4.0212907791137695
94
+ 75600,tensor(53.2083),3.9742152747384747,3.9361703395843506
95
+ 76440,tensor(53.2604),3.975193798824509,3.7788941860198975
96
+ 77280,tensor(53.3057),3.9760427565371255,3.9743189811706543
97
+ 78120,tensor(52.6205),3.9631065192380786,3.8015661239624023
98
+ 78960,tensor(52.6849),3.964329612198599,3.915081262588501
99
+ 79800,tensor(53.0415),3.971074112218703,4.116347312927246
100
+ 80000,tensor(52.3625),3.9581908987596703,3.8000710010528564
101
+ 80640,tensor(52.4477),3.959817202735286,3.951565742492676
102
+ 81480,tensor(52.9591),3.969520092010498,3.83418345451355
103
+ 82320,tensor(52.5453),3.961675506067502,3.841287851333618
104
+ 83160,tensor(52.6411),3.963497509888563,4.05776309967041
105
+ 84000,tensor(52.3799),3.9585225909807105,3.8181488513946533
vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
vocab.txt ADDED
The diff for this file is too large to render. See raw diff