tiagoblima committed on
Commit 92fa24f
1 Parent(s): 95f84a2

End of training

Files changed (5)
  1. README.md +3 -1
  2. all_results.json +6 -6
  3. eval_results.json +3 -3
  4. train_results.json +3 -3
  5. trainer_state.json +18 -18
README.md CHANGED
@@ -3,6 +3,8 @@ license: mit
 base_model: facebook/mbart-large-50
 tags:
 - generated_from_trainer
+datasets:
+- tiagoblima/qg_squad_v1_pt
 model-index:
 - name: mbart50-qg-aas
   results: []
@@ -13,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # mbart50-qg-aas
 
-This model is a fine-tuned version of [facebook/mbart-large-50](https://huggingface.co/facebook/mbart-large-50) on an unknown dataset.
+This model is a fine-tuned version of [facebook/mbart-large-50](https://huggingface.co/facebook/mbart-large-50) on the tiagoblima/qg_squad_v1_pt dataset.
 It achieves the following results on the evaluation set:
 - Loss: 5.1971
 
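The updated card now names tiagoblima/qg_squad_v1_pt as the training dataset. As a rough usage sketch (the hub repo id tiagoblima/mbart50-qg-aas is inferred from the model-index name and the committer, and the expected input format is not documented in this card), loading the checkpoint with transformers might look like:

```python
# Sketch only: "tiagoblima/mbart50-qg-aas" is an assumed repo id (taken from the
# model-index name and the committer); the card itself does not give the full path.
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

model_id = "tiagoblima/mbart50-qg-aas"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

# Question generation from a Portuguese passage; the prompt layout is an
# assumption, since the card does not document it.
text = "O Brasil é o maior país da América do Sul."
inputs = tokenizer(text, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=64, num_beams=4)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```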
all_results.json CHANGED
@@ -1,13 +1,13 @@
 {
     "epoch": 5.0,
     "eval_loss": 5.197052955627441,
-    "eval_runtime": 198.4096,
+    "eval_runtime": 198.6642,
     "eval_samples": 6327,
-    "eval_samples_per_second": 31.889,
-    "eval_steps_per_second": 7.973,
+    "eval_samples_per_second": 31.848,
+    "eval_steps_per_second": 7.963,
     "train_loss": 4.983092109755714,
-    "train_runtime": 9308.9617,
+    "train_runtime": 9355.0169,
     "train_samples": 51704,
-    "train_samples_per_second": 27.771,
-    "train_steps_per_second": 0.434
+    "train_samples_per_second": 27.634,
+    "train_steps_per_second": 0.432
 }
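The updated per-second figures are consistent with the sample counts and runtimes in the same file, assuming the usual Trainer convention (eval throughput = samples / runtime; train throughput scaled by the number of epochs). A quick sanity check, with the total step count of 4040 taken from trainer_state.json below:

```python
# Consistency check for the updated all_results.json values (assumes the
# standard Trainer definitions of the *_per_second metrics).
eval_samples, eval_runtime = 6327, 198.6642
train_samples, train_runtime = 51704, 9355.0169
epochs, train_steps = 5, 4040

print(round(eval_samples / eval_runtime, 3))             # ~31.848 (eval_samples_per_second)
print(round(train_samples * epochs / train_runtime, 3))  # ~27.634 (train_samples_per_second)
print(round(train_steps / train_runtime, 3))             # ~0.432  (train_steps_per_second)
```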
eval_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 5.0,
     "eval_loss": 5.197052955627441,
-    "eval_runtime": 198.4096,
+    "eval_runtime": 198.6642,
     "eval_samples": 6327,
-    "eval_samples_per_second": 31.889,
-    "eval_steps_per_second": 7.973
+    "eval_samples_per_second": 31.848,
+    "eval_steps_per_second": 7.963
 }
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 5.0,
     "train_loss": 4.983092109755714,
-    "train_runtime": 9308.9617,
+    "train_runtime": 9355.0169,
     "train_samples": 51704,
-    "train_samples_per_second": 27.771,
-    "train_steps_per_second": 0.434
+    "train_samples_per_second": 27.634,
+    "train_steps_per_second": 0.432
 }
trainer_state.json CHANGED
@@ -59,9 +59,9 @@
     {
       "epoch": 1.0,
       "eval_loss": 8.01647663116455,
-      "eval_runtime": 198.6559,
-      "eval_samples_per_second": 31.849,
-      "eval_steps_per_second": 7.964,
+      "eval_runtime": 198.4343,
+      "eval_samples_per_second": 31.885,
+      "eval_steps_per_second": 7.972,
       "step": 808
     },
     {
@@ -115,9 +115,9 @@
     {
       "epoch": 2.0,
       "eval_loss": 7.2192888259887695,
-      "eval_runtime": 198.5597,
-      "eval_samples_per_second": 31.864,
-      "eval_steps_per_second": 7.967,
+      "eval_runtime": 198.6961,
+      "eval_samples_per_second": 31.843,
+      "eval_steps_per_second": 7.962,
       "step": 1616
     },
     {
@@ -171,9 +171,9 @@
     {
       "epoch": 3.0,
       "eval_loss": 6.484571933746338,
-      "eval_runtime": 198.533,
-      "eval_samples_per_second": 31.869,
-      "eval_steps_per_second": 7.968,
+      "eval_runtime": 198.9778,
+      "eval_samples_per_second": 31.798,
+      "eval_steps_per_second": 7.951,
       "step": 2424
     },
     {
@@ -227,9 +227,9 @@
     {
       "epoch": 4.0,
       "eval_loss": 5.73320198059082,
-      "eval_runtime": 198.2238,
-      "eval_samples_per_second": 31.918,
-      "eval_steps_per_second": 7.981,
+      "eval_runtime": 198.6381,
+      "eval_samples_per_second": 31.852,
+      "eval_steps_per_second": 7.964,
       "step": 3232
     },
     {
@@ -283,9 +283,9 @@
     {
       "epoch": 5.0,
       "eval_loss": 5.197052955627441,
-      "eval_runtime": 198.3119,
-      "eval_samples_per_second": 31.904,
-      "eval_steps_per_second": 7.977,
+      "eval_runtime": 198.6931,
+      "eval_samples_per_second": 31.843,
+      "eval_steps_per_second": 7.962,
       "step": 4040
     },
     {
@@ -293,9 +293,9 @@
       "step": 4040,
       "total_flos": 3.6330926562607104e+17,
       "train_loss": 4.983092109755714,
-      "train_runtime": 9308.9617,
-      "train_samples_per_second": 27.771,
-      "train_steps_per_second": 0.434
+      "train_runtime": 9355.0169,
+      "train_samples_per_second": 27.634,
+      "train_steps_per_second": 0.432
     }
   ],
   "logging_steps": 100,