Tidzo committed on
Commit
bb2ccf9
1 Parent(s): 5fff3af

End of training

Browse files
Files changed (5) hide show
  1. README.md +3 -3
  2. all_results.json +9 -9
  3. eval_results.json +5 -5
  4. train_results.json +4 -4
  5. trainer_state.json +53 -53
README.md CHANGED
@@ -21,7 +21,7 @@ model-index:
21
  metrics:
22
  - name: Accuracy
23
  type: accuracy
24
- value: 0.8977272727272727
25
  ---
26
 
27
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -31,8 +31,8 @@ should probably proofread and complete it, then remove this comment. -->
31
 
32
  This model is a fine-tuned version of [Tidzo/dit-base-rvlcdip-finetuned-grp-actual](https://huggingface.co/Tidzo/dit-base-rvlcdip-finetuned-grp-actual) on the imagefolder dataset.
33
  It achieves the following results on the evaluation set:
34
- - Loss: 0.4582
35
- - Accuracy: 0.8977
36
 
37
  ## Model description
38
 
 
21
  metrics:
22
  - name: Accuracy
23
  type: accuracy
24
+ value: 0.9015151515151515
25
  ---
26
 
27
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
31
 
32
  This model is a fine-tuned version of [Tidzo/dit-base-rvlcdip-finetuned-grp-actual](https://huggingface.co/Tidzo/dit-base-rvlcdip-finetuned-grp-actual) on the imagefolder dataset.
33
  It achieves the following results on the evaluation set:
34
+ - Loss: 0.4601
35
+ - Accuracy: 0.9015
36
 
37
  ## Model description
38
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 6.72,
3
- "eval_accuracy": 0.7575757575757576,
4
- "eval_loss": 1.3033273220062256,
5
- "eval_runtime": 14.5204,
6
- "eval_samples_per_second": 18.181,
7
- "eval_steps_per_second": 0.62,
8
  "total_flos": 1.2388649195611423e+18,
9
- "train_loss": 1.7003454405163962,
10
- "train_runtime": 3667.1764,
11
- "train_samples_per_second": 4.535,
12
- "train_steps_per_second": 0.034
13
  }
 
1
  {
2
  "epoch": 6.72,
3
+ "eval_accuracy": 0.9015151515151515,
4
+ "eval_loss": 0.46005359292030334,
5
+ "eval_runtime": 17.5331,
6
+ "eval_samples_per_second": 15.057,
7
+ "eval_steps_per_second": 0.513,
8
  "total_flos": 1.2388649195611423e+18,
9
+ "train_loss": 0.7077309669010223,
10
+ "train_runtime": 1389.7418,
11
+ "train_samples_per_second": 11.968,
12
+ "train_steps_per_second": 0.091
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 6.72,
3
- "eval_accuracy": 0.7575757575757576,
4
- "eval_loss": 1.3033273220062256,
5
- "eval_runtime": 14.5204,
6
- "eval_samples_per_second": 18.181,
7
- "eval_steps_per_second": 0.62
8
  }
 
1
  {
2
  "epoch": 6.72,
3
+ "eval_accuracy": 0.9015151515151515,
4
+ "eval_loss": 0.46005359292030334,
5
+ "eval_runtime": 17.5331,
6
+ "eval_samples_per_second": 15.057,
7
+ "eval_steps_per_second": 0.513
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 6.72,
3
  "total_flos": 1.2388649195611423e+18,
4
- "train_loss": 1.7003454405163962,
5
- "train_runtime": 3667.1764,
6
- "train_samples_per_second": 4.535,
7
- "train_steps_per_second": 0.034
8
  }
 
1
  {
2
  "epoch": 6.72,
3
  "total_flos": 1.2388649195611423e+18,
4
+ "train_loss": 0.7077309669010223,
5
+ "train_runtime": 1389.7418,
6
+ "train_samples_per_second": 11.968,
7
+ "train_steps_per_second": 0.091
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.7575757575757576,
3
- "best_model_checkpoint": "dit-base-rvlcdip-finetuned-grp-actual/checkpoint-93",
4
  "epoch": 6.72,
5
  "eval_steps": 500,
6
  "global_step": 126,
@@ -11,146 +11,146 @@
11
  {
12
  "epoch": 0.53,
13
  "learning_rate": 3.846153846153846e-05,
14
- "loss": 2.3577,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.96,
19
- "eval_accuracy": 0.5113636363636364,
20
- "eval_loss": 2.086306571960449,
21
- "eval_runtime": 240.3077,
22
- "eval_samples_per_second": 1.099,
23
- "eval_steps_per_second": 0.037,
24
  "step": 18
25
  },
26
  {
27
  "epoch": 1.07,
28
  "learning_rate": 4.690265486725664e-05,
29
- "loss": 2.2163,
30
  "step": 20
31
  },
32
  {
33
  "epoch": 1.6,
34
  "learning_rate": 4.247787610619469e-05,
35
- "loss": 2.0601,
36
  "step": 30
37
  },
38
  {
39
  "epoch": 1.97,
40
- "eval_accuracy": 0.6477272727272727,
41
- "eval_loss": 1.8153679370880127,
42
- "eval_runtime": 14.6331,
43
- "eval_samples_per_second": 18.041,
44
- "eval_steps_per_second": 0.615,
45
  "step": 37
46
  },
47
  {
48
  "epoch": 2.13,
49
  "learning_rate": 3.8053097345132744e-05,
50
- "loss": 1.943,
51
  "step": 40
52
  },
53
  {
54
  "epoch": 2.67,
55
  "learning_rate": 3.3628318584070804e-05,
56
- "loss": 1.8068,
57
  "step": 50
58
  },
59
  {
60
  "epoch": 2.99,
61
- "eval_accuracy": 0.6704545454545454,
62
- "eval_loss": 1.5881296396255493,
63
- "eval_runtime": 14.8318,
64
- "eval_samples_per_second": 17.8,
65
- "eval_steps_per_second": 0.607,
66
  "step": 56
67
  },
68
  {
69
  "epoch": 3.2,
70
  "learning_rate": 2.9203539823008852e-05,
71
- "loss": 1.66,
72
  "step": 60
73
  },
74
  {
75
  "epoch": 3.73,
76
  "learning_rate": 2.4778761061946905e-05,
77
- "loss": 1.5953,
78
  "step": 70
79
  },
80
  {
81
  "epoch": 4.0,
82
- "eval_accuracy": 0.7159090909090909,
83
- "eval_loss": 1.4111517667770386,
84
- "eval_runtime": 14.6707,
85
- "eval_samples_per_second": 17.995,
86
- "eval_steps_per_second": 0.613,
87
  "step": 75
88
  },
89
  {
90
  "epoch": 4.27,
91
  "learning_rate": 2.0353982300884957e-05,
92
- "loss": 1.4929,
93
  "step": 80
94
  },
95
  {
96
  "epoch": 4.8,
97
  "learning_rate": 1.592920353982301e-05,
98
- "loss": 1.4304,
99
  "step": 90
100
  },
101
  {
102
  "epoch": 4.96,
103
- "eval_accuracy": 0.7575757575757576,
104
- "eval_loss": 1.3033273220062256,
105
- "eval_runtime": 14.9724,
106
- "eval_samples_per_second": 17.632,
107
- "eval_steps_per_second": 0.601,
108
  "step": 93
109
  },
110
  {
111
  "epoch": 5.33,
112
  "learning_rate": 1.1504424778761062e-05,
113
- "loss": 1.3606,
114
  "step": 100
115
  },
116
  {
117
  "epoch": 5.87,
118
  "learning_rate": 7.079646017699115e-06,
119
- "loss": 1.3458,
120
  "step": 110
121
  },
122
  {
123
  "epoch": 5.97,
124
- "eval_accuracy": 0.75,
125
- "eval_loss": 1.2400753498077393,
126
- "eval_runtime": 14.8483,
127
- "eval_samples_per_second": 17.78,
128
- "eval_steps_per_second": 0.606,
129
  "step": 112
130
  },
131
  {
132
  "epoch": 6.4,
133
  "learning_rate": 2.6548672566371683e-06,
134
- "loss": 1.3523,
135
  "step": 120
136
  },
137
  {
138
  "epoch": 6.72,
139
- "eval_accuracy": 0.7575757575757576,
140
- "eval_loss": 1.2240339517593384,
141
- "eval_runtime": 14.9425,
142
- "eval_samples_per_second": 17.668,
143
- "eval_steps_per_second": 0.602,
144
  "step": 126
145
  },
146
  {
147
  "epoch": 6.72,
148
  "step": 126,
149
  "total_flos": 1.2388649195611423e+18,
150
- "train_loss": 1.7003454405163962,
151
- "train_runtime": 3667.1764,
152
- "train_samples_per_second": 4.535,
153
- "train_steps_per_second": 0.034
154
  }
155
  ],
156
  "logging_steps": 10,
 
1
  {
2
+ "best_metric": 0.9015151515151515,
3
+ "best_model_checkpoint": "dit-base-rvlcdip-finetuned-grp-actual/checkpoint-112",
4
  "epoch": 6.72,
5
  "eval_steps": 500,
6
  "global_step": 126,
 
11
  {
12
  "epoch": 0.53,
13
  "learning_rate": 3.846153846153846e-05,
14
+ "loss": 0.8692,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.96,
19
+ "eval_accuracy": 0.8560606060606061,
20
+ "eval_loss": 0.6972441673278809,
21
+ "eval_runtime": 15.3616,
22
+ "eval_samples_per_second": 17.186,
23
+ "eval_steps_per_second": 0.586,
24
  "step": 18
25
  },
26
  {
27
  "epoch": 1.07,
28
  "learning_rate": 4.690265486725664e-05,
29
+ "loss": 0.7995,
30
  "step": 20
31
  },
32
  {
33
  "epoch": 1.6,
34
  "learning_rate": 4.247787610619469e-05,
35
+ "loss": 0.7348,
36
  "step": 30
37
  },
38
  {
39
  "epoch": 1.97,
40
+ "eval_accuracy": 0.8598484848484849,
41
+ "eval_loss": 0.6350475549697876,
42
+ "eval_runtime": 16.1781,
43
+ "eval_samples_per_second": 16.318,
44
+ "eval_steps_per_second": 0.556,
45
  "step": 37
46
  },
47
  {
48
  "epoch": 2.13,
49
  "learning_rate": 3.8053097345132744e-05,
50
+ "loss": 0.6855,
51
  "step": 40
52
  },
53
  {
54
  "epoch": 2.67,
55
  "learning_rate": 3.3628318584070804e-05,
56
+ "loss": 0.6655,
57
  "step": 50
58
  },
59
  {
60
  "epoch": 2.99,
61
+ "eval_accuracy": 0.8712121212121212,
62
+ "eval_loss": 0.533867359161377,
63
+ "eval_runtime": 15.3125,
64
+ "eval_samples_per_second": 17.241,
65
+ "eval_steps_per_second": 0.588,
66
  "step": 56
67
  },
68
  {
69
  "epoch": 3.2,
70
  "learning_rate": 2.9203539823008852e-05,
71
+ "loss": 0.7083,
72
  "step": 60
73
  },
74
  {
75
  "epoch": 3.73,
76
  "learning_rate": 2.4778761061946905e-05,
77
+ "loss": 0.7167,
78
  "step": 70
79
  },
80
  {
81
  "epoch": 4.0,
82
+ "eval_accuracy": 0.8901515151515151,
83
+ "eval_loss": 0.5045967102050781,
84
+ "eval_runtime": 15.2445,
85
+ "eval_samples_per_second": 17.318,
86
+ "eval_steps_per_second": 0.59,
87
  "step": 75
88
  },
89
  {
90
  "epoch": 4.27,
91
  "learning_rate": 2.0353982300884957e-05,
92
+ "loss": 0.6926,
93
  "step": 80
94
  },
95
  {
96
  "epoch": 4.8,
97
  "learning_rate": 1.592920353982301e-05,
98
+ "loss": 0.694,
99
  "step": 90
100
  },
101
  {
102
  "epoch": 4.96,
103
+ "eval_accuracy": 0.8863636363636364,
104
+ "eval_loss": 0.5026077628135681,
105
+ "eval_runtime": 15.2119,
106
+ "eval_samples_per_second": 17.355,
107
+ "eval_steps_per_second": 0.592,
108
  "step": 93
109
  },
110
  {
111
  "epoch": 5.33,
112
  "learning_rate": 1.1504424778761062e-05,
113
+ "loss": 0.6366,
114
  "step": 100
115
  },
116
  {
117
  "epoch": 5.87,
118
  "learning_rate": 7.079646017699115e-06,
119
+ "loss": 0.6638,
120
  "step": 110
121
  },
122
  {
123
  "epoch": 5.97,
124
+ "eval_accuracy": 0.9015151515151515,
125
+ "eval_loss": 0.46005359292030334,
126
+ "eval_runtime": 15.7633,
127
+ "eval_samples_per_second": 16.748,
128
+ "eval_steps_per_second": 0.571,
129
  "step": 112
130
  },
131
  {
132
  "epoch": 6.4,
133
  "learning_rate": 2.6548672566371683e-06,
134
+ "loss": 0.6618,
135
  "step": 120
136
  },
137
  {
138
  "epoch": 6.72,
139
+ "eval_accuracy": 0.8977272727272727,
140
+ "eval_loss": 0.45817553997039795,
141
+ "eval_runtime": 17.722,
142
+ "eval_samples_per_second": 14.897,
143
+ "eval_steps_per_second": 0.508,
144
  "step": 126
145
  },
146
  {
147
  "epoch": 6.72,
148
  "step": 126,
149
  "total_flos": 1.2388649195611423e+18,
150
+ "train_loss": 0.7077309669010223,
151
+ "train_runtime": 1389.7418,
152
+ "train_samples_per_second": 11.968,
153
+ "train_steps_per_second": 0.091
154
  }
155
  ],
156
  "logging_steps": 10,