Galuh Sahid
committed on
Commit
•
d354c6f
1
Parent(s):
e551781
Add checkpoints from experiments
Browse files
- logs/events.out.tfevents.1629166761.t1v-n-aa35e684-w-0.1228888.3.v2 +3 -0
- training_v1_unfreeze/config.json +171 -0
- training_v1_unfreeze/flax_model.msgpack +3 -0
- training_v1_unfreeze/logs/events.out.tfevents.1629082441.t1v-n-aa35e684-w-0.589252.3.v2 +3 -0
- training_v2/ckpt-13499/config.json +171 -0
- training_v2/ckpt-13499/flax_model.msgpack +3 -0
- training_v2/ckpt-13499/opt_state.msgpack +3 -0
- training_v2/ckpt-13499/training_state.json +1 -0
- training_v2/config.json +171 -0
- training_v2/flax_model.msgpack +3 -0
- training_v2/logs/events.out.tfevents.1629094610.t1v-n-aa35e684-w-0.710957.3.v2 +3 -0
- training_v2_unfreeze/ckpt-8999/config.json +171 -0
- training_v2_unfreeze/ckpt-8999/flax_model.msgpack +3 -0
- training_v2_unfreeze/ckpt-8999/opt_state.msgpack +3 -0
- training_v2_unfreeze/ckpt-8999/training_state.json +1 -0
- training_v2_unfreeze/config.json +171 -0
- training_v2_unfreeze/flax_model.msgpack +3 -0
- training_v2_unfreeze/logs/events.out.tfevents.1629108761.t1v-n-aa35e684-w-0.903353.3.v2 +3 -0
- training_v3/ckpt-39999/config.json +171 -0
- training_v3/ckpt-39999/flax_model.msgpack +3 -0
- training_v3/ckpt-39999/opt_state.msgpack +3 -0
- training_v3/ckpt-39999/training_state.json +1 -0
- training_v3/logs/events.out.tfevents.1629124531.t1v-n-aa35e684-w-0.1038480.3.v2 +3 -0
- training_v3_unfreeze/ckpt-43999/config.json +171 -0
- training_v3_unfreeze/ckpt-43999/flax_model.msgpack +3 -0
- training_v3_unfreeze/ckpt-43999/opt_state.msgpack +3 -0
- training_v3_unfreeze/ckpt-43999/training_state.json +1 -0
- training_v3_unfreeze/logs/events.out.tfevents.1629166761.t1v-n-aa35e684-w-0.1228888.3.v2 +3 -0
- training_v4/ckpt-70999/config.json +157 -0
- training_v4/ckpt-70999/flax_model.msgpack +3 -0
- training_v4/ckpt-70999/opt_state.msgpack +3 -0
- training_v4/ckpt-70999/training_state.json +1 -0
- training_v4/logs/events.out.tfevents.1629228812.t1v-n-aa35e684-w-0.1594221.3.v2 +3 -0
- training_v4_unfreeze/ckpt-43499/config.json +157 -0
- training_v4_unfreeze/ckpt-43499/flax_model.msgpack +3 -0
- training_v4_unfreeze/ckpt-43499/opt_state.msgpack +3 -0
- training_v4_unfreeze/ckpt-43499/training_state.json +1 -0
- training_v4_unfreeze/logs/events.out.tfevents.1629521842.t1v-n-aa35e684-w-0.2115740.3.v2 +3 -0
logs/events.out.tfevents.1629166761.t1v-n-aa35e684-w-0.1228888.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eccc93a6e752377aafd17bae1d0e28344ee6a7455aa7f533338ab5aff4721601
|
3 |
+
size 6574429
|
training_v1_unfreeze/config.json
ADDED
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"HybridCLIP"
|
4 |
+
],
|
5 |
+
"freeze_backbones": true,
|
6 |
+
"initializer_factor": 1.0,
|
7 |
+
"model_type": "hybrid-clip",
|
8 |
+
"projection_dim": 512,
|
9 |
+
"seed": 42,
|
10 |
+
"text_config": {
|
11 |
+
"_name_or_path": "",
|
12 |
+
"_num_labels": 5,
|
13 |
+
"add_cross_attention": false,
|
14 |
+
"architectures": [
|
15 |
+
"BertModel"
|
16 |
+
],
|
17 |
+
"attention_probs_dropout_prob": 0.1,
|
18 |
+
"bad_words_ids": null,
|
19 |
+
"bos_token_id": null,
|
20 |
+
"chunk_size_feed_forward": 0,
|
21 |
+
"decoder_start_token_id": null,
|
22 |
+
"directionality": "bidi",
|
23 |
+
"diversity_penalty": 0.0,
|
24 |
+
"do_sample": false,
|
25 |
+
"early_stopping": false,
|
26 |
+
"encoder_no_repeat_ngram_size": 0,
|
27 |
+
"eos_token_id": null,
|
28 |
+
"finetuning_task": null,
|
29 |
+
"forced_bos_token_id": null,
|
30 |
+
"forced_eos_token_id": null,
|
31 |
+
"gradient_checkpointing": false,
|
32 |
+
"hidden_act": "gelu",
|
33 |
+
"hidden_dropout_prob": 0.1,
|
34 |
+
"hidden_size": 1024,
|
35 |
+
"id2label": {
|
36 |
+
"0": "LABEL_0",
|
37 |
+
"1": "LABEL_1",
|
38 |
+
"2": "LABEL_2",
|
39 |
+
"3": "LABEL_3",
|
40 |
+
"4": "LABEL_4"
|
41 |
+
},
|
42 |
+
"initializer_range": 0.02,
|
43 |
+
"intermediate_size": 4096,
|
44 |
+
"is_decoder": false,
|
45 |
+
"is_encoder_decoder": false,
|
46 |
+
"label2id": {
|
47 |
+
"LABEL_0": 0,
|
48 |
+
"LABEL_1": 1,
|
49 |
+
"LABEL_2": 2,
|
50 |
+
"LABEL_3": 3,
|
51 |
+
"LABEL_4": 4
|
52 |
+
},
|
53 |
+
"layer_norm_eps": 1e-12,
|
54 |
+
"length_penalty": 1.0,
|
55 |
+
"max_length": 20,
|
56 |
+
"max_position_embeddings": 512,
|
57 |
+
"min_length": 0,
|
58 |
+
"model_type": "bert",
|
59 |
+
"no_repeat_ngram_size": 0,
|
60 |
+
"num_attention_heads": 16,
|
61 |
+
"num_beam_groups": 1,
|
62 |
+
"num_beams": 1,
|
63 |
+
"num_hidden_layers": 24,
|
64 |
+
"num_return_sequences": 1,
|
65 |
+
"output_attentions": false,
|
66 |
+
"output_hidden_states": false,
|
67 |
+
"output_past": true,
|
68 |
+
"output_scores": false,
|
69 |
+
"pad_token_id": 0,
|
70 |
+
"pooler_fc_size": 768,
|
71 |
+
"pooler_num_attention_heads": 12,
|
72 |
+
"pooler_num_fc_layers": 3,
|
73 |
+
"pooler_size_per_head": 128,
|
74 |
+
"pooler_type": "first_token_transform",
|
75 |
+
"position_embedding_type": "absolute",
|
76 |
+
"prefix": null,
|
77 |
+
"problem_type": null,
|
78 |
+
"pruned_heads": {},
|
79 |
+
"remove_invalid_values": false,
|
80 |
+
"repetition_penalty": 1.0,
|
81 |
+
"return_dict": true,
|
82 |
+
"return_dict_in_generate": false,
|
83 |
+
"sep_token_id": null,
|
84 |
+
"task_specific_params": null,
|
85 |
+
"temperature": 1.0,
|
86 |
+
"tie_encoder_decoder": false,
|
87 |
+
"tie_word_embeddings": true,
|
88 |
+
"tokenizer_class": null,
|
89 |
+
"top_k": 50,
|
90 |
+
"top_p": 1.0,
|
91 |
+
"torch_dtype": null,
|
92 |
+
"torchscript": false,
|
93 |
+
"transformers_version": "4.9.1",
|
94 |
+
"type_vocab_size": 2,
|
95 |
+
"use_bfloat16": false,
|
96 |
+
"use_cache": true,
|
97 |
+
"vocab_size": 30522
|
98 |
+
},
|
99 |
+
"transformers_version": null,
|
100 |
+
"vision_config": {
|
101 |
+
"_name_or_path": "",
|
102 |
+
"add_cross_attention": false,
|
103 |
+
"architectures": null,
|
104 |
+
"attention_dropout": 0.0,
|
105 |
+
"bad_words_ids": null,
|
106 |
+
"bos_token_id": null,
|
107 |
+
"chunk_size_feed_forward": 0,
|
108 |
+
"decoder_start_token_id": null,
|
109 |
+
"diversity_penalty": 0.0,
|
110 |
+
"do_sample": false,
|
111 |
+
"dropout": 0.0,
|
112 |
+
"early_stopping": false,
|
113 |
+
"encoder_no_repeat_ngram_size": 0,
|
114 |
+
"eos_token_id": null,
|
115 |
+
"finetuning_task": null,
|
116 |
+
"forced_bos_token_id": null,
|
117 |
+
"forced_eos_token_id": null,
|
118 |
+
"gradient_checkpointing": false,
|
119 |
+
"hidden_act": "quick_gelu",
|
120 |
+
"hidden_size": 768,
|
121 |
+
"id2label": {
|
122 |
+
"0": "LABEL_0",
|
123 |
+
"1": "LABEL_1"
|
124 |
+
},
|
125 |
+
"image_size": 224,
|
126 |
+
"initializer_factor": 1.0,
|
127 |
+
"initializer_range": 0.02,
|
128 |
+
"intermediate_size": 3072,
|
129 |
+
"is_decoder": false,
|
130 |
+
"is_encoder_decoder": false,
|
131 |
+
"label2id": {
|
132 |
+
"LABEL_0": 0,
|
133 |
+
"LABEL_1": 1
|
134 |
+
},
|
135 |
+
"layer_norm_eps": 1e-05,
|
136 |
+
"length_penalty": 1.0,
|
137 |
+
"max_length": 20,
|
138 |
+
"min_length": 0,
|
139 |
+
"model_type": "clip_vision_model",
|
140 |
+
"no_repeat_ngram_size": 0,
|
141 |
+
"num_attention_heads": 12,
|
142 |
+
"num_beam_groups": 1,
|
143 |
+
"num_beams": 1,
|
144 |
+
"num_hidden_layers": 12,
|
145 |
+
"num_return_sequences": 1,
|
146 |
+
"output_attentions": false,
|
147 |
+
"output_hidden_states": false,
|
148 |
+
"output_scores": false,
|
149 |
+
"pad_token_id": null,
|
150 |
+
"patch_size": 32,
|
151 |
+
"prefix": null,
|
152 |
+
"problem_type": null,
|
153 |
+
"pruned_heads": {},
|
154 |
+
"remove_invalid_values": false,
|
155 |
+
"repetition_penalty": 1.0,
|
156 |
+
"return_dict": true,
|
157 |
+
"return_dict_in_generate": false,
|
158 |
+
"sep_token_id": null,
|
159 |
+
"task_specific_params": null,
|
160 |
+
"temperature": 1.0,
|
161 |
+
"tie_encoder_decoder": false,
|
162 |
+
"tie_word_embeddings": true,
|
163 |
+
"tokenizer_class": null,
|
164 |
+
"top_k": 50,
|
165 |
+
"top_p": 1.0,
|
166 |
+
"torch_dtype": null,
|
167 |
+
"torchscript": false,
|
168 |
+
"transformers_version": "4.9.1",
|
169 |
+
"use_bfloat16": false
|
170 |
+
}
|
171 |
+
}
|
training_v1_unfreeze/flax_model.msgpack
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb251f2e72934a63c7f0eba42d86b98b5528bf2863957afc793bfbf03e4988f0
|
3 |
+
size 1694082566
|
training_v1_unfreeze/logs/events.out.tfevents.1629082441.t1v-n-aa35e684-w-0.589252.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3004f1d378e0e4697daeb975bb452c41a309edd50d82068d644e6e712eaed887
|
3 |
+
size 972046
|
training_v2/ckpt-13499/config.json
ADDED
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"HybridCLIP"
|
4 |
+
],
|
5 |
+
"freeze_backbones": true,
|
6 |
+
"initializer_factor": 1.0,
|
7 |
+
"model_type": "hybrid-clip",
|
8 |
+
"projection_dim": 512,
|
9 |
+
"seed": 42,
|
10 |
+
"text_config": {
|
11 |
+
"_name_or_path": "",
|
12 |
+
"_num_labels": 5,
|
13 |
+
"add_cross_attention": false,
|
14 |
+
"architectures": [
|
15 |
+
"BertModel"
|
16 |
+
],
|
17 |
+
"attention_probs_dropout_prob": 0.1,
|
18 |
+
"bad_words_ids": null,
|
19 |
+
"bos_token_id": null,
|
20 |
+
"chunk_size_feed_forward": 0,
|
21 |
+
"decoder_start_token_id": null,
|
22 |
+
"directionality": "bidi",
|
23 |
+
"diversity_penalty": 0.0,
|
24 |
+
"do_sample": false,
|
25 |
+
"early_stopping": false,
|
26 |
+
"encoder_no_repeat_ngram_size": 0,
|
27 |
+
"eos_token_id": null,
|
28 |
+
"finetuning_task": null,
|
29 |
+
"forced_bos_token_id": null,
|
30 |
+
"forced_eos_token_id": null,
|
31 |
+
"gradient_checkpointing": false,
|
32 |
+
"hidden_act": "gelu",
|
33 |
+
"hidden_dropout_prob": 0.1,
|
34 |
+
"hidden_size": 1024,
|
35 |
+
"id2label": {
|
36 |
+
"0": "LABEL_0",
|
37 |
+
"1": "LABEL_1",
|
38 |
+
"2": "LABEL_2",
|
39 |
+
"3": "LABEL_3",
|
40 |
+
"4": "LABEL_4"
|
41 |
+
},
|
42 |
+
"initializer_range": 0.02,
|
43 |
+
"intermediate_size": 4096,
|
44 |
+
"is_decoder": false,
|
45 |
+
"is_encoder_decoder": false,
|
46 |
+
"label2id": {
|
47 |
+
"LABEL_0": 0,
|
48 |
+
"LABEL_1": 1,
|
49 |
+
"LABEL_2": 2,
|
50 |
+
"LABEL_3": 3,
|
51 |
+
"LABEL_4": 4
|
52 |
+
},
|
53 |
+
"layer_norm_eps": 1e-12,
|
54 |
+
"length_penalty": 1.0,
|
55 |
+
"max_length": 20,
|
56 |
+
"max_position_embeddings": 512,
|
57 |
+
"min_length": 0,
|
58 |
+
"model_type": "bert",
|
59 |
+
"no_repeat_ngram_size": 0,
|
60 |
+
"num_attention_heads": 16,
|
61 |
+
"num_beam_groups": 1,
|
62 |
+
"num_beams": 1,
|
63 |
+
"num_hidden_layers": 24,
|
64 |
+
"num_return_sequences": 1,
|
65 |
+
"output_attentions": false,
|
66 |
+
"output_hidden_states": false,
|
67 |
+
"output_past": true,
|
68 |
+
"output_scores": false,
|
69 |
+
"pad_token_id": 0,
|
70 |
+
"pooler_fc_size": 768,
|
71 |
+
"pooler_num_attention_heads": 12,
|
72 |
+
"pooler_num_fc_layers": 3,
|
73 |
+
"pooler_size_per_head": 128,
|
74 |
+
"pooler_type": "first_token_transform",
|
75 |
+
"position_embedding_type": "absolute",
|
76 |
+
"prefix": null,
|
77 |
+
"problem_type": null,
|
78 |
+
"pruned_heads": {},
|
79 |
+
"remove_invalid_values": false,
|
80 |
+
"repetition_penalty": 1.0,
|
81 |
+
"return_dict": true,
|
82 |
+
"return_dict_in_generate": false,
|
83 |
+
"sep_token_id": null,
|
84 |
+
"task_specific_params": null,
|
85 |
+
"temperature": 1.0,
|
86 |
+
"tie_encoder_decoder": false,
|
87 |
+
"tie_word_embeddings": true,
|
88 |
+
"tokenizer_class": null,
|
89 |
+
"top_k": 50,
|
90 |
+
"top_p": 1.0,
|
91 |
+
"torch_dtype": null,
|
92 |
+
"torchscript": false,
|
93 |
+
"transformers_version": "4.9.1",
|
94 |
+
"type_vocab_size": 2,
|
95 |
+
"use_bfloat16": false,
|
96 |
+
"use_cache": true,
|
97 |
+
"vocab_size": 30522
|
98 |
+
},
|
99 |
+
"transformers_version": null,
|
100 |
+
"vision_config": {
|
101 |
+
"_name_or_path": "",
|
102 |
+
"add_cross_attention": false,
|
103 |
+
"architectures": null,
|
104 |
+
"attention_dropout": 0.0,
|
105 |
+
"bad_words_ids": null,
|
106 |
+
"bos_token_id": null,
|
107 |
+
"chunk_size_feed_forward": 0,
|
108 |
+
"decoder_start_token_id": null,
|
109 |
+
"diversity_penalty": 0.0,
|
110 |
+
"do_sample": false,
|
111 |
+
"dropout": 0.0,
|
112 |
+
"early_stopping": false,
|
113 |
+
"encoder_no_repeat_ngram_size": 0,
|
114 |
+
"eos_token_id": null,
|
115 |
+
"finetuning_task": null,
|
116 |
+
"forced_bos_token_id": null,
|
117 |
+
"forced_eos_token_id": null,
|
118 |
+
"gradient_checkpointing": false,
|
119 |
+
"hidden_act": "quick_gelu",
|
120 |
+
"hidden_size": 768,
|
121 |
+
"id2label": {
|
122 |
+
"0": "LABEL_0",
|
123 |
+
"1": "LABEL_1"
|
124 |
+
},
|
125 |
+
"image_size": 224,
|
126 |
+
"initializer_factor": 1.0,
|
127 |
+
"initializer_range": 0.02,
|
128 |
+
"intermediate_size": 3072,
|
129 |
+
"is_decoder": false,
|
130 |
+
"is_encoder_decoder": false,
|
131 |
+
"label2id": {
|
132 |
+
"LABEL_0": 0,
|
133 |
+
"LABEL_1": 1
|
134 |
+
},
|
135 |
+
"layer_norm_eps": 1e-05,
|
136 |
+
"length_penalty": 1.0,
|
137 |
+
"max_length": 20,
|
138 |
+
"min_length": 0,
|
139 |
+
"model_type": "clip_vision_model",
|
140 |
+
"no_repeat_ngram_size": 0,
|
141 |
+
"num_attention_heads": 12,
|
142 |
+
"num_beam_groups": 1,
|
143 |
+
"num_beams": 1,
|
144 |
+
"num_hidden_layers": 12,
|
145 |
+
"num_return_sequences": 1,
|
146 |
+
"output_attentions": false,
|
147 |
+
"output_hidden_states": false,
|
148 |
+
"output_scores": false,
|
149 |
+
"pad_token_id": null,
|
150 |
+
"patch_size": 32,
|
151 |
+
"prefix": null,
|
152 |
+
"problem_type": null,
|
153 |
+
"pruned_heads": {},
|
154 |
+
"remove_invalid_values": false,
|
155 |
+
"repetition_penalty": 1.0,
|
156 |
+
"return_dict": true,
|
157 |
+
"return_dict_in_generate": false,
|
158 |
+
"sep_token_id": null,
|
159 |
+
"task_specific_params": null,
|
160 |
+
"temperature": 1.0,
|
161 |
+
"tie_encoder_decoder": false,
|
162 |
+
"tie_word_embeddings": true,
|
163 |
+
"tokenizer_class": null,
|
164 |
+
"top_k": 50,
|
165 |
+
"top_p": 1.0,
|
166 |
+
"torch_dtype": null,
|
167 |
+
"torchscript": false,
|
168 |
+
"transformers_version": "4.9.1",
|
169 |
+
"use_bfloat16": false
|
170 |
+
}
|
171 |
+
}
|
training_v2/ckpt-13499/flax_model.msgpack
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c66b7dbcf0a442f0a50e82bfb447c61f0a2062f8f56dcb754f14f06cb7d94eb1
|
3 |
+
size 1694082566
|
training_v2/ckpt-13499/opt_state.msgpack
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d7b7ea085329906f9c46ccc31640686ed03d7d70890d00fc26f8abf5005b5313
|
3 |
+
size 3388165359
|
training_v2/ckpt-13499/training_state.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"step": 13500}
|
training_v2/config.json
ADDED
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"HybridCLIP"
|
4 |
+
],
|
5 |
+
"freeze_backbones": true,
|
6 |
+
"initializer_factor": 1.0,
|
7 |
+
"model_type": "hybrid-clip",
|
8 |
+
"projection_dim": 512,
|
9 |
+
"seed": 42,
|
10 |
+
"text_config": {
|
11 |
+
"_name_or_path": "",
|
12 |
+
"_num_labels": 5,
|
13 |
+
"add_cross_attention": false,
|
14 |
+
"architectures": [
|
15 |
+
"BertModel"
|
16 |
+
],
|
17 |
+
"attention_probs_dropout_prob": 0.1,
|
18 |
+
"bad_words_ids": null,
|
19 |
+
"bos_token_id": null,
|
20 |
+
"chunk_size_feed_forward": 0,
|
21 |
+
"decoder_start_token_id": null,
|
22 |
+
"directionality": "bidi",
|
23 |
+
"diversity_penalty": 0.0,
|
24 |
+
"do_sample": false,
|
25 |
+
"early_stopping": false,
|
26 |
+
"encoder_no_repeat_ngram_size": 0,
|
27 |
+
"eos_token_id": null,
|
28 |
+
"finetuning_task": null,
|
29 |
+
"forced_bos_token_id": null,
|
30 |
+
"forced_eos_token_id": null,
|
31 |
+
"gradient_checkpointing": false,
|
32 |
+
"hidden_act": "gelu",
|
33 |
+
"hidden_dropout_prob": 0.1,
|
34 |
+
"hidden_size": 1024,
|
35 |
+
"id2label": {
|
36 |
+
"0": "LABEL_0",
|
37 |
+
"1": "LABEL_1",
|
38 |
+
"2": "LABEL_2",
|
39 |
+
"3": "LABEL_3",
|
40 |
+
"4": "LABEL_4"
|
41 |
+
},
|
42 |
+
"initializer_range": 0.02,
|
43 |
+
"intermediate_size": 4096,
|
44 |
+
"is_decoder": false,
|
45 |
+
"is_encoder_decoder": false,
|
46 |
+
"label2id": {
|
47 |
+
"LABEL_0": 0,
|
48 |
+
"LABEL_1": 1,
|
49 |
+
"LABEL_2": 2,
|
50 |
+
"LABEL_3": 3,
|
51 |
+
"LABEL_4": 4
|
52 |
+
},
|
53 |
+
"layer_norm_eps": 1e-12,
|
54 |
+
"length_penalty": 1.0,
|
55 |
+
"max_length": 20,
|
56 |
+
"max_position_embeddings": 512,
|
57 |
+
"min_length": 0,
|
58 |
+
"model_type": "bert",
|
59 |
+
"no_repeat_ngram_size": 0,
|
60 |
+
"num_attention_heads": 16,
|
61 |
+
"num_beam_groups": 1,
|
62 |
+
"num_beams": 1,
|
63 |
+
"num_hidden_layers": 24,
|
64 |
+
"num_return_sequences": 1,
|
65 |
+
"output_attentions": false,
|
66 |
+
"output_hidden_states": false,
|
67 |
+
"output_past": true,
|
68 |
+
"output_scores": false,
|
69 |
+
"pad_token_id": 0,
|
70 |
+
"pooler_fc_size": 768,
|
71 |
+
"pooler_num_attention_heads": 12,
|
72 |
+
"pooler_num_fc_layers": 3,
|
73 |
+
"pooler_size_per_head": 128,
|
74 |
+
"pooler_type": "first_token_transform",
|
75 |
+
"position_embedding_type": "absolute",
|
76 |
+
"prefix": null,
|
77 |
+
"problem_type": null,
|
78 |
+
"pruned_heads": {},
|
79 |
+
"remove_invalid_values": false,
|
80 |
+
"repetition_penalty": 1.0,
|
81 |
+
"return_dict": true,
|
82 |
+
"return_dict_in_generate": false,
|
83 |
+
"sep_token_id": null,
|
84 |
+
"task_specific_params": null,
|
85 |
+
"temperature": 1.0,
|
86 |
+
"tie_encoder_decoder": false,
|
87 |
+
"tie_word_embeddings": true,
|
88 |
+
"tokenizer_class": null,
|
89 |
+
"top_k": 50,
|
90 |
+
"top_p": 1.0,
|
91 |
+
"torch_dtype": null,
|
92 |
+
"torchscript": false,
|
93 |
+
"transformers_version": "4.9.1",
|
94 |
+
"type_vocab_size": 2,
|
95 |
+
"use_bfloat16": false,
|
96 |
+
"use_cache": true,
|
97 |
+
"vocab_size": 30522
|
98 |
+
},
|
99 |
+
"transformers_version": null,
|
100 |
+
"vision_config": {
|
101 |
+
"_name_or_path": "",
|
102 |
+
"add_cross_attention": false,
|
103 |
+
"architectures": null,
|
104 |
+
"attention_dropout": 0.0,
|
105 |
+
"bad_words_ids": null,
|
106 |
+
"bos_token_id": null,
|
107 |
+
"chunk_size_feed_forward": 0,
|
108 |
+
"decoder_start_token_id": null,
|
109 |
+
"diversity_penalty": 0.0,
|
110 |
+
"do_sample": false,
|
111 |
+
"dropout": 0.0,
|
112 |
+
"early_stopping": false,
|
113 |
+
"encoder_no_repeat_ngram_size": 0,
|
114 |
+
"eos_token_id": null,
|
115 |
+
"finetuning_task": null,
|
116 |
+
"forced_bos_token_id": null,
|
117 |
+
"forced_eos_token_id": null,
|
118 |
+
"gradient_checkpointing": false,
|
119 |
+
"hidden_act": "quick_gelu",
|
120 |
+
"hidden_size": 768,
|
121 |
+
"id2label": {
|
122 |
+
"0": "LABEL_0",
|
123 |
+
"1": "LABEL_1"
|
124 |
+
},
|
125 |
+
"image_size": 224,
|
126 |
+
"initializer_factor": 1.0,
|
127 |
+
"initializer_range": 0.02,
|
128 |
+
"intermediate_size": 3072,
|
129 |
+
"is_decoder": false,
|
130 |
+
"is_encoder_decoder": false,
|
131 |
+
"label2id": {
|
132 |
+
"LABEL_0": 0,
|
133 |
+
"LABEL_1": 1
|
134 |
+
},
|
135 |
+
"layer_norm_eps": 1e-05,
|
136 |
+
"length_penalty": 1.0,
|
137 |
+
"max_length": 20,
|
138 |
+
"min_length": 0,
|
139 |
+
"model_type": "clip_vision_model",
|
140 |
+
"no_repeat_ngram_size": 0,
|
141 |
+
"num_attention_heads": 12,
|
142 |
+
"num_beam_groups": 1,
|
143 |
+
"num_beams": 1,
|
144 |
+
"num_hidden_layers": 12,
|
145 |
+
"num_return_sequences": 1,
|
146 |
+
"output_attentions": false,
|
147 |
+
"output_hidden_states": false,
|
148 |
+
"output_scores": false,
|
149 |
+
"pad_token_id": null,
|
150 |
+
"patch_size": 32,
|
151 |
+
"prefix": null,
|
152 |
+
"problem_type": null,
|
153 |
+
"pruned_heads": {},
|
154 |
+
"remove_invalid_values": false,
|
155 |
+
"repetition_penalty": 1.0,
|
156 |
+
"return_dict": true,
|
157 |
+
"return_dict_in_generate": false,
|
158 |
+
"sep_token_id": null,
|
159 |
+
"task_specific_params": null,
|
160 |
+
"temperature": 1.0,
|
161 |
+
"tie_encoder_decoder": false,
|
162 |
+
"tie_word_embeddings": true,
|
163 |
+
"tokenizer_class": null,
|
164 |
+
"top_k": 50,
|
165 |
+
"top_p": 1.0,
|
166 |
+
"torch_dtype": null,
|
167 |
+
"torchscript": false,
|
168 |
+
"transformers_version": "4.9.1",
|
169 |
+
"use_bfloat16": false
|
170 |
+
}
|
171 |
+
}
|
training_v2/flax_model.msgpack
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:13d726f98e7c26ec5f22800d9bc1a65489a669f77cdacbaf52a74cc5776c132a
|
3 |
+
size 1694082566
|
training_v2/logs/events.out.tfevents.1629094610.t1v-n-aa35e684-w-0.710957.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1d250d15cef2e48517fbe271c25a344b95a26f7e726d49e5f7fc72ef8d9e8813
|
3 |
+
size 1796251
|
training_v2_unfreeze/ckpt-8999/config.json
ADDED
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"HybridCLIP"
|
4 |
+
],
|
5 |
+
"freeze_backbones": true,
|
6 |
+
"initializer_factor": 1.0,
|
7 |
+
"model_type": "hybrid-clip",
|
8 |
+
"projection_dim": 512,
|
9 |
+
"seed": 42,
|
10 |
+
"text_config": {
|
11 |
+
"_name_or_path": "",
|
12 |
+
"_num_labels": 5,
|
13 |
+
"add_cross_attention": false,
|
14 |
+
"architectures": [
|
15 |
+
"BertModel"
|
16 |
+
],
|
17 |
+
"attention_probs_dropout_prob": 0.1,
|
18 |
+
"bad_words_ids": null,
|
19 |
+
"bos_token_id": null,
|
20 |
+
"chunk_size_feed_forward": 0,
|
21 |
+
"decoder_start_token_id": null,
|
22 |
+
"directionality": "bidi",
|
23 |
+
"diversity_penalty": 0.0,
|
24 |
+
"do_sample": false,
|
25 |
+
"early_stopping": false,
|
26 |
+
"encoder_no_repeat_ngram_size": 0,
|
27 |
+
"eos_token_id": null,
|
28 |
+
"finetuning_task": null,
|
29 |
+
"forced_bos_token_id": null,
|
30 |
+
"forced_eos_token_id": null,
|
31 |
+
"gradient_checkpointing": false,
|
32 |
+
"hidden_act": "gelu",
|
33 |
+
"hidden_dropout_prob": 0.1,
|
34 |
+
"hidden_size": 1024,
|
35 |
+
"id2label": {
|
36 |
+
"0": "LABEL_0",
|
37 |
+
"1": "LABEL_1",
|
38 |
+
"2": "LABEL_2",
|
39 |
+
"3": "LABEL_3",
|
40 |
+
"4": "LABEL_4"
|
41 |
+
},
|
42 |
+
"initializer_range": 0.02,
|
43 |
+
"intermediate_size": 4096,
|
44 |
+
"is_decoder": false,
|
45 |
+
"is_encoder_decoder": false,
|
46 |
+
"label2id": {
|
47 |
+
"LABEL_0": 0,
|
48 |
+
"LABEL_1": 1,
|
49 |
+
"LABEL_2": 2,
|
50 |
+
"LABEL_3": 3,
|
51 |
+
"LABEL_4": 4
|
52 |
+
},
|
53 |
+
"layer_norm_eps": 1e-12,
|
54 |
+
"length_penalty": 1.0,
|
55 |
+
"max_length": 20,
|
56 |
+
"max_position_embeddings": 512,
|
57 |
+
"min_length": 0,
|
58 |
+
"model_type": "bert",
|
59 |
+
"no_repeat_ngram_size": 0,
|
60 |
+
"num_attention_heads": 16,
|
61 |
+
"num_beam_groups": 1,
|
62 |
+
"num_beams": 1,
|
63 |
+
"num_hidden_layers": 24,
|
64 |
+
"num_return_sequences": 1,
|
65 |
+
"output_attentions": false,
|
66 |
+
"output_hidden_states": false,
|
67 |
+
"output_past": true,
|
68 |
+
"output_scores": false,
|
69 |
+
"pad_token_id": 0,
|
70 |
+
"pooler_fc_size": 768,
|
71 |
+
"pooler_num_attention_heads": 12,
|
72 |
+
"pooler_num_fc_layers": 3,
|
73 |
+
"pooler_size_per_head": 128,
|
74 |
+
"pooler_type": "first_token_transform",
|
75 |
+
"position_embedding_type": "absolute",
|
76 |
+
"prefix": null,
|
77 |
+
"problem_type": null,
|
78 |
+
"pruned_heads": {},
|
79 |
+
"remove_invalid_values": false,
|
80 |
+
"repetition_penalty": 1.0,
|
81 |
+
"return_dict": true,
|
82 |
+
"return_dict_in_generate": false,
|
83 |
+
"sep_token_id": null,
|
84 |
+
"task_specific_params": null,
|
85 |
+
"temperature": 1.0,
|
86 |
+
"tie_encoder_decoder": false,
|
87 |
+
"tie_word_embeddings": true,
|
88 |
+
"tokenizer_class": null,
|
89 |
+
"top_k": 50,
|
90 |
+
"top_p": 1.0,
|
91 |
+
"torch_dtype": null,
|
92 |
+
"torchscript": false,
|
93 |
+
"transformers_version": "4.9.1",
|
94 |
+
"type_vocab_size": 2,
|
95 |
+
"use_bfloat16": false,
|
96 |
+
"use_cache": true,
|
97 |
+
"vocab_size": 30522
|
98 |
+
},
|
99 |
+
"transformers_version": null,
|
100 |
+
"vision_config": {
|
101 |
+
"_name_or_path": "",
|
102 |
+
"add_cross_attention": false,
|
103 |
+
"architectures": null,
|
104 |
+
"attention_dropout": 0.0,
|
105 |
+
"bad_words_ids": null,
|
106 |
+
"bos_token_id": null,
|
107 |
+
"chunk_size_feed_forward": 0,
|
108 |
+
"decoder_start_token_id": null,
|
109 |
+
"diversity_penalty": 0.0,
|
110 |
+
"do_sample": false,
|
111 |
+
"dropout": 0.0,
|
112 |
+
"early_stopping": false,
|
113 |
+
"encoder_no_repeat_ngram_size": 0,
|
114 |
+
"eos_token_id": null,
|
115 |
+
"finetuning_task": null,
|
116 |
+
"forced_bos_token_id": null,
|
117 |
+
"forced_eos_token_id": null,
|
118 |
+
"gradient_checkpointing": false,
|
119 |
+
"hidden_act": "quick_gelu",
|
120 |
+
"hidden_size": 768,
|
121 |
+
"id2label": {
|
122 |
+
"0": "LABEL_0",
|
123 |
+
"1": "LABEL_1"
|
124 |
+
},
|
125 |
+
"image_size": 224,
|
126 |
+
"initializer_factor": 1.0,
|
127 |
+
"initializer_range": 0.02,
|
128 |
+
"intermediate_size": 3072,
|
129 |
+
"is_decoder": false,
|
130 |
+
"is_encoder_decoder": false,
|
131 |
+
"label2id": {
|
132 |
+
"LABEL_0": 0,
|
133 |
+
"LABEL_1": 1
|
134 |
+
},
|
135 |
+
"layer_norm_eps": 1e-05,
|
136 |
+
"length_penalty": 1.0,
|
137 |
+
"max_length": 20,
|
138 |
+
"min_length": 0,
|
139 |
+
"model_type": "clip_vision_model",
|
140 |
+
"no_repeat_ngram_size": 0,
|
141 |
+
"num_attention_heads": 12,
|
142 |
+
"num_beam_groups": 1,
|
143 |
+
"num_beams": 1,
|
144 |
+
"num_hidden_layers": 12,
|
145 |
+
"num_return_sequences": 1,
|
146 |
+
"output_attentions": false,
|
147 |
+
"output_hidden_states": false,
|
148 |
+
"output_scores": false,
|
149 |
+
"pad_token_id": null,
|
150 |
+
"patch_size": 32,
|
151 |
+
"prefix": null,
|
152 |
+
"problem_type": null,
|
153 |
+
"pruned_heads": {},
|
154 |
+
"remove_invalid_values": false,
|
155 |
+
"repetition_penalty": 1.0,
|
156 |
+
"return_dict": true,
|
157 |
+
"return_dict_in_generate": false,
|
158 |
+
"sep_token_id": null,
|
159 |
+
"task_specific_params": null,
|
160 |
+
"temperature": 1.0,
|
161 |
+
"tie_encoder_decoder": false,
|
162 |
+
"tie_word_embeddings": true,
|
163 |
+
"tokenizer_class": null,
|
164 |
+
"top_k": 50,
|
165 |
+
"top_p": 1.0,
|
166 |
+
"torch_dtype": null,
|
167 |
+
"torchscript": false,
|
168 |
+
"transformers_version": "4.9.1",
|
169 |
+
"use_bfloat16": false
|
170 |
+
}
|
171 |
+
}
|
training_v2_unfreeze/ckpt-8999/flax_model.msgpack
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f22db44da0beb3e1937fc479b86997ceb2344e310f79245333c6fe1211bfb987
|
3 |
+
size 1694082566
|
training_v2_unfreeze/ckpt-8999/opt_state.msgpack
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c8d495a612851186f7f7d4f19321dfd53caf950b87c6cca8ea1637c9821cba2f
|
3 |
+
size 3388165359
|
training_v2_unfreeze/ckpt-8999/training_state.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"step": 9000}
|
training_v2_unfreeze/config.json
ADDED
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"HybridCLIP"
|
4 |
+
],
|
5 |
+
"freeze_backbones": true,
|
6 |
+
"initializer_factor": 1.0,
|
7 |
+
"model_type": "hybrid-clip",
|
8 |
+
"projection_dim": 512,
|
9 |
+
"seed": 42,
|
10 |
+
"text_config": {
|
11 |
+
"_name_or_path": "",
|
12 |
+
"_num_labels": 5,
|
13 |
+
"add_cross_attention": false,
|
14 |
+
"architectures": [
|
15 |
+
"BertModel"
|
16 |
+
],
|
17 |
+
"attention_probs_dropout_prob": 0.1,
|
18 |
+
"bad_words_ids": null,
|
19 |
+
"bos_token_id": null,
|
20 |
+
"chunk_size_feed_forward": 0,
|
21 |
+
"decoder_start_token_id": null,
|
22 |
+
"directionality": "bidi",
|
23 |
+
"diversity_penalty": 0.0,
|
24 |
+
"do_sample": false,
|
25 |
+
"early_stopping": false,
|
26 |
+
"encoder_no_repeat_ngram_size": 0,
|
27 |
+
"eos_token_id": null,
|
28 |
+
"finetuning_task": null,
|
29 |
+
"forced_bos_token_id": null,
|
30 |
+
"forced_eos_token_id": null,
|
31 |
+
"gradient_checkpointing": false,
|
32 |
+
"hidden_act": "gelu",
|
33 |
+
"hidden_dropout_prob": 0.1,
|
34 |
+
"hidden_size": 1024,
|
35 |
+
"id2label": {
|
36 |
+
"0": "LABEL_0",
|
37 |
+
"1": "LABEL_1",
|
38 |
+
"2": "LABEL_2",
|
39 |
+
"3": "LABEL_3",
|
40 |
+
"4": "LABEL_4"
|
41 |
+
},
|
42 |
+
"initializer_range": 0.02,
|
43 |
+
"intermediate_size": 4096,
|
44 |
+
"is_decoder": false,
|
45 |
+
"is_encoder_decoder": false,
|
46 |
+
"label2id": {
|
47 |
+
"LABEL_0": 0,
|
48 |
+
"LABEL_1": 1,
|
49 |
+
"LABEL_2": 2,
|
50 |
+
"LABEL_3": 3,
|
51 |
+
"LABEL_4": 4
|
52 |
+
},
|
53 |
+
"layer_norm_eps": 1e-12,
|
54 |
+
"length_penalty": 1.0,
|
55 |
+
"max_length": 20,
|
56 |
+
"max_position_embeddings": 512,
|
57 |
+
"min_length": 0,
|
58 |
+
"model_type": "bert",
|
59 |
+
"no_repeat_ngram_size": 0,
|
60 |
+
"num_attention_heads": 16,
|
61 |
+
"num_beam_groups": 1,
|
62 |
+
"num_beams": 1,
|
63 |
+
"num_hidden_layers": 24,
|
64 |
+
"num_return_sequences": 1,
|
65 |
+
"output_attentions": false,
|
66 |
+
"output_hidden_states": false,
|
67 |
+
"output_past": true,
|
68 |
+
"output_scores": false,
|
69 |
+
"pad_token_id": 0,
|
70 |
+
"pooler_fc_size": 768,
|
71 |
+
"pooler_num_attention_heads": 12,
|
72 |
+
"pooler_num_fc_layers": 3,
|
73 |
+
"pooler_size_per_head": 128,
|
74 |
+
"pooler_type": "first_token_transform",
|
75 |
+
"position_embedding_type": "absolute",
|
76 |
+
"prefix": null,
|
77 |
+
"problem_type": null,
|
78 |
+
"pruned_heads": {},
|
79 |
+
"remove_invalid_values": false,
|
80 |
+
"repetition_penalty": 1.0,
|
81 |
+
"return_dict": true,
|
82 |
+
"return_dict_in_generate": false,
|
83 |
+
"sep_token_id": null,
|
84 |
+
"task_specific_params": null,
|
85 |
+
"temperature": 1.0,
|
86 |
+
"tie_encoder_decoder": false,
|
87 |
+
"tie_word_embeddings": true,
|
88 |
+
"tokenizer_class": null,
|
89 |
+
"top_k": 50,
|
90 |
+
"top_p": 1.0,
|
91 |
+
"torch_dtype": null,
|
92 |
+
"torchscript": false,
|
93 |
+
"transformers_version": "4.9.1",
|
94 |
+
"type_vocab_size": 2,
|
95 |
+
"use_bfloat16": false,
|
96 |
+
"use_cache": true,
|
97 |
+
"vocab_size": 30522
|
98 |
+
},
|
99 |
+
"transformers_version": null,
|
100 |
+
"vision_config": {
|
101 |
+
"_name_or_path": "",
|
102 |
+
"add_cross_attention": false,
|
103 |
+
"architectures": null,
|
104 |
+
"attention_dropout": 0.0,
|
105 |
+
"bad_words_ids": null,
|
106 |
+
"bos_token_id": null,
|
107 |
+
"chunk_size_feed_forward": 0,
|
108 |
+
"decoder_start_token_id": null,
|
109 |
+
"diversity_penalty": 0.0,
|
110 |
+
"do_sample": false,
|
111 |
+
"dropout": 0.0,
|
112 |
+
"early_stopping": false,
|
113 |
+
"encoder_no_repeat_ngram_size": 0,
|
114 |
+
"eos_token_id": null,
|
115 |
+
"finetuning_task": null,
|
116 |
+
"forced_bos_token_id": null,
|
117 |
+
"forced_eos_token_id": null,
|
118 |
+
"gradient_checkpointing": false,
|
119 |
+
"hidden_act": "quick_gelu",
|
120 |
+
"hidden_size": 768,
|
121 |
+
"id2label": {
|
122 |
+
"0": "LABEL_0",
|
123 |
+
"1": "LABEL_1"
|
124 |
+
},
|
125 |
+
"image_size": 224,
|
126 |
+
"initializer_factor": 1.0,
|
127 |
+
"initializer_range": 0.02,
|
128 |
+
"intermediate_size": 3072,
|
129 |
+
"is_decoder": false,
|
130 |
+
"is_encoder_decoder": false,
|
131 |
+
"label2id": {
|
132 |
+
"LABEL_0": 0,
|
133 |
+
"LABEL_1": 1
|
134 |
+
},
|
135 |
+
"layer_norm_eps": 1e-05,
|
136 |
+
"length_penalty": 1.0,
|
137 |
+
"max_length": 20,
|
138 |
+
"min_length": 0,
|
139 |
+
"model_type": "clip_vision_model",
|
140 |
+
"no_repeat_ngram_size": 0,
|
141 |
+
"num_attention_heads": 12,
|
142 |
+
"num_beam_groups": 1,
|
143 |
+
"num_beams": 1,
|
144 |
+
"num_hidden_layers": 12,
|
145 |
+
"num_return_sequences": 1,
|
146 |
+
"output_attentions": false,
|
147 |
+
"output_hidden_states": false,
|
148 |
+
"output_scores": false,
|
149 |
+
"pad_token_id": null,
|
150 |
+
"patch_size": 32,
|
151 |
+
"prefix": null,
|
152 |
+
"problem_type": null,
|
153 |
+
"pruned_heads": {},
|
154 |
+
"remove_invalid_values": false,
|
155 |
+
"repetition_penalty": 1.0,
|
156 |
+
"return_dict": true,
|
157 |
+
"return_dict_in_generate": false,
|
158 |
+
"sep_token_id": null,
|
159 |
+
"task_specific_params": null,
|
160 |
+
"temperature": 1.0,
|
161 |
+
"tie_encoder_decoder": false,
|
162 |
+
"tie_word_embeddings": true,
|
163 |
+
"tokenizer_class": null,
|
164 |
+
"top_k": 50,
|
165 |
+
"top_p": 1.0,
|
166 |
+
"torch_dtype": null,
|
167 |
+
"torchscript": false,
|
168 |
+
"transformers_version": "4.9.1",
|
169 |
+
"use_bfloat16": false
|
170 |
+
}
|
171 |
+
}
|
training_v2_unfreeze/flax_model.msgpack
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cccf505cbf831a982e042c27d6378ee299f3039e15f41d49acc9ab35944f5852
|
3 |
+
size 1694082566
|
training_v2_unfreeze/logs/events.out.tfevents.1629108761.t1v-n-aa35e684-w-0.903353.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e7946c3ca37aaf95994694ba2e0e872de7b27368c263013272007c059aa7988
|
3 |
+
size 1202329
|
training_v3/ckpt-39999/config.json
ADDED
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"HybridCLIP"
|
4 |
+
],
|
5 |
+
"freeze_backbones": true,
|
6 |
+
"initializer_factor": 1.0,
|
7 |
+
"model_type": "hybrid-clip",
|
8 |
+
"projection_dim": 512,
|
9 |
+
"seed": 42,
|
10 |
+
"text_config": {
|
11 |
+
"_name_or_path": "",
|
12 |
+
"_num_labels": 5,
|
13 |
+
"add_cross_attention": false,
|
14 |
+
"architectures": [
|
15 |
+
"BertModel"
|
16 |
+
],
|
17 |
+
"attention_probs_dropout_prob": 0.1,
|
18 |
+
"bad_words_ids": null,
|
19 |
+
"bos_token_id": null,
|
20 |
+
"chunk_size_feed_forward": 0,
|
21 |
+
"decoder_start_token_id": null,
|
22 |
+
"directionality": "bidi",
|
23 |
+
"diversity_penalty": 0.0,
|
24 |
+
"do_sample": false,
|
25 |
+
"early_stopping": false,
|
26 |
+
"encoder_no_repeat_ngram_size": 0,
|
27 |
+
"eos_token_id": null,
|
28 |
+
"finetuning_task": null,
|
29 |
+
"forced_bos_token_id": null,
|
30 |
+
"forced_eos_token_id": null,
|
31 |
+
"gradient_checkpointing": false,
|
32 |
+
"hidden_act": "gelu",
|
33 |
+
"hidden_dropout_prob": 0.1,
|
34 |
+
"hidden_size": 1024,
|
35 |
+
"id2label": {
|
36 |
+
"0": "LABEL_0",
|
37 |
+
"1": "LABEL_1",
|
38 |
+
"2": "LABEL_2",
|
39 |
+
"3": "LABEL_3",
|
40 |
+
"4": "LABEL_4"
|
41 |
+
},
|
42 |
+
"initializer_range": 0.02,
|
43 |
+
"intermediate_size": 4096,
|
44 |
+
"is_decoder": false,
|
45 |
+
"is_encoder_decoder": false,
|
46 |
+
"label2id": {
|
47 |
+
"LABEL_0": 0,
|
48 |
+
"LABEL_1": 1,
|
49 |
+
"LABEL_2": 2,
|
50 |
+
"LABEL_3": 3,
|
51 |
+
"LABEL_4": 4
|
52 |
+
},
|
53 |
+
"layer_norm_eps": 1e-12,
|
54 |
+
"length_penalty": 1.0,
|
55 |
+
"max_length": 20,
|
56 |
+
"max_position_embeddings": 512,
|
57 |
+
"min_length": 0,
|
58 |
+
"model_type": "bert",
|
59 |
+
"no_repeat_ngram_size": 0,
|
60 |
+
"num_attention_heads": 16,
|
61 |
+
"num_beam_groups": 1,
|
62 |
+
"num_beams": 1,
|
63 |
+
"num_hidden_layers": 24,
|
64 |
+
"num_return_sequences": 1,
|
65 |
+
"output_attentions": false,
|
66 |
+
"output_hidden_states": false,
|
67 |
+
"output_past": true,
|
68 |
+
"output_scores": false,
|
69 |
+
"pad_token_id": 0,
|
70 |
+
"pooler_fc_size": 768,
|
71 |
+
"pooler_num_attention_heads": 12,
|
72 |
+
"pooler_num_fc_layers": 3,
|
73 |
+
"pooler_size_per_head": 128,
|
74 |
+
"pooler_type": "first_token_transform",
|
75 |
+
"position_embedding_type": "absolute",
|
76 |
+
"prefix": null,
|
77 |
+
"problem_type": null,
|
78 |
+
"pruned_heads": {},
|
79 |
+
"remove_invalid_values": false,
|
80 |
+
"repetition_penalty": 1.0,
|
81 |
+
"return_dict": true,
|
82 |
+
"return_dict_in_generate": false,
|
83 |
+
"sep_token_id": null,
|
84 |
+
"task_specific_params": null,
|
85 |
+
"temperature": 1.0,
|
86 |
+
"tie_encoder_decoder": false,
|
87 |
+
"tie_word_embeddings": true,
|
88 |
+
"tokenizer_class": null,
|
89 |
+
"top_k": 50,
|
90 |
+
"top_p": 1.0,
|
91 |
+
"torch_dtype": null,
|
92 |
+
"torchscript": false,
|
93 |
+
"transformers_version": "4.9.1",
|
94 |
+
"type_vocab_size": 2,
|
95 |
+
"use_bfloat16": false,
|
96 |
+
"use_cache": true,
|
97 |
+
"vocab_size": 30522
|
98 |
+
},
|
99 |
+
"transformers_version": null,
|
100 |
+
"vision_config": {
|
101 |
+
"_name_or_path": "",
|
102 |
+
"add_cross_attention": false,
|
103 |
+
"architectures": null,
|
104 |
+
"attention_dropout": 0.0,
|
105 |
+
"bad_words_ids": null,
|
106 |
+
"bos_token_id": null,
|
107 |
+
"chunk_size_feed_forward": 0,
|
108 |
+
"decoder_start_token_id": null,
|
109 |
+
"diversity_penalty": 0.0,
|
110 |
+
"do_sample": false,
|
111 |
+
"dropout": 0.0,
|
112 |
+
"early_stopping": false,
|
113 |
+
"encoder_no_repeat_ngram_size": 0,
|
114 |
+
"eos_token_id": null,
|
115 |
+
"finetuning_task": null,
|
116 |
+
"forced_bos_token_id": null,
|
117 |
+
"forced_eos_token_id": null,
|
118 |
+
"gradient_checkpointing": false,
|
119 |
+
"hidden_act": "quick_gelu",
|
120 |
+
"hidden_size": 768,
|
121 |
+
"id2label": {
|
122 |
+
"0": "LABEL_0",
|
123 |
+
"1": "LABEL_1"
|
124 |
+
},
|
125 |
+
"image_size": 224,
|
126 |
+
"initializer_factor": 1.0,
|
127 |
+
"initializer_range": 0.02,
|
128 |
+
"intermediate_size": 3072,
|
129 |
+
"is_decoder": false,
|
130 |
+
"is_encoder_decoder": false,
|
131 |
+
"label2id": {
|
132 |
+
"LABEL_0": 0,
|
133 |
+
"LABEL_1": 1
|
134 |
+
},
|
135 |
+
"layer_norm_eps": 1e-05,
|
136 |
+
"length_penalty": 1.0,
|
137 |
+
"max_length": 20,
|
138 |
+
"min_length": 0,
|
139 |
+
"model_type": "clip_vision_model",
|
140 |
+
"no_repeat_ngram_size": 0,
|
141 |
+
"num_attention_heads": 12,
|
142 |
+
"num_beam_groups": 1,
|
143 |
+
"num_beams": 1,
|
144 |
+
"num_hidden_layers": 12,
|
145 |
+
"num_return_sequences": 1,
|
146 |
+
"output_attentions": false,
|
147 |
+
"output_hidden_states": false,
|
148 |
+
"output_scores": false,
|
149 |
+
"pad_token_id": null,
|
150 |
+
"patch_size": 32,
|
151 |
+
"prefix": null,
|
152 |
+
"problem_type": null,
|
153 |
+
"pruned_heads": {},
|
154 |
+
"remove_invalid_values": false,
|
155 |
+
"repetition_penalty": 1.0,
|
156 |
+
"return_dict": true,
|
157 |
+
"return_dict_in_generate": false,
|
158 |
+
"sep_token_id": null,
|
159 |
+
"task_specific_params": null,
|
160 |
+
"temperature": 1.0,
|
161 |
+
"tie_encoder_decoder": false,
|
162 |
+
"tie_word_embeddings": true,
|
163 |
+
"tokenizer_class": null,
|
164 |
+
"top_k": 50,
|
165 |
+
"top_p": 1.0,
|
166 |
+
"torch_dtype": null,
|
167 |
+
"torchscript": false,
|
168 |
+
"transformers_version": "4.9.1",
|
169 |
+
"use_bfloat16": false
|
170 |
+
}
|
171 |
+
}
|
training_v3/ckpt-39999/flax_model.msgpack
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:56032e3d91e3de88da870e7a0bf1379d8df205236269b75501cb103152407b78
|
3 |
+
size 1694082566
|
training_v3/ckpt-39999/opt_state.msgpack
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7fdd5c25b0ff4b7d729fee116a32331f2879192bf30e074075f97d0e5f04b6ac
|
3 |
+
size 3388165359
|
training_v3/ckpt-39999/training_state.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"step": 40000}
|
training_v3/logs/events.out.tfevents.1629124531.t1v-n-aa35e684-w-0.1038480.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c077f50fd2335426b22ab0615fb0cd3069d979877ec130dbd219f93498b81ff
|
3 |
+
size 5931805
|
training_v3_unfreeze/ckpt-43999/config.json
ADDED
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"HybridCLIP"
|
4 |
+
],
|
5 |
+
"freeze_backbones": true,
|
6 |
+
"initializer_factor": 1.0,
|
7 |
+
"model_type": "hybrid-clip",
|
8 |
+
"projection_dim": 512,
|
9 |
+
"seed": 42,
|
10 |
+
"text_config": {
|
11 |
+
"_name_or_path": "",
|
12 |
+
"_num_labels": 5,
|
13 |
+
"add_cross_attention": false,
|
14 |
+
"architectures": [
|
15 |
+
"BertModel"
|
16 |
+
],
|
17 |
+
"attention_probs_dropout_prob": 0.1,
|
18 |
+
"bad_words_ids": null,
|
19 |
+
"bos_token_id": null,
|
20 |
+
"chunk_size_feed_forward": 0,
|
21 |
+
"decoder_start_token_id": null,
|
22 |
+
"directionality": "bidi",
|
23 |
+
"diversity_penalty": 0.0,
|
24 |
+
"do_sample": false,
|
25 |
+
"early_stopping": false,
|
26 |
+
"encoder_no_repeat_ngram_size": 0,
|
27 |
+
"eos_token_id": null,
|
28 |
+
"finetuning_task": null,
|
29 |
+
"forced_bos_token_id": null,
|
30 |
+
"forced_eos_token_id": null,
|
31 |
+
"gradient_checkpointing": false,
|
32 |
+
"hidden_act": "gelu",
|
33 |
+
"hidden_dropout_prob": 0.1,
|
34 |
+
"hidden_size": 1024,
|
35 |
+
"id2label": {
|
36 |
+
"0": "LABEL_0",
|
37 |
+
"1": "LABEL_1",
|
38 |
+
"2": "LABEL_2",
|
39 |
+
"3": "LABEL_3",
|
40 |
+
"4": "LABEL_4"
|
41 |
+
},
|
42 |
+
"initializer_range": 0.02,
|
43 |
+
"intermediate_size": 4096,
|
44 |
+
"is_decoder": false,
|
45 |
+
"is_encoder_decoder": false,
|
46 |
+
"label2id": {
|
47 |
+
"LABEL_0": 0,
|
48 |
+
"LABEL_1": 1,
|
49 |
+
"LABEL_2": 2,
|
50 |
+
"LABEL_3": 3,
|
51 |
+
"LABEL_4": 4
|
52 |
+
},
|
53 |
+
"layer_norm_eps": 1e-12,
|
54 |
+
"length_penalty": 1.0,
|
55 |
+
"max_length": 20,
|
56 |
+
"max_position_embeddings": 512,
|
57 |
+
"min_length": 0,
|
58 |
+
"model_type": "bert",
|
59 |
+
"no_repeat_ngram_size": 0,
|
60 |
+
"num_attention_heads": 16,
|
61 |
+
"num_beam_groups": 1,
|
62 |
+
"num_beams": 1,
|
63 |
+
"num_hidden_layers": 24,
|
64 |
+
"num_return_sequences": 1,
|
65 |
+
"output_attentions": false,
|
66 |
+
"output_hidden_states": false,
|
67 |
+
"output_past": true,
|
68 |
+
"output_scores": false,
|
69 |
+
"pad_token_id": 0,
|
70 |
+
"pooler_fc_size": 768,
|
71 |
+
"pooler_num_attention_heads": 12,
|
72 |
+
"pooler_num_fc_layers": 3,
|
73 |
+
"pooler_size_per_head": 128,
|
74 |
+
"pooler_type": "first_token_transform",
|
75 |
+
"position_embedding_type": "absolute",
|
76 |
+
"prefix": null,
|
77 |
+
"problem_type": null,
|
78 |
+
"pruned_heads": {},
|
79 |
+
"remove_invalid_values": false,
|
80 |
+
"repetition_penalty": 1.0,
|
81 |
+
"return_dict": true,
|
82 |
+
"return_dict_in_generate": false,
|
83 |
+
"sep_token_id": null,
|
84 |
+
"task_specific_params": null,
|
85 |
+
"temperature": 1.0,
|
86 |
+
"tie_encoder_decoder": false,
|
87 |
+
"tie_word_embeddings": true,
|
88 |
+
"tokenizer_class": null,
|
89 |
+
"top_k": 50,
|
90 |
+
"top_p": 1.0,
|
91 |
+
"torch_dtype": null,
|
92 |
+
"torchscript": false,
|
93 |
+
"transformers_version": "4.9.1",
|
94 |
+
"type_vocab_size": 2,
|
95 |
+
"use_bfloat16": false,
|
96 |
+
"use_cache": true,
|
97 |
+
"vocab_size": 30522
|
98 |
+
},
|
99 |
+
"transformers_version": null,
|
100 |
+
"vision_config": {
|
101 |
+
"_name_or_path": "",
|
102 |
+
"add_cross_attention": false,
|
103 |
+
"architectures": null,
|
104 |
+
"attention_dropout": 0.0,
|
105 |
+
"bad_words_ids": null,
|
106 |
+
"bos_token_id": null,
|
107 |
+
"chunk_size_feed_forward": 0,
|
108 |
+
"decoder_start_token_id": null,
|
109 |
+
"diversity_penalty": 0.0,
|
110 |
+
"do_sample": false,
|
111 |
+
"dropout": 0.0,
|
112 |
+
"early_stopping": false,
|
113 |
+
"encoder_no_repeat_ngram_size": 0,
|
114 |
+
"eos_token_id": null,
|
115 |
+
"finetuning_task": null,
|
116 |
+
"forced_bos_token_id": null,
|
117 |
+
"forced_eos_token_id": null,
|
118 |
+
"gradient_checkpointing": false,
|
119 |
+
"hidden_act": "quick_gelu",
|
120 |
+
"hidden_size": 768,
|
121 |
+
"id2label": {
|
122 |
+
"0": "LABEL_0",
|
123 |
+
"1": "LABEL_1"
|
124 |
+
},
|
125 |
+
"image_size": 224,
|
126 |
+
"initializer_factor": 1.0,
|
127 |
+
"initializer_range": 0.02,
|
128 |
+
"intermediate_size": 3072,
|
129 |
+
"is_decoder": false,
|
130 |
+
"is_encoder_decoder": false,
|
131 |
+
"label2id": {
|
132 |
+
"LABEL_0": 0,
|
133 |
+
"LABEL_1": 1
|
134 |
+
},
|
135 |
+
"layer_norm_eps": 1e-05,
|
136 |
+
"length_penalty": 1.0,
|
137 |
+
"max_length": 20,
|
138 |
+
"min_length": 0,
|
139 |
+
"model_type": "clip_vision_model",
|
140 |
+
"no_repeat_ngram_size": 0,
|
141 |
+
"num_attention_heads": 12,
|
142 |
+
"num_beam_groups": 1,
|
143 |
+
"num_beams": 1,
|
144 |
+
"num_hidden_layers": 12,
|
145 |
+
"num_return_sequences": 1,
|
146 |
+
"output_attentions": false,
|
147 |
+
"output_hidden_states": false,
|
148 |
+
"output_scores": false,
|
149 |
+
"pad_token_id": null,
|
150 |
+
"patch_size": 32,
|
151 |
+
"prefix": null,
|
152 |
+
"problem_type": null,
|
153 |
+
"pruned_heads": {},
|
154 |
+
"remove_invalid_values": false,
|
155 |
+
"repetition_penalty": 1.0,
|
156 |
+
"return_dict": true,
|
157 |
+
"return_dict_in_generate": false,
|
158 |
+
"sep_token_id": null,
|
159 |
+
"task_specific_params": null,
|
160 |
+
"temperature": 1.0,
|
161 |
+
"tie_encoder_decoder": false,
|
162 |
+
"tie_word_embeddings": true,
|
163 |
+
"tokenizer_class": null,
|
164 |
+
"top_k": 50,
|
165 |
+
"top_p": 1.0,
|
166 |
+
"torch_dtype": null,
|
167 |
+
"torchscript": false,
|
168 |
+
"transformers_version": "4.9.1",
|
169 |
+
"use_bfloat16": false
|
170 |
+
}
|
171 |
+
}
|
training_v3_unfreeze/ckpt-43999/flax_model.msgpack
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8c13f2e350d439af1d09da40a86e62b9f4c67bf72056edeb32283690e9b3527
|
3 |
+
size 1694082566
|
training_v3_unfreeze/ckpt-43999/opt_state.msgpack
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:955e02d387978d3a44c07bb34a196ba6b65ec3fa6979020eb560d965f87c5a64
|
3 |
+
size 3388165359
|
training_v3_unfreeze/ckpt-43999/training_state.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"step": 44000}
|
training_v3_unfreeze/logs/events.out.tfevents.1629166761.t1v-n-aa35e684-w-0.1228888.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eccc93a6e752377aafd17bae1d0e28344ee6a7455aa7f533338ab5aff4721601
|
3 |
+
size 6574429
|
training_v4/ckpt-70999/config.json
ADDED
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"HybridCLIP"
|
4 |
+
],
|
5 |
+
"freeze_backbones": true,
|
6 |
+
"initializer_factor": 1.0,
|
7 |
+
"model_type": "hybrid-clip",
|
8 |
+
"projection_dim": 512,
|
9 |
+
"seed": 42,
|
10 |
+
"text_config": {
|
11 |
+
"_name_or_path": "./",
|
12 |
+
"add_cross_attention": false,
|
13 |
+
"architectures": [
|
14 |
+
"RobertaForMaskedLM"
|
15 |
+
],
|
16 |
+
"attention_probs_dropout_prob": 0.1,
|
17 |
+
"bad_words_ids": null,
|
18 |
+
"bos_token_id": 0,
|
19 |
+
"chunk_size_feed_forward": 0,
|
20 |
+
"decoder_start_token_id": null,
|
21 |
+
"diversity_penalty": 0.0,
|
22 |
+
"do_sample": false,
|
23 |
+
"early_stopping": false,
|
24 |
+
"encoder_no_repeat_ngram_size": 0,
|
25 |
+
"eos_token_id": 2,
|
26 |
+
"finetuning_task": null,
|
27 |
+
"forced_bos_token_id": null,
|
28 |
+
"forced_eos_token_id": null,
|
29 |
+
"gradient_checkpointing": false,
|
30 |
+
"hidden_act": "gelu",
|
31 |
+
"hidden_dropout_prob": 0.1,
|
32 |
+
"hidden_size": 768,
|
33 |
+
"id2label": {
|
34 |
+
"0": "LABEL_0",
|
35 |
+
"1": "LABEL_1"
|
36 |
+
},
|
37 |
+
"initializer_range": 0.02,
|
38 |
+
"intermediate_size": 3072,
|
39 |
+
"is_decoder": false,
|
40 |
+
"is_encoder_decoder": false,
|
41 |
+
"label2id": {
|
42 |
+
"LABEL_0": 0,
|
43 |
+
"LABEL_1": 1
|
44 |
+
},
|
45 |
+
"layer_norm_eps": 1e-05,
|
46 |
+
"length_penalty": 1.0,
|
47 |
+
"max_length": 20,
|
48 |
+
"max_position_embeddings": 514,
|
49 |
+
"min_length": 0,
|
50 |
+
"model_type": "roberta",
|
51 |
+
"no_repeat_ngram_size": 0,
|
52 |
+
"num_attention_heads": 12,
|
53 |
+
"num_beam_groups": 1,
|
54 |
+
"num_beams": 1,
|
55 |
+
"num_hidden_layers": 12,
|
56 |
+
"num_return_sequences": 1,
|
57 |
+
"output_attentions": false,
|
58 |
+
"output_hidden_states": false,
|
59 |
+
"output_scores": false,
|
60 |
+
"pad_token_id": 1,
|
61 |
+
"position_embedding_type": "absolute",
|
62 |
+
"prefix": null,
|
63 |
+
"problem_type": null,
|
64 |
+
"pruned_heads": {},
|
65 |
+
"remove_invalid_values": false,
|
66 |
+
"repetition_penalty": 1.0,
|
67 |
+
"return_dict": true,
|
68 |
+
"return_dict_in_generate": false,
|
69 |
+
"sep_token_id": null,
|
70 |
+
"task_specific_params": null,
|
71 |
+
"temperature": 1.0,
|
72 |
+
"tie_encoder_decoder": false,
|
73 |
+
"tie_word_embeddings": true,
|
74 |
+
"tokenizer_class": null,
|
75 |
+
"top_k": 50,
|
76 |
+
"top_p": 1.0,
|
77 |
+
"torch_dtype": "float32",
|
78 |
+
"torchscript": false,
|
79 |
+
"transformers_version": "4.9.1",
|
80 |
+
"type_vocab_size": 1,
|
81 |
+
"use_bfloat16": false,
|
82 |
+
"use_cache": true,
|
83 |
+
"vocab_size": 50265
|
84 |
+
},
|
85 |
+
"transformers_version": null,
|
86 |
+
"vision_config": {
|
87 |
+
"_name_or_path": "",
|
88 |
+
"add_cross_attention": false,
|
89 |
+
"architectures": null,
|
90 |
+
"attention_dropout": 0.0,
|
91 |
+
"bad_words_ids": null,
|
92 |
+
"bos_token_id": null,
|
93 |
+
"chunk_size_feed_forward": 0,
|
94 |
+
"decoder_start_token_id": null,
|
95 |
+
"diversity_penalty": 0.0,
|
96 |
+
"do_sample": false,
|
97 |
+
"dropout": 0.0,
|
98 |
+
"early_stopping": false,
|
99 |
+
"encoder_no_repeat_ngram_size": 0,
|
100 |
+
"eos_token_id": null,
|
101 |
+
"finetuning_task": null,
|
102 |
+
"forced_bos_token_id": null,
|
103 |
+
"forced_eos_token_id": null,
|
104 |
+
"gradient_checkpointing": false,
|
105 |
+
"hidden_act": "quick_gelu",
|
106 |
+
"hidden_size": 768,
|
107 |
+
"id2label": {
|
108 |
+
"0": "LABEL_0",
|
109 |
+
"1": "LABEL_1"
|
110 |
+
},
|
111 |
+
"image_size": 224,
|
112 |
+
"initializer_factor": 1.0,
|
113 |
+
"initializer_range": 0.02,
|
114 |
+
"intermediate_size": 3072,
|
115 |
+
"is_decoder": false,
|
116 |
+
"is_encoder_decoder": false,
|
117 |
+
"label2id": {
|
118 |
+
"LABEL_0": 0,
|
119 |
+
"LABEL_1": 1
|
120 |
+
},
|
121 |
+
"layer_norm_eps": 1e-05,
|
122 |
+
"length_penalty": 1.0,
|
123 |
+
"max_length": 20,
|
124 |
+
"min_length": 0,
|
125 |
+
"model_type": "clip_vision_model",
|
126 |
+
"no_repeat_ngram_size": 0,
|
127 |
+
"num_attention_heads": 12,
|
128 |
+
"num_beam_groups": 1,
|
129 |
+
"num_beams": 1,
|
130 |
+
"num_hidden_layers": 12,
|
131 |
+
"num_return_sequences": 1,
|
132 |
+
"output_attentions": false,
|
133 |
+
"output_hidden_states": false,
|
134 |
+
"output_scores": false,
|
135 |
+
"pad_token_id": null,
|
136 |
+
"patch_size": 32,
|
137 |
+
"prefix": null,
|
138 |
+
"problem_type": null,
|
139 |
+
"pruned_heads": {},
|
140 |
+
"remove_invalid_values": false,
|
141 |
+
"repetition_penalty": 1.0,
|
142 |
+
"return_dict": true,
|
143 |
+
"return_dict_in_generate": false,
|
144 |
+
"sep_token_id": null,
|
145 |
+
"task_specific_params": null,
|
146 |
+
"temperature": 1.0,
|
147 |
+
"tie_encoder_decoder": false,
|
148 |
+
"tie_word_embeddings": true,
|
149 |
+
"tokenizer_class": null,
|
150 |
+
"top_k": 50,
|
151 |
+
"top_p": 1.0,
|
152 |
+
"torch_dtype": null,
|
153 |
+
"torchscript": false,
|
154 |
+
"transformers_version": "4.9.1",
|
155 |
+
"use_bfloat16": false
|
156 |
+
}
|
157 |
+
}
|
training_v4/ckpt-70999/flax_model.msgpack
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2bf35faf1b6c0ec8ca94221c4b85527367fbec594ad75e24fcd638aa955079ae
|
3 |
+
size 851566424
|
training_v4/ckpt-70999/opt_state.msgpack
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c602513cf381db97b734e1d679292f017174181c17b029a0c7491236a314d2d6
|
3 |
+
size 1703133075
|
training_v4/ckpt-70999/training_state.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"step": 71000}
|
training_v4/logs/events.out.tfevents.1629228812.t1v-n-aa35e684-w-0.1594221.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b3dda6b3b9543e835a4dc488f311121644489937528afdbdf8814a94cf8306c
|
3 |
+
size 10450335
|
training_v4_unfreeze/ckpt-43499/config.json
ADDED
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"HybridCLIP"
|
4 |
+
],
|
5 |
+
"freeze_backbones": true,
|
6 |
+
"initializer_factor": 1.0,
|
7 |
+
"model_type": "hybrid-clip",
|
8 |
+
"projection_dim": 512,
|
9 |
+
"seed": 42,
|
10 |
+
"text_config": {
|
11 |
+
"_name_or_path": "./",
|
12 |
+
"add_cross_attention": false,
|
13 |
+
"architectures": [
|
14 |
+
"RobertaForMaskedLM"
|
15 |
+
],
|
16 |
+
"attention_probs_dropout_prob": 0.1,
|
17 |
+
"bad_words_ids": null,
|
18 |
+
"bos_token_id": 0,
|
19 |
+
"chunk_size_feed_forward": 0,
|
20 |
+
"decoder_start_token_id": null,
|
21 |
+
"diversity_penalty": 0.0,
|
22 |
+
"do_sample": false,
|
23 |
+
"early_stopping": false,
|
24 |
+
"encoder_no_repeat_ngram_size": 0,
|
25 |
+
"eos_token_id": 2,
|
26 |
+
"finetuning_task": null,
|
27 |
+
"forced_bos_token_id": null,
|
28 |
+
"forced_eos_token_id": null,
|
29 |
+
"gradient_checkpointing": false,
|
30 |
+
"hidden_act": "gelu",
|
31 |
+
"hidden_dropout_prob": 0.1,
|
32 |
+
"hidden_size": 768,
|
33 |
+
"id2label": {
|
34 |
+
"0": "LABEL_0",
|
35 |
+
"1": "LABEL_1"
|
36 |
+
},
|
37 |
+
"initializer_range": 0.02,
|
38 |
+
"intermediate_size": 3072,
|
39 |
+
"is_decoder": false,
|
40 |
+
"is_encoder_decoder": false,
|
41 |
+
"label2id": {
|
42 |
+
"LABEL_0": 0,
|
43 |
+
"LABEL_1": 1
|
44 |
+
},
|
45 |
+
"layer_norm_eps": 1e-05,
|
46 |
+
"length_penalty": 1.0,
|
47 |
+
"max_length": 20,
|
48 |
+
"max_position_embeddings": 514,
|
49 |
+
"min_length": 0,
|
50 |
+
"model_type": "roberta",
|
51 |
+
"no_repeat_ngram_size": 0,
|
52 |
+
"num_attention_heads": 12,
|
53 |
+
"num_beam_groups": 1,
|
54 |
+
"num_beams": 1,
|
55 |
+
"num_hidden_layers": 12,
|
56 |
+
"num_return_sequences": 1,
|
57 |
+
"output_attentions": false,
|
58 |
+
"output_hidden_states": false,
|
59 |
+
"output_scores": false,
|
60 |
+
"pad_token_id": 1,
|
61 |
+
"position_embedding_type": "absolute",
|
62 |
+
"prefix": null,
|
63 |
+
"problem_type": null,
|
64 |
+
"pruned_heads": {},
|
65 |
+
"remove_invalid_values": false,
|
66 |
+
"repetition_penalty": 1.0,
|
67 |
+
"return_dict": true,
|
68 |
+
"return_dict_in_generate": false,
|
69 |
+
"sep_token_id": null,
|
70 |
+
"task_specific_params": null,
|
71 |
+
"temperature": 1.0,
|
72 |
+
"tie_encoder_decoder": false,
|
73 |
+
"tie_word_embeddings": true,
|
74 |
+
"tokenizer_class": null,
|
75 |
+
"top_k": 50,
|
76 |
+
"top_p": 1.0,
|
77 |
+
"torch_dtype": "float32",
|
78 |
+
"torchscript": false,
|
79 |
+
"transformers_version": "4.9.1",
|
80 |
+
"type_vocab_size": 1,
|
81 |
+
"use_bfloat16": false,
|
82 |
+
"use_cache": true,
|
83 |
+
"vocab_size": 50265
|
84 |
+
},
|
85 |
+
"transformers_version": null,
|
86 |
+
"vision_config": {
|
87 |
+
"_name_or_path": "",
|
88 |
+
"add_cross_attention": false,
|
89 |
+
"architectures": null,
|
90 |
+
"attention_dropout": 0.0,
|
91 |
+
"bad_words_ids": null,
|
92 |
+
"bos_token_id": null,
|
93 |
+
"chunk_size_feed_forward": 0,
|
94 |
+
"decoder_start_token_id": null,
|
95 |
+
"diversity_penalty": 0.0,
|
96 |
+
"do_sample": false,
|
97 |
+
"dropout": 0.0,
|
98 |
+
"early_stopping": false,
|
99 |
+
"encoder_no_repeat_ngram_size": 0,
|
100 |
+
"eos_token_id": null,
|
101 |
+
"finetuning_task": null,
|
102 |
+
"forced_bos_token_id": null,
|
103 |
+
"forced_eos_token_id": null,
|
104 |
+
"gradient_checkpointing": false,
|
105 |
+
"hidden_act": "quick_gelu",
|
106 |
+
"hidden_size": 768,
|
107 |
+
"id2label": {
|
108 |
+
"0": "LABEL_0",
|
109 |
+
"1": "LABEL_1"
|
110 |
+
},
|
111 |
+
"image_size": 224,
|
112 |
+
"initializer_factor": 1.0,
|
113 |
+
"initializer_range": 0.02,
|
114 |
+
"intermediate_size": 3072,
|
115 |
+
"is_decoder": false,
|
116 |
+
"is_encoder_decoder": false,
|
117 |
+
"label2id": {
|
118 |
+
"LABEL_0": 0,
|
119 |
+
"LABEL_1": 1
|
120 |
+
},
|
121 |
+
"layer_norm_eps": 1e-05,
|
122 |
+
"length_penalty": 1.0,
|
123 |
+
"max_length": 20,
|
124 |
+
"min_length": 0,
|
125 |
+
"model_type": "clip_vision_model",
|
126 |
+
"no_repeat_ngram_size": 0,
|
127 |
+
"num_attention_heads": 12,
|
128 |
+
"num_beam_groups": 1,
|
129 |
+
"num_beams": 1,
|
130 |
+
"num_hidden_layers": 12,
|
131 |
+
"num_return_sequences": 1,
|
132 |
+
"output_attentions": false,
|
133 |
+
"output_hidden_states": false,
|
134 |
+
"output_scores": false,
|
135 |
+
"pad_token_id": null,
|
136 |
+
"patch_size": 32,
|
137 |
+
"prefix": null,
|
138 |
+
"problem_type": null,
|
139 |
+
"pruned_heads": {},
|
140 |
+
"remove_invalid_values": false,
|
141 |
+
"repetition_penalty": 1.0,
|
142 |
+
"return_dict": true,
|
143 |
+
"return_dict_in_generate": false,
|
144 |
+
"sep_token_id": null,
|
145 |
+
"task_specific_params": null,
|
146 |
+
"temperature": 1.0,
|
147 |
+
"tie_encoder_decoder": false,
|
148 |
+
"tie_word_embeddings": true,
|
149 |
+
"tokenizer_class": null,
|
150 |
+
"top_k": 50,
|
151 |
+
"top_p": 1.0,
|
152 |
+
"torch_dtype": null,
|
153 |
+
"torchscript": false,
|
154 |
+
"transformers_version": "4.9.1",
|
155 |
+
"use_bfloat16": false
|
156 |
+
}
|
157 |
+
}
|
training_v4_unfreeze/ckpt-43499/flax_model.msgpack
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4e52a2071cd240f4a93bc57f47ded0813813767dc933ad91fde99bf56592455b
|
3 |
+
size 851566424
|
training_v4_unfreeze/ckpt-43499/opt_state.msgpack
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b0b2d70e1871d9a33dd1a2c23a4d348cea7e5f537046e0b475b75527881fc36b
|
3 |
+
size 1703133075
|
training_v4_unfreeze/ckpt-43499/training_state.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"step": 43500}
|
training_v4_unfreeze/logs/events.out.tfevents.1629521842.t1v-n-aa35e684-w-0.2115740.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:883a9f7ced02a841c80cd4156cf6bde1cdb880e7d41e4ae67e336952c84f627c
|
3 |
+
size 6499510
|