dirkgr committed on
Commit
67b337b
1 Parent(s): 0715e62

Update repository

Browse files
.gitattributes CHANGED
@@ -25,3 +25,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ *.th filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - allennlp
4
+ ---
5
+
6
+ # TODO: Fill this model card
config.json ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_reader": {
3
+ "type": "drop",
4
+ "instance_format": "drop",
5
+ "passage_length_limit": 400,
6
+ "question_length_limit": 50,
7
+ "skip_when_all_empty": [
8
+ "passage_span",
9
+ "question_span",
10
+ "addition_subtraction",
11
+ "counting"
12
+ ],
13
+ "token_indexers": {
14
+ "token_characters": {
15
+ "type": "characters",
16
+ "min_padding_length": 5
17
+ },
18
+ "tokens": {
19
+ "type": "single_id",
20
+ "lowercase_tokens": true
21
+ }
22
+ }
23
+ },
24
+ "model": {
25
+ "type": "naqanet",
26
+ "answering_abilities": [
27
+ "passage_span_extraction",
28
+ "question_span_extraction",
29
+ "addition_subtraction",
30
+ "counting"
31
+ ],
32
+ "dropout_prob": 0.1,
33
+ "matrix_attention_layer": {
34
+ "type": "linear",
35
+ "combination": "x,y,x*y",
36
+ "tensor_1_dim": 128,
37
+ "tensor_2_dim": 128
38
+ },
39
+ "modeling_layer": {
40
+ "type": "qanet_encoder",
41
+ "attention_dropout_prob": 0,
42
+ "attention_projection_dim": 128,
43
+ "conv_kernel_size": 5,
44
+ "dropout_prob": 0.1,
45
+ "feedforward_hidden_dim": 128,
46
+ "hidden_dim": 128,
47
+ "input_dim": 128,
48
+ "layer_dropout_undecayed_prob": 0.1,
49
+ "num_attention_heads": 8,
50
+ "num_blocks": 6,
51
+ "num_convs_per_block": 2
52
+ },
53
+ "num_highway_layers": 2,
54
+ "phrase_layer": {
55
+ "type": "qanet_encoder",
56
+ "attention_dropout_prob": 0,
57
+ "attention_projection_dim": 128,
58
+ "conv_kernel_size": 7,
59
+ "dropout_prob": 0.1,
60
+ "feedforward_hidden_dim": 128,
61
+ "hidden_dim": 128,
62
+ "input_dim": 128,
63
+ "layer_dropout_undecayed_prob": 0.1,
64
+ "num_attention_heads": 8,
65
+ "num_blocks": 1,
66
+ "num_convs_per_block": 4
67
+ },
68
+ "regularizer": {
69
+ "regexes": [
70
+ [
71
+ ".*",
72
+ {
73
+ "alpha": 1e-07,
74
+ "type": "l2"
75
+ }
76
+ ]
77
+ ]
78
+ },
79
+ "text_field_embedder": {
80
+ "token_embedders": {
81
+ "token_characters": {
82
+ "type": "character_encoding",
83
+ "embedding": {
84
+ "embedding_dim": 64
85
+ },
86
+ "encoder": {
87
+ "type": "cnn",
88
+ "embedding_dim": 64,
89
+ "ngram_filter_sizes": [
90
+ 5
91
+ ],
92
+ "num_filters": 200
93
+ }
94
+ },
95
+ "tokens": {
96
+ "type": "embedding",
97
+ "embedding_dim": 300,
98
+ "pretrained_file": "https://allennlp.s3.amazonaws.com/datasets/glove/glove.840B.300d.lower.converted.zip",
99
+ "trainable": false
100
+ }
101
+ }
102
+ }
103
+ },
104
+ "train_data_path": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/drop/drop_dataset.zip!drop_dataset/drop_dataset_train.json",
105
+ "validation_data_path": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/drop/drop_dataset.zip!drop_dataset/drop_dataset_dev.json",
106
+ "trainer": {
107
+ "callbacks": [
108
+ "tensorboard"
109
+ ],
110
+ "grad_norm": 5,
111
+ "moving_average": {
112
+ "type": "exponential",
113
+ "decay": 0.9999
114
+ },
115
+ "num_epochs": 50,
116
+ "optimizer": {
117
+ "type": "adam",
118
+ "betas": [
119
+ 0.8,
120
+ 0.999
121
+ ],
122
+ "eps": 1e-07,
123
+ "lr": 0.0005
124
+ },
125
+ "patience": 10,
126
+ "validation_metric": "+f1"
127
+ },
128
+ "vocabulary": {
129
+ "min_count": {
130
+ "token_characters": 200
131
+ },
132
+ "only_include_pretrained_words": true,
133
+ "pretrained_files": {
134
+ "tokens": "https://allennlp.s3.amazonaws.com/datasets/glove/glove.840B.300d.lower.converted.zip"
135
+ }
136
+ },
137
+ "data_loader": {
138
+ "batch_sampler": {
139
+ "type": "bucket",
140
+ "batch_size": 16
141
+ }
142
+ },
143
+ "validation_dataset_reader": {
144
+ "type": "drop",
145
+ "instance_format": "drop",
146
+ "passage_length_limit": 1000,
147
+ "question_length_limit": 100,
148
+ "skip_when_all_empty": [],
149
+ "token_indexers": {
150
+ "token_characters": {
151
+ "type": "characters",
152
+ "min_padding_length": 5
153
+ },
154
+ "tokens": {
155
+ "type": "single_id",
156
+ "lowercase_tokens": true
157
+ }
158
+ }
159
+ }
160
+ }
log/train/events.out.tfevents.1641518630.allennlp-server4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1168859509f2ce13825815b6ba6100912e3e4a8308acfe3e53e2b5e6f0286745
3
+ size 775836
log/validation/events.out.tfevents.1641518630.allennlp-server4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49f9f6ee3e67afd73090e6e9b491af6fe040c52ef8b45530a72b102800df142a
3
+ size 10640
metrics.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_epoch": 45,
3
+ "peak_worker_0_memory_MB": 11193.4921875,
4
+ "peak_gpu_0_memory_MB": 6340.828125,
5
+ "training_duration": "13:11:18.777333",
6
+ "epoch": 49,
7
+ "training_em": 0.5403715897718537,
8
+ "training_f1": 0.5968561358724769,
9
+ "training_loss": 1.685971354938646,
10
+ "training_reg_loss": 0.006671493937393214,
11
+ "training_worker_0_memory_MB": 11193.4921875,
12
+ "training_gpu_0_memory_MB": 6132.47216796875,
13
+ "validation_em": 0.46476510067114096,
14
+ "validation_f1": 0.49971371644295354,
15
+ "validation_loss": Infinity,
16
+ "validation_reg_loss": 0.0,
17
+ "best_validation_em": 0.4634018456375839,
18
+ "best_validation_f1": 0.4998416526845643,
19
+ "best_validation_loss": Infinity,
20
+ "best_validation_reg_loss": 0.0
21
+ }
vocabulary/.lock ADDED
File without changes
vocabulary/non_padded_namespaces.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *tags
2
+ *labels
vocabulary/token_characters.txt ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @@UNKNOWN@@
2
+ e
3
+ a
4
+ t
5
+ n
6
+ o
7
+ r
8
+ i
9
+ s
10
+ h
11
+ d
12
+ l
13
+ c
14
+ u
15
+ f
16
+ g
17
+ w
18
+ m
19
+ y
20
+ p
21
+ ,
22
+ .
23
+ b
24
+ k
25
+ v
26
+ 1
27
+ -
28
+ T
29
+ 0
30
+ 2
31
+ B
32
+ C
33
+ S
34
+ 3
35
+ 4
36
+ R
37
+ D
38
+ A
39
+ 5
40
+ 9
41
+ M
42
+ q
43
+ I
44
+ W
45
+ 7
46
+ 6
47
+ P
48
+ H
49
+ 8
50
+ F
51
+ J
52
+ L
53
+ G
54
+ N
55
+ '
56
+ E
57
+ %
58
+ x
59
+ )
60
+ (
61
+ ?
62
+ O
63
+ z
64
+ K
65
+ V
66
+ U
67
+ j
68
+ Q
69
+ "
70
+ ;
71
+ :
72
+ Y
73
+ &
74
+ /
75
+ #
76
+ $
77
+
78
+ Z
79
+ ²
80
+ é
81
+ X
82
+
83
+ á
84
+ ü
85
+
86
+ ó
87
+ ł
88
+ í
89
+ ö
90
+ +
91
+ £
92
+ ō
93
+ °
94
+ ä
95
+ š
96
+ è
97
+ ć
98
+ ç
99
+
100
+ Š
101
+ ø
102
+
103
+
104
+ ė
105
+ É
106
+ å
107
+ ú
108
+ č
109
+ ñ
110
+ ı
111
+ Ö
112
+ â
113
+ ž
114
+ ă
115
+ ū
116
+ Ō
117
+ ș
118
+ Á
vocabulary/tokens.txt ADDED
The diff for this file is too large to render. See raw diff
 
weights.th ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccfded94830c74c26457a4bc010c9621664d2ac16bfe3dec0173317ff1fe2595
3
+ size 64354621