Doron Adler committed on
Commit
9795464
1 Parent(s): bffdb0a

Further train for another 800K steps on CC-100, Twitter, Updated Wikipedia

Browse files
.gitattributes CHANGED
@@ -16,3 +16,4 @@
16
  *.pth filter=lfs diff=lfs merge=lfs -text
17
  *tfevents* filter=lfs diff=lfs merge=lfs -text
18
  model.onnx filter=lfs diff=lfs merge=lfs -text
 
 
16
  *.pth filter=lfs diff=lfs merge=lfs -text
17
  *tfevents* filter=lfs diff=lfs merge=lfs -text
18
  model.onnx filter=lfs diff=lfs merge=lfs -text
19
+ tf_model.h5 filter=lfs diff=lfs merge=lfs -text
config.json CHANGED
@@ -26,7 +26,9 @@
26
  "n_layer": 6,
27
  "n_positions": 1024,
28
  "pad_token_id": 50257,
 
29
  "resid_pdrop": 0.1,
 
30
  "scale_attn_weights": true,
31
  "summary_activation": null,
32
  "summary_first_dropout": 0.1,
@@ -40,7 +42,7 @@
40
  }
41
  },
42
  "torch_dtype": "float32",
43
- "transformers_version": "4.9.0.dev0",
44
  "use_cache": true,
45
  "vocab_size": 50257
46
  }
 
26
  "n_layer": 6,
27
  "n_positions": 1024,
28
  "pad_token_id": 50257,
29
+ "reorder_and_upcast_attn": false,
30
  "resid_pdrop": 0.1,
31
+ "scale_attn_by_inverse_layer_idx": false,
32
  "scale_attn_weights": true,
33
  "summary_activation": null,
34
  "summary_first_dropout": 0.1,
 
42
  }
43
  },
44
  "torch_dtype": "float32",
45
+ "transformers_version": "4.22.0.dev0",
46
  "use_cache": true,
47
  "vocab_size": 50257
48
  }
distilgpt2-base-pretrained-he.mlpackage/Data/com.apple.CoreML/FeatureDescriptions.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "Outputs" : {
3
  "output_logits" : {
4
- "MLFeatureShortDescription" : ""
5
  }
6
  },
7
  "Inputs" : {
8
  "position_ids" : {
9
- "MLFeatureShortDescription" : ""
10
  },
11
  "input_ids" : {
12
- "MLFeatureShortDescription" : ""
13
  }
14
  },
15
  "TrainingInputs" : {
 
1
  {
2
  "Outputs" : {
3
  "output_logits" : {
4
+ "MLFeatureShortDescription" : "--"
5
  }
6
  },
7
  "Inputs" : {
8
  "position_ids" : {
9
+ "MLFeatureShortDescription" : "--"
10
  },
11
  "input_ids" : {
12
+ "MLFeatureShortDescription" : "--"
13
  }
14
  },
15
  "TrainingInputs" : {
distilgpt2-base-pretrained-he.mlpackage/Data/com.apple.CoreML/Metadata.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "MLModelVersionStringKey" : "1.0",
3
  "MLModelDescriptionKey" : "hebrew-distilgpt2\n\nA tiny GPT2 based Hebrew text generation model trained on a TPUv3-8 via the TPU Research Cloud Program.",
4
  "MLModelCreatorDefinedKey" : {
5
  "model_card_url" : "https:\/\/huggingface.co\/Norod78\/distilgpt2-base-pretrained-he"
 
1
  {
2
+ "MLModelVersionStringKey" : "1.01",
3
  "MLModelDescriptionKey" : "hebrew-distilgpt2\n\nA tiny GPT2 based Hebrew text generation model trained on a TPUv3-8 via the TPU Research Cloud Program.",
4
  "MLModelCreatorDefinedKey" : {
5
  "model_card_url" : "https:\/\/huggingface.co\/Norod78\/distilgpt2-base-pretrained-he"
distilgpt2-base-pretrained-he.mlpackage/Data/com.apple.CoreML/model.mlmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dacb6071938fc719d81c51db527ae9a756d4292c183cf05b8d3e646340a1544e
3
  size 482254328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3873d1a72c554711785b15a1b27af3824f1269c2efdc1de65181f1031b6565fa
3
  size 482254328
distilgpt2-base-pretrained-he.mlpackage/Manifest.json CHANGED
@@ -1,24 +1,24 @@
1
  {
2
  "fileFormatVersion": "1.0.0",
3
  "itemInfoEntries": {
4
- "06B636B9-5BE3-49A2-A06B-C09242D53296": {
5
  "author": "com.apple.CoreML",
6
  "description": "External FeatureDescription Overlay",
7
  "name": "FeatureDescriptions.json",
8
  "path": "com.apple.CoreML/FeatureDescriptions.json"
9
  },
10
- "A8586C2A-1DD7-4EEC-BB42-6B6242D7B530": {
 
 
 
 
 
 
11
  "author": "com.apple.CoreML",
12
  "description": "External Metadata Overlay",
13
  "name": "Metadata.json",
14
  "path": "com.apple.CoreML/Metadata.json"
15
- },
16
- "FD401BB0-2CA8-4DB7-BAD6-23B171A68404": {
17
- "author": "com.apple.CoreML",
18
- "description": "CoreML Model Specification",
19
- "name": "distilgpt2-base-pretrained-he-64-6.mlmodel",
20
- "path": "com.apple.CoreML/model.mlmodel"
21
  }
22
  },
23
- "rootModelIdentifier": "FD401BB0-2CA8-4DB7-BAD6-23B171A68404"
24
  }
 
1
  {
2
  "fileFormatVersion": "1.0.0",
3
  "itemInfoEntries": {
4
+ "5CA8030A-3376-40B9-9F77-FE7151EBE0F7": {
5
  "author": "com.apple.CoreML",
6
  "description": "External FeatureDescription Overlay",
7
  "name": "FeatureDescriptions.json",
8
  "path": "com.apple.CoreML/FeatureDescriptions.json"
9
  },
10
+ "5EFA4247-BC5C-47CE-9E64-F1747845A076": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Specification",
13
+ "name": "model.mlmodel",
14
+ "path": "com.apple.CoreML/model.mlmodel"
15
+ },
16
+ "63DB2BDF-B3FD-4CE4-9251-66F29CF34545": {
17
  "author": "com.apple.CoreML",
18
  "description": "External Metadata Overlay",
19
  "name": "Metadata.json",
20
  "path": "com.apple.CoreML/Metadata.json"
 
 
 
 
 
 
21
  }
22
  },
23
+ "rootModelIdentifier": "5EFA4247-BC5C-47CE-9E64-F1747845A076"
24
  }
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e802bdbb1a2aad4bce6864d4c79016213f70e7b877756cad0684a7c9bf70b0eb
3
  size 327652826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fda64ecb202c9ee0b7b7575f8db1c84efd42967f979ff7d1b2f2b0d32fd8b801
3
  size 327652826
model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b9459cc41adc2c406570135eb583a5d351b2f6ea636875d831cd2c36cfcc674
3
  size 488438673
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5276bfcba7af74619c75c10e9115f4198e2e30352a7cf0e7155c5152f236360
3
  size 488438673
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2b55dc856c55569caf9944826aa90c5342e3138e46525af27fb7729dba37ad2
3
- size 333973519
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bef275decdf17187b39d8e6e0a4cd1213c9d8970b2dc737c9139152d9180b295
3
+ size 333969117
tf_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21b09d55184b71e392d4b6ef2d1e064b34668413849a99d1a86a7312f6cf151e
3
+ size 327744824
tokenizer.json CHANGED
@@ -53,17 +53,20 @@
53
  "pre_tokenizer": {
54
  "type": "ByteLevel",
55
  "add_prefix_space": false,
56
- "trim_offsets": true
 
57
  },
58
  "post_processor": {
59
  "type": "ByteLevel",
60
  "add_prefix_space": true,
61
- "trim_offsets": false
 
62
  },
63
  "decoder": {
64
  "type": "ByteLevel",
65
  "add_prefix_space": true,
66
- "trim_offsets": true
 
67
  },
68
  "model": {
69
  "type": "BPE",
 
53
  "pre_tokenizer": {
54
  "type": "ByteLevel",
55
  "add_prefix_space": false,
56
+ "trim_offsets": true,
57
+ "use_regex": true
58
  },
59
  "post_processor": {
60
  "type": "ByteLevel",
61
  "add_prefix_space": true,
62
+ "trim_offsets": false,
63
+ "use_regex": true
64
  },
65
  "decoder": {
66
  "type": "ByteLevel",
67
  "add_prefix_space": true,
68
+ "trim_offsets": true,
69
+ "use_regex": true
70
  },
71
  "model": {
72
  "type": "BPE",
events.out.tfevents.1626785892.t1v-n-d9fb8529-w-0.668251.3.v2 → training_args.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e056841ed96b9de2cb1af0f327c3d566444a58da894d4e00021fe053564ac26
3
- size 1619079
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2e3602b258d62c2b8a7bd97787a1787754e6a8e44f598e6a5cd298cb5e3fe5a
3
+ size 3439