New model version with better ROUGE scores

Browse files

Files changed (10) hide show

README.md +33 -11
onnx/decoder_model.onnx +1 -1
onnx/decoder_model_merged.onnx +1 -1
onnx/decoder_model_merged_quantized.onnx +1 -1
onnx/decoder_model_quantized.onnx +1 -1
onnx/decoder_with_past_model.onnx +1 -1
onnx/decoder_with_past_model_quantized.onnx +1 -1
onnx/encoder_model.onnx +1 -1
onnx/encoder_model_quantized.onnx +2 -2
quantize_config.json +68 -68

README.md CHANGED Viewed

@@ -24,16 +24,16 @@ pipeline_tag: image-to-text
 library_name: transformers.js
 ---
-# ViT-GPT2-FlowerCaptioner-ONNX
 This model is a fine-tuned version of [nlpconnect/vit-gpt2-image-captioning](https://huggingface.co/nlpconnect/vit-gpt2-image-captioning) on the [FlowerEvolver-dataset](https://huggingface.co/datasets/cristianglezm/FlowerEvolver-Dataset) dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.3075
-- Rouge1: 66.3702
-- Rouge2: 45.5642
-- Rougel: 61.401
-- Rougelsum: 64.0587
-- Gen Len: 49.97
 ## sample running code
@@ -74,15 +74,37 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 500
-- num_epochs: 3
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss | Rouge1  | Rouge2  | Rougel  | Rougelsum | Gen Len |
 |:-------------:|:-----:|:----:|:---------------:|:-------:|:-------:|:-------:|:---------:|:-------:|
-| 0.6755        | 1.0   | 100  | 0.5339          | 60.9402 | 39.3331 | 54.6889 | 59.45     | 36.75   |
-| 0.3666        | 2.0   | 200  | 0.3331          | 65.5149 | 43.0245 | 59.3121 | 62.7329   | 52.82   |
-| 0.2983        | 3.0   | 300  | 0.3075          | 66.3702 | 45.5642 | 61.401  | 64.0587   | 49.97   |
 ### Framework versions

 library_name: transformers.js
 ---
+# ViT-GPT2-FlowerCaptioner
 This model is a fine-tuned version of [nlpconnect/vit-gpt2-image-captioning](https://huggingface.co/nlpconnect/vit-gpt2-image-captioning) on the [FlowerEvolver-Dataset](https://huggingface.co/datasets/cristianglezm/FlowerEvolver-Dataset).
 It achieves the following results on the evaluation set (values from the final training epoch, epoch 25):
+- Loss: 0.4930
+- Rouge1: 68.3498
+- Rouge2: 46.7534
+- Rougel: 62.3763
+- Rougelsum: 65.9575
+- Gen Len: 49.82
 ## Sample usage code
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 500
+- num_epochs: 25
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss | Rouge1  | Rouge2  | Rougel  | Rougelsum | Gen Len |
 |:-------------:|:-----:|:----:|:---------------:|:-------:|:-------:|:-------:|:---------:|:-------:|
+| 0.6986        | 1.0   | 100  | 0.5339          | 64.9813 | 42.4686 | 58.2586 | 63.3933   | 47.25   |
+| 0.3408        | 2.0   | 200  | 0.3263          | 67.5461 | 46.5219 | 62.7962 | 65.6509   | 47.39   |
+| 0.2797        | 3.0   | 300  | 0.2829          | 65.0704 | 42.0682 | 58.4268 | 63.2368   | 56.8    |
+| 0.2584        | 4.0   | 400  | 0.2588          | 65.5074 | 45.227  | 60.2469 | 63.4253   | 52.25   |
+| 0.2589        | 5.0   | 500  | 0.2607          | 66.7346 | 45.8264 | 61.7373 | 64.8857   | 50.64   |
+| 0.2179        | 6.0   | 600  | 0.2697          | 63.8334 | 42.997  | 58.1585 | 61.7704   | 52.43   |
+| 0.1662        | 7.0   | 700  | 0.2631          | 68.6188 | 48.3329 | 63.9474 | 66.6006   | 46.94   |
+| 0.161         | 8.0   | 800  | 0.2749          | 69.0046 | 48.1421 | 63.7844 | 66.8317   | 49.74   |
+| 0.1207        | 9.0   | 900  | 0.3117          | 70.0357 | 48.9002 | 64.416  | 67.7582   | 48.66   |
+| 0.0909        | 10.0  | 1000 | 0.3408          | 65.9578 | 45.2324 | 60.2838 | 63.7493   | 46.92   |
+| 0.0749        | 11.0  | 1100 | 0.3516          | 67.4244 | 46.1985 | 61.6408 | 65.5371   | 46.61   |
+| 0.0665        | 12.0  | 1200 | 0.3730          | 68.6911 | 47.7089 | 63.0381 | 66.6956   | 47.89   |
+| 0.0522        | 13.0  | 1300 | 0.3891          | 67.2365 | 45.4165 | 61.4063 | 64.857    | 48.91   |
+| 0.0355        | 14.0  | 1400 | 0.4128          | 69.1494 | 47.9278 | 63.3334 | 66.5969   | 50.55   |
+| 0.0309        | 15.0  | 1500 | 0.4221          | 66.2447 | 44.937  | 60.1403 | 63.8541   | 50.71   |
+| 0.0265        | 16.0  | 1600 | 0.4343          | 67.8178 | 46.7084 | 61.8173 | 65.4375   | 50.85   |
+| 0.0158        | 17.0  | 1700 | 0.4577          | 67.9846 | 45.9562 | 61.6353 | 65.7207   | 50.81   |
+| 0.0166        | 18.0  | 1800 | 0.4731          | 69.0971 | 47.7001 | 62.856  | 66.7796   | 50.01   |
+| 0.0121        | 19.0  | 1900 | 0.4657          | 68.1397 | 46.4258 | 62.2696 | 65.9332   | 49.15   |
+| 0.0095        | 20.0  | 2000 | 0.4793          | 68.6497 | 47.9446 | 63.0466 | 66.5409   | 50.96   |
+| 0.0086        | 21.0  | 2100 | 0.4780          | 68.4363 | 46.7296 | 62.359  | 66.2626   | 50.02   |
+| 0.0068        | 22.0  | 2200 | 0.4863          | 67.5415 | 46.0821 | 61.57   | 65.4613   | 49.5    |
+| 0.0061        | 23.0  | 2300 | 0.4892          | 68.1283 | 46.5802 | 62.0832 | 66.0203   | 50.21   |
+| 0.006         | 24.0  | 2400 | 0.4912          | 68.1723 | 46.3239 | 62.2007 | 65.6725   | 49.89   |
+| 0.0057        | 25.0  | 2500 | 0.4930          | 68.3498 | 46.7534 | 62.3763 | 65.9575   | 49.82   |
 ### Framework versions

onnx/decoder_model.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ca2595f2af7fb6d879eb3e80f4e0ee2958c5c2dce039ec9bfaddd677a7001b43
 size 613153019

 version https://git-lfs.github.com/spec/v1
+oid sha256:f51820bb5c0f5a4ed7a56f8cebc0941ea4e76cbcd8b92029e6a692fb01e5a078
 size 613153019

onnx/decoder_model_merged.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cdc6df54706d46d199dbe8bee353757c905d9dd2f4355b9f7771a33ca7a24f8d
 size 615070521

 version https://git-lfs.github.com/spec/v1
+oid sha256:bc7e62770352f6844e064cdf22cabb6b00c8f7d166c0839e07156f7d2bae73c5
 size 615070521

onnx/decoder_model_merged_quantized.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2585e1276bdb3da15ed1041c53593268721a174b0b39c0a28863544db6245ad2
 size 158063351

 version https://git-lfs.github.com/spec/v1
+oid sha256:b482ca61962a11ddd30d2a279a5680feedb4cc19e207fee8c2d29860e548deda
 size 158063351

onnx/decoder_model_quantized.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d67a2e2c9bd64f893cd4afe1126550ec9175309bfb522663205e6ed8bec23ba1
 size 155710792

 version https://git-lfs.github.com/spec/v1
+oid sha256:81802b5229986a911d3ad6e9007fb540702b3ee6b42cbb8e687b816dfd948712
 size 155710792

onnx/decoder_with_past_model.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:21dd43d250fc5300859594e7a111e393adc5822798186aa1c18a125351258197
 size 613149344

 version https://git-lfs.github.com/spec/v1
+oid sha256:d8e6cb92689b1b31c3a6f9e4f04e8cf052e6f00e1b433553dc399f4df833ea01
 size 613149344

onnx/decoder_with_past_model_quantized.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0c23e86396e4ce9b690ae55f5922f68cd58cffcd8ad1068deaefbcff4e3cea1c
 size 155701341

 version https://git-lfs.github.com/spec/v1
+oid sha256:bcac5392f1f43d1fb64a0c4c4a9d8e32cc87d88213905997f43e72e73e660750
 size 155701341

onnx/encoder_model.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:091df2b5b67fde5a63f2323d5346063e0e1b312caa3e8aeeb62c382c4344d77c
 size 343410667

 version https://git-lfs.github.com/spec/v1
+oid sha256:4d18e307852a5dca94a0e2d2457e4eb3d397820ca509caa7d80aac41fba4b9aa
 size 343410667

onnx/encoder_model_quantized.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fabd01cd11696da81627401da9ef9bc6dea70d4995be902034b0ddfa2acdacbd
-size 87000252

 version https://git-lfs.github.com/spec/v1
+oid sha256:e7f417ac897abb1ab3f6df34df104737f9da1beb7ecafab5798bbb128fd9f90e
+size 87000254

quantize_config.json CHANGED Viewed

@@ -4,114 +4,114 @@
     "per_model_config": {
         "decoder_model": {
             "op_types": [
-                "Squeeze",
-                "MatMul",
                 "Pow",
-                "Tanh",
-                "Gather",
                 "Sub",
                 "ConstantOfShape",
-                "Mul",
                 "Where",
-                "Concat",
-                "Transpose",
                 "Div",
-                "Add",
-                "Slice",
                 "Unsqueeze",
-                "Softmax",
-                "Gemm",
-                "Constant",
-                "Range",
-                "LayerNormalization",
-                "Reshape",
-                "Shape",
-                "Cast",
-                "Split"
             ],
             "weight_type": "QInt8"
         },
         "decoder_model_merged": {
             "op_types": [
-                "Squeeze",
-                "MatMul",
                 "Pow",
-                "Tanh",
-                "Gather",
                 "If",
                 "Sub",
                 "ConstantOfShape",
-                "Mul",
                 "Where",
-                "Concat",
-                "Transpose",
                 "Div",
-                "Add",
-                "Slice",
                 "Unsqueeze",
-                "Softmax",
-                "Gemm",
-                "Constant",
-                "Range",
-                "LayerNormalization",
-                "Reshape",
-                "Shape",
-                "Cast",
-                "Split"
             ],
             "weight_type": "QInt8"
         },
         "decoder_with_past_model": {
             "op_types": [
-                "Squeeze",
-                "MatMul",
                 "Pow",
-                "Tanh",
-                "Gather",
                 "Sub",
                 "ConstantOfShape",
-                "Mul",
                 "Where",
-                "Concat",
-                "Transpose",
                 "Div",
-                "Add",
-                "Slice",
                 "Unsqueeze",
-                "Softmax",
-                "Gemm",
-                "Constant",
-                "Range",
-                "LayerNormalization",
-                "Reshape",
-                "Shape",
-                "Cast",
-                "Split"
             ],
             "weight_type": "QInt8"
         },
         "encoder_model": {
             "op_types": [
                 "MatMul",
-                "Gather",
-                "ConstantOfShape",
-                "Where",
                 "Mul",
-                "Expand",
                 "Concat",
                 "Transpose",
-                "Conv",
                 "Div",
-                "Add",
-                "Equal",
-                "Slice",
                 "Unsqueeze",
-                "Softmax",
-                "Constant",
-                "LayerNormalization",
-                "Reshape",
-                "Shape",
-                "Erf"
             ],
             "weight_type": "QUInt8"
         }

     "per_model_config": {
         "decoder_model": {
             "op_types": [
                 "Pow",
+                "Split",
+                "Softmax",
+                "Reshape",
+                "Range",
+                "Slice",
+                "Mul",
+                "Constant",
+                "Gemm",
+                "Shape",
                 "Sub",
+                "Concat",
+                "Tanh",
                 "ConstantOfShape",
+                "LayerNormalization",
+                "Cast",
+                "Squeeze",
                 "Where",
                 "Div",
+                "Gather",
+                "Transpose",
+                "MatMul",
                 "Unsqueeze",
+                "Add"
             ],
             "weight_type": "QInt8"
         },
         "decoder_model_merged": {
             "op_types": [
                 "Pow",
                 "If",
+                "Split",
+                "Softmax",
+                "Reshape",
+                "Range",
+                "Slice",
+                "Mul",
+                "Constant",
+                "Gemm",
+                "Shape",
                 "Sub",
+                "Concat",
+                "Tanh",
                 "ConstantOfShape",
+                "LayerNormalization",
+                "Cast",
+                "Squeeze",
                 "Where",
                 "Div",
+                "Gather",
+                "Transpose",
+                "MatMul",
                 "Unsqueeze",
+                "Add"
             ],
             "weight_type": "QInt8"
         },
         "decoder_with_past_model": {
             "op_types": [
                 "Pow",
+                "Split",
+                "Softmax",
+                "Reshape",
+                "Range",
+                "Slice",
+                "Mul",
+                "Constant",
+                "Gemm",
+                "Shape",
                 "Sub",
+                "Concat",
+                "Tanh",
                 "ConstantOfShape",
+                "LayerNormalization",
+                "Cast",
+                "Squeeze",
                 "Where",
                 "Div",
+                "Gather",
+                "Transpose",
+                "MatMul",
                 "Unsqueeze",
+                "Add"
             ],
             "weight_type": "QInt8"
         },
         "encoder_model": {
             "op_types": [
+                "Softmax",
+                "Reshape",
+                "Conv",
+                "Expand",
+                "Slice",
                 "MatMul",
                 "Mul",
+                "Constant",
+                "Erf",
+                "Shape",
                 "Concat",
+                "ConstantOfShape",
+                "LayerNormalization",
+                "Equal",
+                "Where",
+                "Gather",
                 "Transpose",
                 "Div",
                 "Unsqueeze",
+                "Add"
             ],
             "weight_type": "QUInt8"
         }