SourCoachSauers committed
Commit 0115b42
1 Parent(s): 966e43b
color_descriptions

Browse files:
- README.md +6 -31
- adapter_config.json +5 -3
- adapter_model.safetensors +2 -2
- runs/Jul19_00-59-20_ip-172-18-179-228.us-west-2.compute.internal/events.out.tfevents.1721350777.ip-172-18-179-228.us-west-2.compute.internal.3457.0 +3 -0
- runs/Jul19_01-03-17_ip-172-18-179-228.us-west-2.compute.internal/events.out.tfevents.1721351010.ip-172-18-179-228.us-west-2.compute.internal.3457.1 +3 -0
- runs/Jul19_01-04-11_ip-172-18-179-228.us-west-2.compute.internal/events.out.tfevents.1721351090.ip-172-18-179-228.us-west-2.compute.internal.3457.2 +3 -0
- runs/Jul19_01-11-33_ip-172-18-179-228.us-west-2.compute.internal/events.out.tfevents.1721351504.ip-172-18-179-228.us-west-2.compute.internal.4758.0 +3 -0
- runs/Jul19_02-02-18_ip-172-18-179-228.us-west-2.compute.internal/events.out.tfevents.1721354552.ip-172-18-179-228.us-west-2.compute.internal.4758.1 +3 -0
- runs/Jul19_04-32-22_ip-172-18-179-228.us-west-2.compute.internal/events.out.tfevents.1721363551.ip-172-18-179-228.us-west-2.compute.internal.4758.2 +3 -0
- runs/Jul19_04-33-51_ip-172-18-179-228.us-west-2.compute.internal/events.out.tfevents.1721363639.ip-172-18-179-228.us-west-2.compute.internal.4758.3 +3 -0
- tokenizer.json +6 -28
- tokenizer_config.json +1 -1
- training_args.bin +1 -1
README.md CHANGED
@@ -17,8 +17,6 @@ should probably proofread and complete it, then remove this comment. -->
 # results
 
 This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T](https://huggingface.co/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T) on an unknown dataset.
-It achieves the following results on the evaluation set:
-- Loss: 0.5881
 
 ## Model description
 
@@ -37,48 +35,25 @@ More information needed
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- learning_rate:
+- learning_rate: 5e-05
 - train_batch_size: 16
-- eval_batch_size:
+- eval_batch_size: 16
 - seed: 42
-- gradient_accumulation_steps:
+- gradient_accumulation_steps: 2
-- total_train_batch_size:
+- total_train_batch_size: 32
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.03
-- num_epochs:
+- num_epochs: 4
 
 ### Training results
 
-| Training Loss | Epoch  | Step | Validation Loss |
-|:-------------:|:------:|:----:|:---------------:|
-| 0.6511        | 0.0948 | 50   | 0.7913          |
-| 0.5479        | 0.1896 | 100  | 0.7001          |
-| 0.5125        | 0.2844 | 150  | 0.6768          |
-| 0.4974        | 0.3791 | 200  | 0.6564          |
-| 0.4947        | 0.4739 | 250  | 0.6490          |
-| 0.4802        | 0.5687 | 300  | 0.6383          |
-| 0.4762        | 0.6635 | 350  | 0.6289          |
-| 0.4678        | 0.7583 | 400  | 0.6233          |
-| 0.4742        | 0.8531 | 450  | 0.6157          |
-| 0.4633        | 0.9479 | 500  | 0.6127          |
-| 0.6096        | 1.0427 | 550  | 0.6027          |
-| 0.6137        | 1.1374 | 600  | 0.5986          |
-| 0.6163        | 1.2322 | 650  | 0.5963          |
-| 0.6078        | 1.3270 | 700  | 0.5943          |
-| 0.6019        | 1.4218 | 750  | 0.5921          |
-| 0.615         | 1.5166 | 800  | 0.5906          |
-| 0.6061        | 1.6114 | 850  | 0.5897          |
-| 0.6106        | 1.7062 | 900  | 0.5890          |
-| 0.6027        | 1.8009 | 950  | 0.5886          |
-| 0.6094        | 1.8957 | 1000 | 0.5883          |
-| 0.5261        | 1.9905 | 1050 | 0.5881          |
 
 
 ### Framework versions
 
 - PEFT 0.11.1
 - Transformers 4.42.4
-- Pytorch 2.
+- Pytorch 2.2.1+cu121
 - Datasets 2.20.0
 - Tokenizers 0.19.1
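The updated hyperparameter list maps directly onto `transformers.TrainingArguments`. A minimal sketch, assuming an output directory name (the card does not specify one); the Adam betas and epsilon listed in the card are also the library defaults:

```python
from transformers import TrainingArguments

# Sketch reproducing the hyperparameters listed in the updated card.
# output_dir is an assumption, not taken from the commit.
training_args = TrainingArguments(
    output_dir="results",
    learning_rate=5e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    gradient_accumulation_steps=2,  # 16 x 2 = total train batch size 32
    num_train_epochs=4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    seed=42,
    adam_beta1=0.9,    # listed values, same as the defaults
    adam_beta2=0.999,
    adam_epsilon=1e-8,
)
```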
adapter_config.json CHANGED
@@ -20,11 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "gate_proj",
-    "k_proj",
     "o_proj",
+    "up_proj",
+    "v_proj",
     "q_proj",
-    "
+    "k_proj",
+    "gate_proj",
+    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
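The new `target_modules` list covers every attention and MLP projection in a Llama-style block. A minimal PEFT sketch reproducing it; `r`, `lora_alpha`, and `lora_dropout` are illustrative placeholders, since those fields sit outside this hunk:

```python
from peft import LoraConfig

# Sketch of a LoraConfig matching the new target_modules list.
# r, lora_alpha, and lora_dropout are placeholders; the actual values
# live elsewhere in adapter_config.json.
config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",  # attention projections
        "gate_proj", "up_proj", "down_proj",     # MLP projections
    ],
    task_type="CAUSAL_LM",
)
```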
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:645abf6023751672c8614dddd0d1ee30fa2675b3814886885fb9520d2face926
+size 100966336
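Both sides of this hunk are Git LFS pointer files, not the weights themselves; the blob is addressed by its SHA-256 `oid`. A sketch of verifying a downloaded file against the pointer (the local path is an assumption):

```python
import hashlib

def sha256_of(path: str) -> str:
    # Hash the file in 1 MiB chunks to avoid loading it all into memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

# oid from the new pointer; the local path is a placeholder.
expected = "645abf6023751672c8614dddd0d1ee30fa2675b3814886885fb9520d2face926"
assert sha256_of("adapter_model.safetensors") == expected
```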
runs/Jul19_00-59-20_ip-172-18-179-228.us-west-2.compute.internal/events.out.tfevents.1721350777.ip-172-18-179-228.us-west-2.compute.internal.3457.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9cc54f55514bc1f151f2e4020230daea977ea40ac20ece589abae1eb2c12039d
+size 5692
runs/Jul19_01-03-17_ip-172-18-179-228.us-west-2.compute.internal/events.out.tfevents.1721351010.ip-172-18-179-228.us-west-2.compute.internal.3457.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef7d5908131af25807a4cf5bfff2bdec65469af6b8060c697869f4e861e5e05d
+size 5692
runs/Jul19_01-04-11_ip-172-18-179-228.us-west-2.compute.internal/events.out.tfevents.1721351090.ip-172-18-179-228.us-west-2.compute.internal.3457.2 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9090eaa40c6aba1a3a90f5a7c055d4d70dec79c9e19b0e00d9826fc8486e85a
+size 4184
runs/Jul19_01-11-33_ip-172-18-179-228.us-west-2.compute.internal/events.out.tfevents.1721351504.ip-172-18-179-228.us-west-2.compute.internal.4758.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:006f9639b20c8a6cac56e074c518fa863febd289b687887488a9236d1eea0c08
+size 13801
runs/Jul19_02-02-18_ip-172-18-179-228.us-west-2.compute.internal/events.out.tfevents.1721354552.ip-172-18-179-228.us-west-2.compute.internal.4758.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d26e819e9ed3bf2849bb33bf63a34187b22adce70676c0c27caecb09bbe33404
+size 209850
runs/Jul19_04-32-22_ip-172-18-179-228.us-west-2.compute.internal/events.out.tfevents.1721363551.ip-172-18-179-228.us-west-2.compute.internal.4758.2 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5df9aa8e123395c1f7631afb3a48a5cda27c8aa4730ef75d2545d364d2d3586
+size 8584
runs/Jul19_04-33-51_ip-172-18-179-228.us-west-2.compute.internal/events.out.tfevents.1721363639.ip-172-18-179-228.us-west-2.compute.internal.4758.3 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c480f55a4e64a0ddf47cab4b8e4fdb6c76ab60679366f8d846330f5de461ef5
+size 895952
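The seven files above are TensorBoard event logs, also stored as LFS pointers. A sketch of inspecting one run programmatically with TensorBoard's event accumulator, assuming the pointers have been resolved to the real files:

```python
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# One of the run directories added in this commit.
run_dir = "runs/Jul19_04-33-51_ip-172-18-179-228.us-west-2.compute.internal"

acc = EventAccumulator(run_dir)
acc.Reload()  # parse the tfevents file(s) in the directory

# List the scalar tags (e.g. train/loss, eval/loss) and how many points each has.
for tag in acc.Tags()["scalars"]:
    print(tag, len(acc.Scalars(tag)))
```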
tokenizer.json CHANGED
@@ -1,6 +1,11 @@
 {
   "version": "1.0",
-  "truncation":
+  "truncation": {
+    "direction": "Right",
+    "max_length": 4096,
+    "strategy": "LongestFirst",
+    "stride": 0
+  },
   "padding": null,
   "added_tokens": [
     {
@@ -62,12 +67,6 @@
           "id": "A",
           "type_id": 0
         }
-      },
-      {
-        "SpecialToken": {
-          "id": "</s>",
-          "type_id": 0
-        }
       }
     ],
     "pair": [
@@ -83,12 +82,6 @@
           "type_id": 0
         }
       },
-      {
-        "SpecialToken": {
-          "id": "</s>",
-          "type_id": 0
-        }
-      },
       {
         "SpecialToken": {
           "id": "<s>",
@@ -100,24 +93,9 @@
           "id": "B",
           "type_id": 1
         }
-      },
-      {
-        "SpecialToken": {
-          "id": "</s>",
-          "type_id": 1
-        }
       }
     ],
     "special_tokens": {
-      "</s>": {
-        "id": "</s>",
-        "ids": [
-          2
-        ],
-        "tokens": [
-          "</s>"
-        ]
-      },
       "<s>": {
         "id": "<s>",
         "ids": [
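The first hunk fills in an explicit `truncation` block; the remaining hunks drop `</s>` from the post-processor templates and the `special_tokens` table, consistent with the `add_eos_token: false` change in tokenizer_config.json below. A sketch that produces an equivalent truncation setting with the `tokenizers` library (the file path is a placeholder):

```python
from tokenizers import Tokenizer

tok = Tokenizer.from_file("tokenizer.json")

# Equivalent to the truncation block added in this diff; the lowercase
# values here serialize as "LongestFirst" and "Right" in tokenizer.json.
tok.enable_truncation(
    max_length=4096,
    stride=0,
    strategy="longest_first",
    direction="right",
)
```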
tokenizer_config.json CHANGED
@@ -1,6 +1,6 @@
 {
   "add_bos_token": true,
-  "add_eos_token":
+  "add_eos_token": false,
   "add_prefix_space": null,
   "added_tokens_decoder": {
     "0": {
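For Llama-family tokenizers, the same flag can also be requested at load time in `transformers`; a sketch using this model's base checkpoint as an illustrative repo id:

```python
from transformers import AutoTokenizer

# add_eos_token=False mirrors the value set in this commit: the tokenizer
# will not append </s> to encoded sequences.
tokenizer = AutoTokenizer.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T",
    add_eos_token=False,
)
```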
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:dfd7cdd1c35aab3464be0d42f2aa51cc21686ab26d0335c1bb004cbb44880bdd
 size 5368
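`training_args.bin` is the pickled `TrainingArguments` object that `Trainer` saves alongside a run. A sketch for inspecting it, assuming the LFS pointer has been resolved:

```python
import torch

# weights_only=False is needed on recent PyTorch, because this file is a
# full pickle of a TrainingArguments object, not a tensor checkpoint.
args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.num_train_epochs, args.lr_scheduler_type)
```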