SourCoachSauers committed on
Commit
0115b42
1 Parent(s): 966e43b

color_descriptions

Browse files
README.md CHANGED
@@ -17,8 +17,6 @@ should probably proofread and complete it, then remove this comment. -->
17
  # results
18
 
19
  This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T](https://huggingface.co/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T) on an unknown dataset.
20
- It achieves the following results on the evaluation set:
21
- - Loss: 0.5881
22
 
23
  ## Model description
24
 
@@ -37,48 +35,25 @@ More information needed
37
  ### Training hyperparameters
38
 
39
  The following hyperparameters were used during training:
40
- - learning_rate: 0.0002
41
  - train_batch_size: 16
42
- - eval_batch_size: 4
43
  - seed: 42
44
- - gradient_accumulation_steps: 4
45
- - total_train_batch_size: 64
46
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
47
  - lr_scheduler_type: cosine
48
  - lr_scheduler_warmup_ratio: 0.03
49
- - num_epochs: 2
50
 
51
  ### Training results
52
 
53
- | Training Loss | Epoch | Step | Validation Loss |
54
- |:-------------:|:------:|:----:|:---------------:|
55
- | 0.6511 | 0.0948 | 50 | 0.7913 |
56
- | 0.5479 | 0.1896 | 100 | 0.7001 |
57
- | 0.5125 | 0.2844 | 150 | 0.6768 |
58
- | 0.4974 | 0.3791 | 200 | 0.6564 |
59
- | 0.4947 | 0.4739 | 250 | 0.6490 |
60
- | 0.4802 | 0.5687 | 300 | 0.6383 |
61
- | 0.4762 | 0.6635 | 350 | 0.6289 |
62
- | 0.4678 | 0.7583 | 400 | 0.6233 |
63
- | 0.4742 | 0.8531 | 450 | 0.6157 |
64
- | 0.4633 | 0.9479 | 500 | 0.6127 |
65
- | 0.6096 | 1.0427 | 550 | 0.6027 |
66
- | 0.6137 | 1.1374 | 600 | 0.5986 |
67
- | 0.6163 | 1.2322 | 650 | 0.5963 |
68
- | 0.6078 | 1.3270 | 700 | 0.5943 |
69
- | 0.6019 | 1.4218 | 750 | 0.5921 |
70
- | 0.615 | 1.5166 | 800 | 0.5906 |
71
- | 0.6061 | 1.6114 | 850 | 0.5897 |
72
- | 0.6106 | 1.7062 | 900 | 0.5890 |
73
- | 0.6027 | 1.8009 | 950 | 0.5886 |
74
- | 0.6094 | 1.8957 | 1000 | 0.5883 |
75
- | 0.5261 | 1.9905 | 1050 | 0.5881 |
76
 
77
 
78
  ### Framework versions
79
 
80
  - PEFT 0.11.1
81
  - Transformers 4.42.4
82
- - Pytorch 2.3.1+cu121
83
  - Datasets 2.20.0
84
  - Tokenizers 0.19.1
 
17
  # results
18
 
19
  This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T](https://huggingface.co/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T) on an unknown dataset.
 
 
20
 
21
  ## Model description
22
 
 
35
  ### Training hyperparameters
36
 
37
  The following hyperparameters were used during training:
38
+ - learning_rate: 5e-05
39
  - train_batch_size: 16
40
+ - eval_batch_size: 16
41
  - seed: 42
42
+ - gradient_accumulation_steps: 2
43
+ - total_train_batch_size: 32
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: cosine
46
  - lr_scheduler_warmup_ratio: 0.03
47
+ - num_epochs: 4
48
 
49
  ### Training results
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
 
53
  ### Framework versions
54
 
55
  - PEFT 0.11.1
56
  - Transformers 4.42.4
57
+ - Pytorch 2.2.1+cu121
58
  - Datasets 2.20.0
59
  - Tokenizers 0.19.1
adapter_config.json CHANGED
@@ -20,11 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "gate_proj",
24
- "k_proj",
25
  "o_proj",
 
 
26
  "q_proj",
27
- "v_proj"
 
 
28
  ],
29
  "task_type": "CAUSAL_LM",
30
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
 
23
  "o_proj",
24
+ "up_proj",
25
+ "v_proj",
26
  "q_proj",
27
+ "k_proj",
28
+ "gate_proj",
29
+ "down_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a2f716d20045f5a32c6b9a608f618466fe254493721f18005d2f1dd889029eb
3
- size 57701064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:645abf6023751672c8614dddd0d1ee30fa2675b3814886885fb9520d2face926
3
+ size 100966336
runs/Jul19_00-59-20_ip-172-18-179-228.us-west-2.compute.internal/events.out.tfevents.1721350777.ip-172-18-179-228.us-west-2.compute.internal.3457.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cc54f55514bc1f151f2e4020230daea977ea40ac20ece589abae1eb2c12039d
3
+ size 5692
runs/Jul19_01-03-17_ip-172-18-179-228.us-west-2.compute.internal/events.out.tfevents.1721351010.ip-172-18-179-228.us-west-2.compute.internal.3457.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef7d5908131af25807a4cf5bfff2bdec65469af6b8060c697869f4e861e5e05d
3
+ size 5692
runs/Jul19_01-04-11_ip-172-18-179-228.us-west-2.compute.internal/events.out.tfevents.1721351090.ip-172-18-179-228.us-west-2.compute.internal.3457.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9090eaa40c6aba1a3a90f5a7c055d4d70dec79c9e19b0e00d9826fc8486e85a
3
+ size 4184
runs/Jul19_01-11-33_ip-172-18-179-228.us-west-2.compute.internal/events.out.tfevents.1721351504.ip-172-18-179-228.us-west-2.compute.internal.4758.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:006f9639b20c8a6cac56e074c518fa863febd289b687887488a9236d1eea0c08
3
+ size 13801
runs/Jul19_02-02-18_ip-172-18-179-228.us-west-2.compute.internal/events.out.tfevents.1721354552.ip-172-18-179-228.us-west-2.compute.internal.4758.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d26e819e9ed3bf2849bb33bf63a34187b22adce70676c0c27caecb09bbe33404
3
+ size 209850
runs/Jul19_04-32-22_ip-172-18-179-228.us-west-2.compute.internal/events.out.tfevents.1721363551.ip-172-18-179-228.us-west-2.compute.internal.4758.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5df9aa8e123395c1f7631afb3a48a5cda27c8aa4730ef75d2545d364d2d3586
3
+ size 8584
runs/Jul19_04-33-51_ip-172-18-179-228.us-west-2.compute.internal/events.out.tfevents.1721363639.ip-172-18-179-228.us-west-2.compute.internal.4758.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c480f55a4e64a0ddf47cab4b8e4fdb6c76ab60679366f8d846330f5de461ef5
3
+ size 895952
tokenizer.json CHANGED
@@ -1,6 +1,11 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
@@ -62,12 +67,6 @@
62
  "id": "A",
63
  "type_id": 0
64
  }
65
- },
66
- {
67
- "SpecialToken": {
68
- "id": "</s>",
69
- "type_id": 0
70
- }
71
  }
72
  ],
73
  "pair": [
@@ -83,12 +82,6 @@
83
  "type_id": 0
84
  }
85
  },
86
- {
87
- "SpecialToken": {
88
- "id": "</s>",
89
- "type_id": 0
90
- }
91
- },
92
  {
93
  "SpecialToken": {
94
  "id": "<s>",
@@ -100,24 +93,9 @@
100
  "id": "B",
101
  "type_id": 1
102
  }
103
- },
104
- {
105
- "SpecialToken": {
106
- "id": "</s>",
107
- "type_id": 1
108
- }
109
  }
110
  ],
111
  "special_tokens": {
112
- "</s>": {
113
- "id": "</s>",
114
- "ids": [
115
- 2
116
- ],
117
- "tokens": [
118
- "</s>"
119
- ]
120
- },
121
  "<s>": {
122
  "id": "<s>",
123
  "ids": [
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 4096,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
  "padding": null,
10
  "added_tokens": [
11
  {
 
67
  "id": "A",
68
  "type_id": 0
69
  }
 
 
 
 
 
 
70
  }
71
  ],
72
  "pair": [
 
82
  "type_id": 0
83
  }
84
  },
 
 
 
 
 
 
85
  {
86
  "SpecialToken": {
87
  "id": "<s>",
 
93
  "id": "B",
94
  "type_id": 1
95
  }
 
 
 
 
 
 
96
  }
97
  ],
98
  "special_tokens": {
 
 
 
 
 
 
 
 
 
99
  "<s>": {
100
  "id": "<s>",
101
  "ids": [
tokenizer_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "add_bos_token": true,
3
- "add_eos_token": true,
4
  "add_prefix_space": null,
5
  "added_tokens_decoder": {
6
  "0": {
 
1
  {
2
  "add_bos_token": true,
3
+ "add_eos_token": false,
4
  "add_prefix_space": null,
5
  "added_tokens_decoder": {
6
  "0": {
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6d9c3d4237312a4bba1a2b8b3197b72e01c812c239c98b08f19e97a2f942d4e
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfd7cdd1c35aab3464be0d42f2aa51cc21686ab26d0335c1bb004cbb44880bdd
3
  size 5368