khangmacon committed
Commit b665019
1 Parent(s): f778197

End of training

README.md CHANGED
@@ -4,9 +4,23 @@ library_name: peft
 tags:
 - generated_from_trainer
 base_model: meta-llama/Meta-Llama-3-8B
+datasets:
+- khangmacon/llmtrain
+metrics:
+- accuracy
 model-index:
 - name: cyllama3
-  results: []
+  results:
+  - task:
+      type: text-generation
+      name: Causal Language Modeling
+    dataset:
+      name: khangmacon/llmtrain
+      type: khangmacon/llmtrain
+    metrics:
+    - type: accuracy
+      value: 0.5590444975644216
+      name: Accuracy
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -14,7 +28,10 @@ should probably proofread and complete it, then remove this comment. -->
 
 # cyllama3
 
-This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on an unknown dataset.
+This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on the khangmacon/llmtrain dataset.
+It achieves the following results on the evaluation set:
+- Loss: 1.9930
+- Accuracy: 0.5590
 
 ## Model description
 
@@ -47,6 +64,108 @@ The following hyperparameters were used during training:
 - lr_scheduler_warmup_ratio: 0.05
 - num_epochs: 1.0
 
+### Training results
+
+| Training Loss | Epoch | Step | Validation Loss | Accuracy |
+|:-------------:|:-----:|:-----:|:---------------:|:--------:|
+| 2.2432 | 0.01 | 500 | 2.1239 | 0.5358 |
+| 2.209 | 0.02 | 1000 | 2.0922 | 0.5404 |
+| 2.1988 | 0.03 | 1500 | 2.0742 | 0.5436 |
+| 2.1877 | 0.04 | 2000 | 2.0615 | 0.5463 |
+| 2.1743 | 0.05 | 2500 | 2.0514 | 0.5479 |
+| 2.1885 | 0.06 | 3000 | 2.0427 | 0.5495 |
+| 2.1883 | 0.07 | 3500 | 2.0355 | 0.5509 |
+| 2.1954 | 0.08 | 4000 | 2.0298 | 0.5519 |
+| 2.1597 | 0.09 | 4500 | 2.0254 | 0.5526 |
+| 2.1763 | 0.1 | 5000 | 2.0222 | 0.5532 |
+| 2.1413 | 0.11 | 5500 | 2.0195 | 0.5541 |
+| 2.1812 | 0.12 | 6000 | 2.0169 | 0.5545 |
+| 2.1526 | 0.14 | 6500 | 2.0148 | 0.5547 |
+| 2.155 | 0.15 | 7000 | 2.0131 | 0.5554 |
+| 2.1594 | 0.16 | 7500 | 2.0110 | 0.5558 |
+| 2.1681 | 0.17 | 8000 | 2.0097 | 0.5559 |
+| 2.1572 | 0.18 | 8500 | 2.0083 | 0.5562 |
+| 2.0943 | 0.19 | 9000 | 2.0074 | 0.5566 |
+| 2.1421 | 0.2 | 9500 | 2.0063 | 0.5566 |
+| 2.1196 | 0.21 | 10000 | 2.0049 | 0.5568 |
+| 2.1634 | 0.22 | 10500 | 2.0042 | 0.5568 |
+| 2.1361 | 0.23 | 11000 | 2.0035 | 0.5573 |
+| 2.1614 | 0.24 | 11500 | 2.0027 | 0.5572 |
+| 2.1205 | 0.25 | 12000 | 2.0021 | 0.5576 |
+| 2.0984 | 0.26 | 12500 | 2.0011 | 0.5576 |
+| 2.1226 | 0.27 | 13000 | 2.0006 | 0.5575 |
+| 2.1054 | 0.28 | 13500 | 2.0001 | 0.5577 |
+| 2.1297 | 0.29 | 14000 | 1.9997 | 0.5578 |
+| 2.1233 | 0.3 | 14500 | 1.9988 | 0.5581 |
+| 2.1348 | 0.31 | 15000 | 1.9984 | 0.5581 |
+| 2.1494 | 0.32 | 15500 | 1.9980 | 0.5582 |
+| 2.0827 | 0.33 | 16000 | 1.9976 | 0.5584 |
+| 2.0991 | 0.34 | 16500 | 1.9975 | 0.5582 |
+| 2.1108 | 0.35 | 17000 | 1.9972 | 0.5582 |
+| 2.1209 | 0.36 | 17500 | 1.9968 | 0.5583 |
+| 2.1012 | 0.37 | 18000 | 1.9963 | 0.5584 |
+| 2.1155 | 0.38 | 18500 | 1.9959 | 0.5585 |
+| 2.1493 | 0.4 | 19000 | 1.9956 | 0.5585 |
+| 2.1219 | 0.41 | 19500 | 1.9953 | 0.5587 |
+| 2.1584 | 0.42 | 20000 | 1.9952 | 0.5588 |
+| 2.1167 | 0.43 | 20500 | 1.9950 | 0.5587 |
+| 2.1507 | 0.44 | 21000 | 1.9948 | 0.5586 |
+| 2.1043 | 0.45 | 21500 | 1.9946 | 0.5587 |
+| 2.0864 | 0.46 | 22000 | 1.9945 | 0.5587 |
+| 2.1074 | 0.47 | 22500 | 1.9943 | 0.5587 |
+| 2.0858 | 0.48 | 23000 | 1.9942 | 0.5590 |
+| 2.1178 | 0.49 | 23500 | 1.9941 | 0.5588 |
+| 2.1148 | 0.5 | 24000 | 1.9940 | 0.5588 |
+| 2.1165 | 0.51 | 24500 | 1.9939 | 0.5588 |
+| 2.1012 | 0.52 | 25000 | 1.9938 | 0.5590 |
+| 2.1573 | 0.53 | 25500 | 1.9936 | 0.5590 |
+| 2.1674 | 0.54 | 26000 | 1.9936 | 0.5589 |
+| 2.1184 | 0.55 | 26500 | 1.9935 | 0.5590 |
+| 2.1424 | 0.56 | 27000 | 1.9935 | 0.5590 |
+| 2.1437 | 0.57 | 27500 | 1.9935 | 0.5590 |
+| 2.1244 | 0.58 | 28000 | 1.9933 | 0.5591 |
+| 2.0767 | 0.59 | 28500 | 1.9933 | 0.5589 |
+| 2.1182 | 0.6 | 29000 | 1.9934 | 0.5591 |
+| 2.1277 | 0.61 | 29500 | 1.9933 | 0.5591 |
+| 2.1407 | 0.62 | 30000 | 1.9932 | 0.5591 |
+| 2.1222 | 0.63 | 30500 | 1.9932 | 0.5591 |
+| 2.1146 | 0.64 | 31000 | 1.9931 | 0.5591 |
+| 2.1441 | 0.65 | 31500 | 1.9932 | 0.5591 |
+| 2.1224 | 0.67 | 32000 | 1.9931 | 0.5590 |
+| 2.0878 | 0.68 | 32500 | 1.9932 | 0.5591 |
+| 2.1172 | 0.69 | 33000 | 1.9932 | 0.5590 |
+| 2.1166 | 0.7 | 33500 | 1.9931 | 0.5592 |
+| 2.1054 | 0.71 | 34000 | 1.9931 | 0.5591 |
+| 2.0972 | 0.72 | 34500 | 1.9931 | 0.5590 |
+| 2.1228 | 0.73 | 35000 | 1.9931 | 0.5590 |
+| 2.1231 | 0.74 | 35500 | 1.9931 | 0.5592 |
+| 2.0974 | 0.75 | 36000 | 1.9931 | 0.5590 |
+| 2.1025 | 0.76 | 36500 | 1.9931 | 0.5591 |
+| 2.1217 | 0.77 | 37000 | 1.9931 | 0.5590 |
+| 2.1227 | 0.78 | 37500 | 1.9930 | 0.5591 |
+| 2.1272 | 0.79 | 38000 | 1.9931 | 0.5592 |
+| 2.117 | 0.8 | 38500 | 1.9931 | 0.5591 |
+| 2.1325 | 0.81 | 39000 | 1.9931 | 0.5591 |
+| 2.1046 | 0.82 | 39500 | 1.9930 | 0.5591 |
+| 2.1096 | 0.83 | 40000 | 1.9930 | 0.5591 |
+| 2.1149 | 0.84 | 40500 | 1.9931 | 0.5591 |
+| 2.122 | 0.85 | 41000 | 1.9931 | 0.5591 |
+| 2.1137 | 0.86 | 41500 | 1.9931 | 0.5591 |
+| 2.0983 | 0.87 | 42000 | 1.9930 | 0.5590 |
+| 2.1109 | 0.88 | 42500 | 1.9931 | 0.5591 |
+| 2.172 | 0.89 | 43000 | 1.9930 | 0.5590 |
+| 2.0882 | 0.9 | 43500 | 1.9930 | 0.5591 |
+| 2.0646 | 0.91 | 44000 | 1.9930 | 0.5591 |
+| 2.1223 | 0.93 | 44500 | 1.9930 | 0.5591 |
+| 2.1342 | 0.94 | 45000 | 1.9930 | 0.5591 |
+| 2.0991 | 0.95 | 45500 | 1.9930 | 0.5590 |
+| 2.1431 | 0.96 | 46000 | 1.9930 | 0.5592 |
+| 2.0965 | 0.97 | 46500 | 1.9931 | 0.5590 |
+| 2.1377 | 0.98 | 47000 | 1.9931 | 0.5592 |
+| 2.1118 | 0.99 | 47500 | 1.9931 | 0.5592 |
+| 2.089 | 1.0 | 48000 | 1.9930 | 0.5590 |
+
+
 ### Framework versions
 
 - PEFT 0.10.1.dev0
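Since this commit ships a PEFT LoRA adapter for meta-llama/Meta-Llama-3-8B, a minimal usage sketch follows. It assumes the adapter lives at `khangmacon/cyllama3` (inferred from the user and model name on this commit, not stated in the diff) and uses only standard `transformers`/`peft` calls:

```python
# Minimal sketch: attach the LoRA adapter from this repo to the base model.
# "khangmacon/cyllama3" is an assumed repo id; adjust to the actual adapter repo.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "meta-llama/Meta-Llama-3-8B"
adapter_id = "khangmacon/cyllama3"  # assumption, not confirmed by the diff

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.bfloat16)
model = PeftModel.from_pretrained(base, adapter_id)  # loads the adapter weights on top

inputs = tokenizer("Hello", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```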
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f8d2cde08e33ca2f842ae37ca875d9beef71974ca2f019b674992367e56fdf73
+oid sha256:94493ffb396ca39e82a3e8069f1d5c4062cb624e6a7b0b6924dbc5816f4aab9f
 size 2436952864
all_results.json ADDED
@@ -0,0 +1,15 @@
+{
+    "epoch": 1.0,
+    "eval_accuracy": 0.5590444975644216,
+    "eval_loss": 1.9930245876312256,
+    "eval_runtime": 16.3839,
+    "eval_samples": 595,
+    "eval_samples_per_second": 36.316,
+    "eval_steps_per_second": 1.16,
+    "perplexity": 7.337693730732541,
+    "train_loss": 2.1293136361829954,
+    "train_runtime": 442446.2037,
+    "train_samples": 6156843,
+    "train_samples_per_second": 13.915,
+    "train_steps_per_second": 0.109
+}
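The `perplexity` field here is simply the exponential of `eval_loss` (for causal language modeling, perplexity = exp of the mean cross-entropy loss), which the reported numbers confirm:

```python
# Consistency check: perplexity is exp(eval_loss) for causal LM evaluation.
import math

eval_loss = 1.9930245876312256
print(math.exp(eval_loss))  # ~7.337693730732541, matching the reported perplexity
```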
eval_results.json ADDED
@@ -0,0 +1,10 @@
+{
+    "epoch": 1.0,
+    "eval_accuracy": 0.5590444975644216,
+    "eval_loss": 1.9930245876312256,
+    "eval_runtime": 16.3839,
+    "eval_samples": 595,
+    "eval_samples_per_second": 36.316,
+    "eval_steps_per_second": 1.16,
+    "perplexity": 7.337693730732541
+}
pt_lora_model/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "up_proj",
-    "gate_proj",
-    "q_proj",
-    "v_proj",
+    "k_proj",
     "o_proj",
+    "gate_proj",
     "down_proj",
-    "k_proj"
+    "up_proj",
+    "q_proj",
+    "v_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
train_results.json ADDED
@@ -0,0 +1,8 @@
+{
+    "epoch": 1.0,
+    "train_loss": 2.1293136361829954,
+    "train_runtime": 442446.2037,
+    "train_samples": 6156843,
+    "train_samples_per_second": 13.915,
+    "train_steps_per_second": 0.109
+}
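The throughput figures are internally consistent, and combined with the 48,000 steps at epoch 1.0 in the results table above they imply an effective batch of roughly 128 samples per optimizer step (a derived figure, not stated anywhere in the files):

```python
# Consistency check on the reported training throughput (values from the JSON).
train_samples = 6156843
train_runtime = 442446.2037  # seconds

print(train_samples / train_runtime)  # ~13.915 samples/s, as reported
print(48000 / train_runtime)          # ~0.108 steps/s, consistent with the reported 0.109
print(train_samples / 48000)          # ~128.3 -> effective batch of ~128 samples/step (derived)
```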
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff