huggingartists

Files changed (12) hide show

README.md CHANGED Viewed

@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/macan")
 ```
-[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/1v1lws1p/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on MACAN's lyrics.
-Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/uechokog) for full transparency and reproducibility.
-At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/uechokog/artifacts) is logged and versioned.
 ## How to use

 dataset = load_dataset("huggingartists/macan")
 ```
+[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/3u3vx3xp/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on MACAN's lyrics.
+Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/23krf2tu) for full transparency and reproducibility.
+At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/23krf2tu/artifacts) is logged and versioned.
 ## How to use

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "gpt2",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"
@@ -18,7 +18,9 @@
   "n_inner": null,
   "n_layer": 12,
   "n_positions": 1024,
   "resid_pdrop": 0.1,
   "scale_attn_weights": true,
   "summary_activation": null,
   "summary_first_dropout": 0.1,
@@ -35,7 +37,7 @@
     }
   },
   "torch_dtype": "float32",
-  "transformers_version": "4.10.3",
   "use_cache": true,
   "vocab_size": 50257
 }

 {
+  "_name_or_path": "macan",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"
   "n_inner": null,
   "n_layer": 12,
   "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
   "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
   "scale_attn_weights": true,
   "summary_activation": null,
   "summary_first_dropout": 0.1,
     }
   },
   "torch_dtype": "float32",
+  "transformers_version": "4.19.1",
   "use_cache": true,
   "vocab_size": 50257
 }

evaluation.txt CHANGED Viewed

	@@ -1 +1 @@
1	- {"eval_loss": 2.0351359844207764, "eval_runtime": 1.0195, "eval_samples_per_second": 20.599, "eval_steps_per_second": 2.943, "epoch": 1.0}


1	+ {"eval_loss": 1.8112770318984985, "eval_runtime": 0.6104, "eval_samples_per_second": 40.955, "eval_steps_per_second": 6.553, "epoch": 2.0}

flax_model.msgpack CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0c71fa449f5689c4860d3485432c97159962c1176f23636370b865d78972c9df
 size 497764120

 version https://git-lfs.github.com/spec/v1
+oid sha256:b6dbfd30e9730d8a9ee64f760f018f8b7e2d42f7822e2b99f1863d7380258b02
 size 497764120

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f57b23ddb49ec545ee34d11cebdf33b7f6d4ca37509e4ef5ce655e15b4786d2f
 size 995603825

 version https://git-lfs.github.com/spec/v1
+oid sha256:cc2dafdc61421fe81a200a75c3c83a4ddcfcb782e87208c3348a1a594e263a9c
 size 995603825

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1a029cea040e781fbbd945bfb92cd764f3d18c30b53a56b8e281baae614302f2
-size 510403817

 version https://git-lfs.github.com/spec/v1
+oid sha256:92cfb9e9f25733d6497b96cda823ae6090035db17d2a03e59e3f27e23974cf45
+size 510396521

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6a5affe1a1a2685dbefbc3e06428328c688eae14c36ea3654255b2fb83c461c7
 size 14567

 version https://git-lfs.github.com/spec/v1
+oid sha256:87b136e91fabd5b40576460a0e184203ca6d02f557e23281294302b69be7a272
 size 14567

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:12c3121dfe95d6c791df88caed756108693d6e32895d921cef6e4d1a2b3e5d79
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:1d23fd6d48231790858029b93d2886261aee312a553a87a2a69f0af34403e0a3
 size 623

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "gpt2", "tokenizer_class": "GPT2Tokenizer"}


1	+ {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/macan", "tokenizer_class": "GPT2Tokenizer"}

trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-  "best_metric": 2.0351359844207764,
-  "best_model_checkpoint": "output/macan/checkpoint-19",
-  "epoch": 1.0,
-  "global_step": 19,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -32,11 +32,43 @@
       "eval_samples_per_second": 22.88,
       "eval_steps_per_second": 3.269,
       "step": 19
     }
   ],
-  "max_steps": 19,
-  "num_train_epochs": 1,
-  "total_flos": 18943672320000.0,
   "trial_name": null,
   "trial_params": null
 }

 {
+  "best_metric": 1.8112770318984985,
+  "best_model_checkpoint": "output/macan/checkpoint-36",
+  "epoch": 2.0,
+  "global_step": 36,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 22.88,
       "eval_steps_per_second": 3.269,
       "step": 19
+    },
+    {
+      "epoch": 1.11,
+      "learning_rate": 4.137086214086682e-06,
+      "loss": 2.0597,
+      "step": 20
+    },
+    {
+      "epoch": 1.39,
+      "learning_rate": 4.513741816785908e-05,
+      "loss": 2.0438,
+      "step": 25
+    },
+    {
+      "epoch": 1.67,
+      "learning_rate": 0.00010290000000000001,
+      "loss": 2.1141,
+      "step": 30
+    },
+    {
+      "epoch": 1.94,
+      "learning_rate": 0.00013615781185663748,
+      "loss": 2.0135,
+      "step": 35
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 1.8112770318984985,
+      "eval_runtime": 0.5407,
+      "eval_samples_per_second": 46.236,
+      "eval_steps_per_second": 7.398,
+      "step": 36
     }
   ],
+  "max_steps": 36,
+  "num_train_epochs": 2,
+  "total_flos": 36319592448000.0,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c1476a3981df93fa35765d235e04053c6b6cb3a02d793df384134d310c609f66
-size 2671

 version https://git-lfs.github.com/spec/v1
+oid sha256:028d7959463ebfdbee6d242cb865ee2b2573ad2c606f3f863ccd74088e8108f3
+size 3247