kejian committed on
Commit
13cbb73
1 Parent(s): 42bfabe

update model card README.md

Browse files
Files changed (1) hide show
  1. README.md +8 -6
README.md CHANGED
@@ -36,9 +36,11 @@ More information needed
36
 
37
  The following hyperparameters were used during training:
38
  - learning_rate: 0.0008
39
- - train_batch_size: 64
40
- - eval_batch_size: 32
41
  - seed: 42
 
 
42
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
43
  - lr_scheduler_type: linear
44
  - lr_scheduler_warmup_ratio: 0.01
@@ -60,7 +62,7 @@ The following hyperparameters were used during training:
60
  'threshold': 0},
61
  'datasets': ['kejian/codeparrot-train-more-filter-3.3b-cleaned'],
62
  'is_split_by_sentences': True},
63
- 'generation': {'batch_size': 64,
64
  'metrics_configs': [{}, {'n': 1}, {}],
65
  'scenario_configs': [{'display_as_html': True,
66
  'generate_kwargs': {'bad_words_ids': [[32769]],
@@ -73,7 +75,7 @@ The following hyperparameters were used during training:
73
  'top_p': 0.9},
74
  'name': 'unconditional',
75
  'num_hits_threshold': 0,
76
- 'num_samples': 4096,
77
  'prefix': '<|aligned|>',
78
  'use_prompt_for_scoring': False},
79
  {'display_as_html': True,
@@ -87,7 +89,7 @@ The following hyperparameters were used during training:
87
  'top_p': 0.9},
88
  'name': 'functions',
89
  'num_hits_threshold': 0,
90
- 'num_samples': 4096,
91
  'prefix': '<|aligned|>',
92
  'prompt_before_control': True,
93
  'prompts_path': 'resources/functions_csnet.jsonl',
@@ -127,4 +129,4 @@ The following hyperparameters were used during training:
127
  'weight_decay': 0.1}}
128
 
129
  # Wandb URL:
130
- https://wandb.ai/kejian/uncategorized/runs/h0khl32b
 
36
 
37
  The following hyperparameters were used during training:
38
  - learning_rate: 0.0008
39
+ - train_batch_size: 32
40
+ - eval_batch_size: 16
41
  - seed: 42
42
+ - gradient_accumulation_steps: 2
43
+ - total_train_batch_size: 64
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: linear
46
  - lr_scheduler_warmup_ratio: 0.01
 
62
  'threshold': 0},
63
  'datasets': ['kejian/codeparrot-train-more-filter-3.3b-cleaned'],
64
  'is_split_by_sentences': True},
65
+ 'generation': {'batch_size': 128,
66
  'metrics_configs': [{}, {'n': 1}, {}],
67
  'scenario_configs': [{'display_as_html': True,
68
  'generate_kwargs': {'bad_words_ids': [[32769]],
 
75
  'top_p': 0.9},
76
  'name': 'unconditional',
77
  'num_hits_threshold': 0,
78
+ 'num_samples': 2048,
79
  'prefix': '<|aligned|>',
80
  'use_prompt_for_scoring': False},
81
  {'display_as_html': True,
 
89
  'top_p': 0.9},
90
  'name': 'functions',
91
  'num_hits_threshold': 0,
92
+ 'num_samples': 2048,
93
  'prefix': '<|aligned|>',
94
  'prompt_before_control': True,
95
  'prompts_path': 'resources/functions_csnet.jsonl',
 
129
  'weight_decay': 0.1}}
130
 
131
  # Wandb URL:
132
+ https://wandb.ai/kejian/uncategorized/runs/zpigcpaa