File size: 400 Bytes
8366b03
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# generation hyper-parameters
# NOTE(review): the per-key notes below are inferred from the key names only;
# the code that reads this file is not visible here -- confirm before relying
# on them.
# max_len and generate_len are both set to 512; how the consumer distinguishes
# the two is not visible from this file.
max_len: 512
# presumably the contrastive-search degeneration penalty -- TODO confirm
penalty_alpha: 0.6
# presumably the top-k / nucleus (top-p) sampling cut-offs -- TODO confirm
top_k: 10
top_p: 0.7
random_prefix_len: 5
# presumably the number of samples generated per input -- TODO confirm
sample_num: 2
# string selector for the decoding strategy; other accepted values are not
# listed in this file
decoding_method: sampling
generate_len: 512

# lora hyper-parameters
# NOTE(review): presumably forwarded to a LoRA adapter configuration (rank,
# scaling numerator, dropout probability) -- verify against the consumer.
# lora_r and lora_alpha are equal here, so under the usual alpha / r
# convention the adapter scaling factor would be 1 -- TODO confirm.
lora_r: 32
lora_alpha: 32
lora_dropout: 0.1

# Training settings. Only a subset is kept here; further options live in the
# dsconfig folder.
train:
  # NOTE(review): per-key notes are inferred from key names -- confirm
  # against the training script.
  seed: 0
  # presumably the fraction of total steps used for LR warm-up
  warmup_rate: 0.1
  epochs: 2
  max_length: 1024
  # string value; presumably the per-file size limit when sharding saved
  # checkpoints
  max_shard_size: '10GB'