{
  "T": 0.1,
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "augmentation": "none",
  "beta1": 0.9,
  "beta2": 0.98,
  "bi_encoder": false,
  "chunk_length": 256,
  "classifier_dropout": null,
  "continue_training": false,
  "contrastive_mode": "moco",
  "dropout": 0.1,
  "eps": 1e-06,
  "eval_data": [
    "minilm_denoised_T0_32_datasets_fixed_instruction_unfollowing_dev.jsonl"
  ],
  "eval_datasets": [],
  "eval_datasets_dir": "./",
  "eval_freq": 2000,
  "eval_normalize_text": false,
  "freeze_ctx_encoder": false,
  "global_rank": 0,
  "hard_order": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "kd": false,
  "label_smoothing": 0.0,
  "layer_norm_eps": 1e-12,
  "loading_mode": "split",
  "local_rank": 0,
  "log_freq": 100,
  "loss_type": "kl",
  "lower_case": false,
  "lr": 1e-05,
  "lr_min_ratio": 0.0,
  "main_addr": "learnfair7603",
  "main_port": 15972,
  "max_position_embeddings": 512,
  "maxload": null,
  "moco_train_mode_encoder_k": false,
  "model_path": "/checkpoint/akariasai/contriever/contriever_wiki2020_ft_msmarco/checkpoint/step-10000/",
  "model_type": "bert",
  "momentum": 0.999,
  "n_context": 50,
  "negative_ctxs": 5,
  "negative_hard_min_idx": 0,
  "negative_hard_ratio": 0.1,
  "norm_doc": false,
  "norm_query": false,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "num_workers": 5,
  "optim": "adamw",
  "output_dir": "/checkpoint/akariasai/contriever/instruction_unfollowing_full_data_from_new_checkpoints_5_0.1",
  "pad_token_id": 0,
  "per_gpu_batch_size": 16,
  "per_gpu_eval_batch_size": 16,
  "pooling": "average",
  "position_embedding_type": "absolute",
  "prob_augmentation": 0.0,
  "projection_size": 768,
  "queue_size": 65536,
  "random_init": false,
  "random_sort": false,
  "ratio_max": 0.5,
  "ratio_min": 0.1,
  "retriever_model_id": "bert-base-uncased",
  "rho": 0.05,
  "sampling_coefficient": 0.0,
  "save_freq": 2000,
  "scheduler": "linear",
  "score_function": "dot",
  "seed": 0,
  "temperature": 0.05,
  "torch_dtype": "float32",
  "total_steps": 20000,
  "train_data": [
    "minilm_denoised_T0_32_datasets_fixed_instruction_unfollowing_train.jsonl"
  ],
  "transformers_version": "4.36.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522,
  "warmup_steps": 1000,
  "weight_decay": 0.01,
  "world_size": 64
}