{ "accelerator_kwargs": {}, "adap_kl_ctrl": true, "backward_batch_size": 64, "batch_size": 64, "cliprange": 0.2, "cliprange_value": 0.2, "compare_steps": 1, "dataset_num_proc": null, "early_stopping": false, "exp_name": "stego_trainer", "forward_batch_size": null, "gamma": 1, "global_backward_batch_size": 64, "global_batch_size": 64, "gradient_accumulation_steps": 4, "gradient_checkpointing": false, "horizon": 10000, "init_kl_coef": 0.05, "is_encoder_decoder": false, "is_peft_model": true, "kl_penalty": "kl", "lam": 0.95, "learning_rate": 2e-05, "log_with": "wandb", "max_grad_norm": null, "mini_batch_size": 16, "model_name": "unsloth/gemma-2-2b-it", "optimize_cuda_cache": true, "optimize_device_cache": false, "ppo_epochs": 4, "project_kwargs": {}, "push_to_hub_if_best_kwargs": {}, "query_dataset": "imdb", "ratio_threshold": 10.0, "remove_unused_columns": true, "reward_model": "sentiment-analysis:lvwerra/distilbert-imdb", "score_clip": null, "seed": 0, "steps": 20000, "target": 12.0, "target_kl": 1, "task_name": null, "tracker_kwargs": { "wandb": { "name": "cv_gemma-2-2b-it_to_distilbert-base-uncased_EBS64_Joan", "notes": "Dataset: cv\n Same Prompt: \n Payload Prefixes: ['Movie Review: This movie was really amazing!', 'Movie Review: This movie was really terrible!']\n Payload Template: Movie Review: This movie was really {payload}!\n Separate Enc/Dec Data: True\n\n Encoder: gemma-2-2b-it (LR: 2e-05)\n Decoder: distilbert-base-uncased (LR: 0.0001)\n Train Loop: v2_dylan\n\n Effective Batch Sizes:\n - Encoder: 64\n - Decoder: 512\n\n Training Iterations:\n - Encoder updates: 100\n - Decoder updates: 400\n - Update Encoder First: False\n\n Temperatures:\n - Decoder Training: 1.0\n - Encoder Training: 1.0\n - Evaluation: 1.0\n\n Encoder Parameters:\n - KL Coefficient: 0.05\n - LoRA: True\n - Quantization: False\n - Output Length: {'min': 42, 'max': 51}\n\n Decoder Parameters:\n - New Classification Head: True\n - Use Probs Reward: False\n - Weight Decay: 0.01\n - Update Parameters: {'head': True, 'body': True}\n\n Training Configuration:\n - Update Encoder: True\n - Update Decoder: True\n - Paraphrase: False\n - Leak Password: False\n - WandB Logging: True\n - Eval Every N: 50\n - Number of Epochs: 100000\n\n Debug:\n - Override Dec Batch: False", "tags": [ "cv", "gemma-2-2b-it", "distilbert-base-uncased", "v2_dylan", "enc_lr_2e-05", "dec_lr_0.0001", "enc_eff_bs_64", "dec_eff_bs_512", "enc_updates_100", "dec_updates_400", "LoRA", "Full_Precision", "same_prompt_ ", "Separate_Enc_Dec_Data", "Update_Enc", "Update_Dec", "No_Paraphrase", "No_Leak", "1-bit" ] } }, "tracker_project_name": "trl", "use_score_norm": false, "use_score_scaling": false, "vf_coef": 0.1, "whiten_rewards": false, "world_size": 1 }