---
# mergekit configuration: DARE-TIES merge of five models onto the
# NousResearch/Meta-Llama-3-8B base model.
#
# Per-model parameters used below:
#   density - fraction of the model's difference from the base that is kept
#   weight  - relative contribution of that model to the merged result
#
# NOTE(review): the weights sum to 0.90 and no `normalize` parameter is set,
# so mergekit's default normalization behaviour applies - confirm that this
# is intended.
models:
  # Base model: listed without parameters; it is the reference that the
  # other models' differences are computed against (see `base_model`).
  - model: NousResearch/Meta-Llama-3-8B

  # Largest single weight, but kept low enough to leave room for the
  # specialized models below.
  - model: NousResearch/Meta-Llama-3-8B-Instruct
    parameters:
      density: 0.58
      weight: 0.25

  # Low weight so this model does not dominate the merge.
  - model: cognitivecomputations/dolphin-2.9-llama3-8b
    parameters:
      density: 0.52
      weight: 0.15

  # Complements the base model without overshadowing it.
  - model: Locutusque/Llama-3-Orca-1.0-8B
    parameters:
      density: 0.52
      weight: 0.15

  # Supporting role in the combined model.
  - model: abacusai/Llama-3-Smaug-8B
    parameters:
      density: 0.52
      weight: 0.15

  # Weighted higher than the other specialized models to emphasize
  # Korean-language performance.
  - model: beomi/Llama-3-Open-Ko-8B-Instruct-preview
    parameters:
      density: 0.53
      weight: 0.2

merge_method: dare_ties
base_model: NousResearch/Meta-Llama-3-8B

# Global merge parameters (apply to the whole merge, not to a single model).
parameters:
  int8_mask: true

# Data type used for the merge.
dtype: bfloat16