Added Llama-70b batch_size 4 to inference cache
Browse files
inference-cache-config/llama.json
CHANGED
@@ -105,6 +105,12 @@
       "sequence_length": 4096,
       "num_cores": 24,
       "auto_cast_type": "fp16"
+    },
+    {
+      "batch_size": 4,
+      "sequence_length": 4096,
+      "num_cores": 24,
+      "auto_cast_type": "fp16"
     }
   ]
 }