Use princeton-nlp/Sheared-LLaMA-1.3B as a test model
Browse files
inference-cache-config/llama-variants.json
CHANGED
@@ -103,7 +103,7 @@
|
|
103 |
"auto_cast_type": "fp16"
|
104 |
}
|
105 |
],
|
106 |
-
"
|
107 |
{
|
108 |
"batch_size": 1,
|
109 |
"sequence_length": 4096,
|
|
|
103 |
"auto_cast_type": "fp16"
|
104 |
}
|
105 |
],
|
106 |
+
"princeton-nlp/Sheared-LLaMA-1.3B": [
|
107 |
{
|
108 |
"batch_size": 1,
|
109 |
"sequence_length": 4096,
|