neuralmagic
/

OpenHermes-2.5-Mistral-7B-pruned50-quant-ds

Text Generation

Model card Files Files and versions Community

mgoin committed on Nov 21, 2023

Commit

fed89b0

•

1 Parent(s): 816ad03

Create recipe.yaml

Files changed (1) hide show

recipe.yaml +31 -0

recipe.yaml ADDED Viewed

	@@ -0,0 +1,31 @@

+test_stage:
+  obcq_modifiers:
+    QuantizationModifier:
+      ignore:
+        # Module types left unquantized: these operations don't make sense to quantize
+        - MistralRotaryEmbedding
+        - MistralRMSNorm
+        - SiLUActivation
+        # Disabled: uncomment the next entry to also skip quantizing the BMMs
+        # - QuantizableMatMul
+        # Skip quantizing the layers with the most sensitive activations (listed by module path)
+        - model.layers.1.mlp.down_proj
+        - model.layers.31.mlp.down_proj
+        - model.layers.30.mlp.down_proj
+        - model.layers.30.mlp.gate_proj
+        - model.layers.30.mlp.up_proj
+      post_oneshot_calibration: true
+      scheme_overrides:
+        Embedding:
+          input_activations: null  # explicit null, not an omitted key
+          weights:
+            num_bits: 8
+            symmetric: false
+    SparseGPTModifier:
+      sparsity: 0.5  # presumably the target fraction of pruned weights (cf. "pruned50" in the model name) - confirm
+      block_size: 128
+      sequential_update: true
+      quantize: true
+      percdamp: 0.01
+      mask_structure: "0:0"  # kept quoted: unquoted digits:digits can parse as a sexagesimal int in YAML 1.1
+      targets: ["re:model.layers.\\d*$"]  # double-quoted, so \\ parses to one backslash (value contains \d); "re:" presumably marks a regex - confirm