mgoin committed on
Commit
55ffa2e
1 Parent(s): e8570ed

Updated compression_config to quantization_config

Browse files
Files changed (1) hide show
  1. config.json +42 -42
config.json CHANGED
@@ -10,46 +10,6 @@
10
  "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM"
11
  },
12
  "bos_token_id": 1,
13
- "compression_config": {
14
- "config_groups": {
15
- "group_0": {
16
- "input_activations": {
17
- "block_structure": null,
18
- "dynamic": false,
19
- "group_size": null,
20
- "num_bits": 8,
21
- "observer": "minmax",
22
- "observer_kwargs": {},
23
- "strategy": "tensor",
24
- "symmetric": true,
25
- "type": "float"
26
- },
27
- "output_activations": null,
28
- "targets": [
29
- "Linear"
30
- ],
31
- "weights": {
32
- "block_structure": null,
33
- "dynamic": false,
34
- "group_size": null,
35
- "num_bits": 8,
36
- "observer": "minmax",
37
- "observer_kwargs": {},
38
- "strategy": "tensor",
39
- "symmetric": true,
40
- "type": "float"
41
- }
42
- }
43
- },
44
- "format": "float-quantized",
45
- "global_compression_ratio": 1.2469135767182644,
46
- "ignore": [
47
- "lm_head"
48
- ],
49
- "kv_cache_scheme": null,
50
- "quant_method": "compressed-tensors",
51
- "quantization_status": "frozen"
52
- },
53
  "embd_pdrop": 0.0,
54
  "eos_token_id": 32000,
55
  "hidden_act": "silu",
@@ -206,5 +166,45 @@
206
  "torch_dtype": "float16",
207
  "transformers_version": "4.44.0",
208
  "use_cache": true,
209
- "vocab_size": 32064
210
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM"
11
  },
12
  "bos_token_id": 1,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  "embd_pdrop": 0.0,
14
  "eos_token_id": 32000,
15
  "hidden_act": "silu",
 
166
  "torch_dtype": "float16",
167
  "transformers_version": "4.44.0",
168
  "use_cache": true,
169
+ "vocab_size": 32064,
170
+ "quantization_config": {
171
+ "config_groups": {
172
+ "group_0": {
173
+ "input_activations": {
174
+ "block_structure": null,
175
+ "dynamic": false,
176
+ "group_size": null,
177
+ "num_bits": 8,
178
+ "observer": "minmax",
179
+ "observer_kwargs": {},
180
+ "strategy": "tensor",
181
+ "symmetric": true,
182
+ "type": "float"
183
+ },
184
+ "output_activations": null,
185
+ "targets": [
186
+ "Linear"
187
+ ],
188
+ "weights": {
189
+ "block_structure": null,
190
+ "dynamic": false,
191
+ "group_size": null,
192
+ "num_bits": 8,
193
+ "observer": "minmax",
194
+ "observer_kwargs": {},
195
+ "strategy": "tensor",
196
+ "symmetric": true,
197
+ "type": "float"
198
+ }
199
+ }
200
+ },
201
+ "format": "float-quantized",
202
+ "global_compression_ratio": 1.2469135767182644,
203
+ "ignore": [
204
+ "lm_head"
205
+ ],
206
+ "kv_cache_scheme": null,
207
+ "quant_method": "compressed-tensors",
208
+ "quantization_status": "frozen"
209
+ }
210
+ }