CISCai committed
Commit 055e84c
1 Parent(s): 7a50058

Upload 13 files

.gitattributes CHANGED
@@ -33,3 +33,15 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ Mistral-Nemo-Instruct-2407.imatrix.dat filter=lfs diff=lfs merge=lfs -text
+ Mistral-Nemo-Instruct-2407.IQ1_M.gguf filter=lfs diff=lfs merge=lfs -text
+ Mistral-Nemo-Instruct-2407.IQ1_S.gguf filter=lfs diff=lfs merge=lfs -text
+ Mistral-Nemo-Instruct-2407.IQ2_M.gguf filter=lfs diff=lfs merge=lfs -text
+ Mistral-Nemo-Instruct-2407.IQ2_S.gguf filter=lfs diff=lfs merge=lfs -text
+ Mistral-Nemo-Instruct-2407.IQ2_XS.gguf filter=lfs diff=lfs merge=lfs -text
+ Mistral-Nemo-Instruct-2407.IQ2_XXS.gguf filter=lfs diff=lfs merge=lfs -text
+ Mistral-Nemo-Instruct-2407.IQ3_M.gguf filter=lfs diff=lfs merge=lfs -text
+ Mistral-Nemo-Instruct-2407.IQ3_S.gguf filter=lfs diff=lfs merge=lfs -text
+ Mistral-Nemo-Instruct-2407.IQ3_XS.gguf filter=lfs diff=lfs merge=lfs -text
+ Mistral-Nemo-Instruct-2407.IQ3_XXS.gguf filter=lfs diff=lfs merge=lfs -text
+ Mistral-Nemo-Instruct-2407.IQ4_XS.gguf filter=lfs diff=lfs merge=lfs -text
Mistral-Nemo-Instruct-2407.IQ1_M.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b940cc0ffd6e1a493afd429842fe99d967d2173b136eba35b0828a72f56f562a
+ size 3221627296
Mistral-Nemo-Instruct-2407.IQ1_S.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:82dd0d71d3bae34a9776ec56b9b521bad2193b2f0e7d29002efed379db99d29a
+ size 2999214496
Mistral-Nemo-Instruct-2407.IQ2_M.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:97f9afd43bc903b36d49781de3152e25ca6f91f848f01312647868250936b938
+ size 4435026336
Mistral-Nemo-Instruct-2407.IQ2_S.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:269e5f67b72c449603b58ac4ca7deb0b54ba688803d85542d02483f105770ebe
+ size 4138475936
Mistral-Nemo-Instruct-2407.IQ2_XS.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8b4ff647558433d2d013201553c72c2e27d819d055435f75ff70fae3e3e723d2
+ size 3915080096
Mistral-Nemo-Instruct-2407.IQ2_XXS.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:332d9a99a4d1012c6adc650e696c6b3762dba47e05abcc21cde5925837bc2a30
+ size 3592315296
Mistral-Nemo-Instruct-2407.IQ3_M.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:adda54bc47e014739d3f54700dc352bfe9d7dc939a752402e4b562b65110bb5b
+ size 5722235296
Mistral-Nemo-Instruct-2407.IQ3_S.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f9888c00e27c193ac59e230a04cad89f9925f66f54253e4e5eff0d423390dea7
+ size 5562081696
Mistral-Nemo-Instruct-2407.IQ3_XS.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:150ff66d862134c5a54423f11230c40233e3dc22af8f04fd8d129c6184965c36
+ size 5306491296
Mistral-Nemo-Instruct-2407.IQ3_XXS.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f62b4cd119b6270dd92ec9effa9cefd97b910e1aa0dbdab6eaa4a05d30e91d20
+ size 4945387936
Mistral-Nemo-Instruct-2407.IQ4_XS.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:75cd95b015d33455a76b71a2cdeedc80d2100569654d62d375e5ce0f5b0982f4
+ size 6742712736
Mistral-Nemo-Instruct-2407.imatrix.dat ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1379ab50b21551efc0c561e9ca8d864dcc95d42c10082d7f7e3ae4e991d79dd6
+ size 7054413
README.md CHANGED
@@ -1,3 +1,286 @@
- ---
- license: apache-2.0
- ---
+ ---
+ base_model: mistralai/Mistral-Nemo-Instruct-2407
+ language:
+ - en
+ pipeline_tag: text-generation
+ license: apache-2.0
+ model_creator: Mistral AI
+ model_name: Mistral-Nemo-Instruct-2407
+ model_type: mistral
+ quantized_by: CISC
+ ---
+
+ # Mistral-Nemo-Instruct-2407 - SOTA GGUF
+ - Model creator: [Mistral AI](https://huggingface.co/mistralai)
+ - Original model: [Mistral-Nemo-Instruct-2407](https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407)
+
+ <!-- description start -->
+ ## Description
+
+ This repo contains State Of The Art quantized GGUF format model files for [Mistral-Nemo-Instruct-2407](https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407).
+
+ Quantization was done with an importance matrix that was trained for ~1M tokens (256 batches of 4096 tokens) of [groups_merged.txt](https://github.com/ggerganov/llama.cpp/discussions/5263#discussioncomment-8395384) and [wiki.train.raw](https://raw.githubusercontent.com/pytorch/examples/main/word_language_model/data/wikitext-2/train.txt) concatenated.
+
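+ For reference, the sketch below shows roughly how such an importance matrix can be generated and applied with the stock llama.cpp tools. It is a minimal example under stated assumptions, not the exact commands used for this repo; the F16 conversion and calibration file names are placeholders.
+
+ ```shell
+ # Hypothetical file names; point these at your own F16/BF16 conversion and calibration data.
+ # 256 chunks of 4096 tokens roughly matches the ~1M tokens mentioned above.
+ ./llama-imatrix -m Mistral-Nemo-Instruct-2407.F16.gguf -f calibration.txt -o Mistral-Nemo-Instruct-2407.imatrix.dat -c 4096 --chunks 256
+
+ # Quantize using the importance matrix, e.g. to IQ4_XS:
+ ./llama-quantize --imatrix Mistral-Nemo-Instruct-2407.imatrix.dat Mistral-Nemo-Instruct-2407.F16.gguf Mistral-Nemo-Instruct-2407.IQ4_XS.gguf IQ4_XS
+ ```
+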
+ The embedded chat template is the updated one with correct Tekken tokenization and function calling support via the OpenAI-compatible `tools` parameter; see the [example](#simple-llama-cpp-python-example-function-calling-code).
+
+ <!-- description end -->
+
+
+ <!-- prompt-template start -->
+ ## Prompt template: Mistral Tekken
+
+ ```
+ [AVAILABLE_TOOLS][{"name": "function_name", "description": "Description", "parameters": {...}}, ...][/AVAILABLE_TOOLS][INST]{prompt}[/INST]
+ ```
+
+ <!-- prompt-template end -->
+
+
+ <!-- compatibility_gguf start -->
+ ## Compatibility
+
+ These quantised GGUFv3 files are compatible with llama.cpp from July 22nd 2024 onwards, as of commit [50e0535](https://github.com/ggerganov/llama.cpp/commit/50e05353e88d50b644688caa91f5955e8bdb9eb9).
+
+ They are also compatible with many third-party UIs and libraries, provided they are built using a recent llama.cpp.
+
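+ If you are unsure which commit your local build is based on, a quick check (assuming a reasonably recent llama.cpp build, where the binaries accept `--version`) is:
+
+ ```shell
+ # Prints the build number and commit hash of your llama.cpp binaries.
+ ./llama-cli --version
+ ```
+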
+ ## Explanation of quantisation methods
+
+ <details>
+ <summary>Click to see details</summary>
+
+ The new methods available are:
+
+ * GGML_TYPE_IQ1_S - 1-bit quantization in super-blocks with an importance matrix applied, effectively using 1.56 bits per weight (bpw)
+ * GGML_TYPE_IQ1_M - 1-bit quantization in super-blocks with an importance matrix applied, effectively using 1.75 bpw
+ * GGML_TYPE_IQ2_XXS - 2-bit quantization in super-blocks with an importance matrix applied, effectively using 2.06 bpw
+ * GGML_TYPE_IQ2_XS - 2-bit quantization in super-blocks with an importance matrix applied, effectively using 2.31 bpw
+ * GGML_TYPE_IQ2_S - 2-bit quantization in super-blocks with an importance matrix applied, effectively using 2.5 bpw
+ * GGML_TYPE_IQ2_M - 2-bit quantization in super-blocks with an importance matrix applied, effectively using 2.7 bpw
+ * GGML_TYPE_IQ3_XXS - 3-bit quantization in super-blocks with an importance matrix applied, effectively using 3.06 bpw
+ * GGML_TYPE_IQ3_XS - 3-bit quantization in super-blocks with an importance matrix applied, effectively using 3.3 bpw
+ * GGML_TYPE_IQ3_S - 3-bit quantization in super-blocks with an importance matrix applied, effectively using 3.44 bpw
+ * GGML_TYPE_IQ3_M - 3-bit quantization in super-blocks with an importance matrix applied, effectively using 3.66 bpw
+ * GGML_TYPE_IQ4_XS - 4-bit quantization in super-blocks with an importance matrix applied, effectively using 4.25 bpw
+ * GGML_TYPE_IQ4_NL - 4-bit non-linearly mapped quantization with an importance matrix applied, effectively using 4.5 bpw
+
+ Refer to the Provided Files table below to see what files use which methods, and how.
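+
+ As a rough sanity check, a quantized file's size is approximately the parameter count times the effective bits per weight divided by 8. The sketch below assumes the commonly cited ~12.2B parameter figure for Mistral Nemo; since not every tensor uses the same quant type, real files deviate somewhat from this estimate.
+
+ ```python
+ # Back-of-the-envelope size estimate: params * bpw / 8 bytes.
+ params = 12.2e9  # assumed parameter count, not taken from this repo
+ for name, bpw in [("IQ2_M", 2.7), ("IQ3_M", 3.66), ("IQ4_XS", 4.25)]:
+     print(f"{name}: ~{params * bpw / 8 / 1e9:.1f} GB")  # close to the sizes in the table below
+ ```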
+ </details>
+ <!-- compatibility_gguf end -->
+
+ <!-- README_GGUF.md-provided-files start -->
+ ## Provided files
+
+ | Name | Quant method | Bits | Size | Max RAM required | Use case |
+ | ---- | ---- | ---- | ---- | ---- | ----- |
+ | [Mistral-Nemo-Instruct-2407.IQ1_S.gguf](https://huggingface.co/CISCai/Mistral-Nemo-Instruct-2407-SOTA-GGUF/blob/main/Mistral-Nemo-Instruct-2407.IQ1_S.gguf) | IQ1_S | 1 | 2.8 GB | 3.4 GB | smallest, significant quality loss |
+ | [Mistral-Nemo-Instruct-2407.IQ1_M.gguf](https://huggingface.co/CISCai/Mistral-Nemo-Instruct-2407-SOTA-GGUF/blob/main/Mistral-Nemo-Instruct-2407.IQ1_M.gguf) | IQ1_M | 1 | 3.0 GB | 3.6 GB | very small, significant quality loss |
+ | [Mistral-Nemo-Instruct-2407.IQ2_XXS.gguf](https://huggingface.co/CISCai/Mistral-Nemo-Instruct-2407-SOTA-GGUF/blob/main/Mistral-Nemo-Instruct-2407.IQ2_XXS.gguf) | IQ2_XXS | 2 | 3.3 GB | 3.9 GB | very small, high quality loss |
+ | [Mistral-Nemo-Instruct-2407.IQ2_XS.gguf](https://huggingface.co/CISCai/Mistral-Nemo-Instruct-2407-SOTA-GGUF/blob/main/Mistral-Nemo-Instruct-2407.IQ2_XS.gguf) | IQ2_XS | 2 | 3.6 GB | 4.2 GB | very small, high quality loss |
+ | [Mistral-Nemo-Instruct-2407.IQ2_S.gguf](https://huggingface.co/CISCai/Mistral-Nemo-Instruct-2407-SOTA-GGUF/blob/main/Mistral-Nemo-Instruct-2407.IQ2_S.gguf) | IQ2_S | 2 | 3.9 GB | 4.4 GB | small, substantial quality loss |
+ | [Mistral-Nemo-Instruct-2407.IQ2_M.gguf](https://huggingface.co/CISCai/Mistral-Nemo-Instruct-2407-SOTA-GGUF/blob/main/Mistral-Nemo-Instruct-2407.IQ2_M.gguf) | IQ2_M | 2 | 4.1 GB | 4.7 GB | small, greater quality loss |
+ | [Mistral-Nemo-Instruct-2407.IQ3_XXS.gguf](https://huggingface.co/CISCai/Mistral-Nemo-Instruct-2407-SOTA-GGUF/blob/main/Mistral-Nemo-Instruct-2407.IQ3_XXS.gguf) | IQ3_XXS | 3 | 4.6 GB | 5.2 GB | very small, high quality loss |
+ | [Mistral-Nemo-Instruct-2407.IQ3_XS.gguf](https://huggingface.co/CISCai/Mistral-Nemo-Instruct-2407-SOTA-GGUF/blob/main/Mistral-Nemo-Instruct-2407.IQ3_XS.gguf) | IQ3_XS | 3 | 4.9 GB | 5.5 GB | small, substantial quality loss |
+ | [Mistral-Nemo-Instruct-2407.IQ3_S.gguf](https://huggingface.co/CISCai/Mistral-Nemo-Instruct-2407-SOTA-GGUF/blob/main/Mistral-Nemo-Instruct-2407.IQ3_S.gguf) | IQ3_S | 3 | 5.2 GB | 5.8 GB | small, greater quality loss |
+ | [Mistral-Nemo-Instruct-2407.IQ3_M.gguf](https://huggingface.co/CISCai/Mistral-Nemo-Instruct-2407-SOTA-GGUF/blob/main/Mistral-Nemo-Instruct-2407.IQ3_M.gguf) | IQ3_M | 3 | 5.3 GB | 5.9 GB | medium, balanced quality - recommended |
+ | [Mistral-Nemo-Instruct-2407.IQ4_XS.gguf](https://huggingface.co/CISCai/Mistral-Nemo-Instruct-2407-SOTA-GGUF/blob/main/Mistral-Nemo-Instruct-2407.IQ4_XS.gguf) | IQ4_XS | 4 | 6.3 GB | 6.9 GB | small, substantial quality loss |
+
+ Generated importance matrix file: [Mistral-Nemo-Instruct-2407.imatrix.dat](https://huggingface.co/CISCai/Mistral-Nemo-Instruct-2407-SOTA-GGUF/blob/main/Mistral-Nemo-Instruct-2407.imatrix.dat)
+
+ **Note**: the above RAM figures assume no GPU offloading with 4K context. If layers are offloaded to the GPU, this will reduce RAM usage and use VRAM instead.
+
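+ To fetch a single quant without cloning the whole repository, something like the following `huggingface-cli` invocation should work (the CLI ships with the `huggingface_hub` package; the IQ4_XS file is only an example, pick whichever quant you need):
+
+ ```shell
+ pip install -U "huggingface_hub[cli]"
+ huggingface-cli download CISCai/Mistral-Nemo-Instruct-2407-SOTA-GGUF Mistral-Nemo-Instruct-2407.IQ4_XS.gguf --local-dir .
+ ```
+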
+ <!-- README_GGUF.md-provided-files end -->
+
+ <!-- README_GGUF.md-how-to-run start -->
+ ## Example `llama.cpp` command
+
+ Make sure you are using `llama.cpp` from commit [50e0535](https://github.com/ggerganov/llama.cpp/commit/50e05353e88d50b644688caa91f5955e8bdb9eb9) or later.
+
+ ```shell
+ ./llama-cli -ngl 41 -m Mistral-Nemo-Instruct-2407.IQ4_XS.gguf --color -c 131072 --temp 0.3 --repeat-penalty 1.1 -p "[AVAILABLE_TOOLS]{tools}[/AVAILABLE_TOOLS][INST]{prompt}[/INST]"
+ ```
+
+ This model is very temperature-sensitive; keep the temperature between 0.3 and 0.4 for best results. Also note the lack of spaces between special tokens and input in the prompt; this model does not use the regular Mistral chat template.
+
+ Change `-ngl 41` to the number of layers to offload to the GPU. Remove it if you don't have GPU acceleration.
+
+ Change `-c 131072` to the desired sequence length.
+
+ If you are low on VRAM/RAM, try quantizing the K-cache with `-ctk q8_0` or even `-ctk q4_0` for big memory savings (depending on context size).
+ There is a similar option for the V-cache (`-ctv`); however, it is [not working yet](https://github.com/ggerganov/llama.cpp/issues/4425) unless you also enable Flash Attention (`-fa`).
+
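+ For example, a reduced-memory invocation could look like the sketch below (the 8-bit cache types and the shorter context are just one reasonable choice):
+
+ ```shell
+ # 8-bit K and V caches plus Flash Attention to roughly halve KV-cache memory.
+ ./llama-cli -ngl 41 -m Mistral-Nemo-Instruct-2407.IQ4_XS.gguf --color -c 32768 --temp 0.3 --repeat-penalty 1.1 -fa -ctk q8_0 -ctv q8_0 -p "[INST]{prompt}[/INST]"
+ ```
+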
+ For other parameters and how to use them, please refer to [the llama.cpp documentation](https://github.com/ggerganov/llama.cpp/blob/master/examples/main/README.md).
+
+ ## How to run from Python code
+
+ You can use GGUF models from Python using the [llama-cpp-python](https://github.com/abetlen/llama-cpp-python) module.
+
+ ### How to load this model in Python code, using llama-cpp-python
+
+ For full documentation, please see: [llama-cpp-python docs](https://llama-cpp-python.readthedocs.io/en/latest/).
+
+ #### First install the package
+
+ Run one of the following commands, according to your system:
+
+ ```shell
+ # Prebuilt wheel with basic CPU support
+ pip install llama-cpp-python --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
+ # Prebuilt wheel with NVidia CUDA acceleration (cu121 shown; use cu122 etc. to match your CUDA version)
+ pip install llama-cpp-python --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121
+ # Prebuilt wheel with Metal GPU acceleration
+ pip install llama-cpp-python --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/metal
+ # Build base version with no GPU acceleration
+ pip install llama-cpp-python
+ # With NVidia CUDA acceleration
+ CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python
+ # Or with OpenBLAS acceleration
+ CMAKE_ARGS="-DGGML_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" pip install llama-cpp-python
+ # Or with AMD ROCm GPU acceleration (Linux only)
+ CMAKE_ARGS="-DGGML_HIPBLAS=on" pip install llama-cpp-python
+ # Or with Metal GPU acceleration for macOS systems only
+ CMAKE_ARGS="-DGGML_METAL=on" pip install llama-cpp-python
+ # Or with Vulkan acceleration
+ CMAKE_ARGS="-DGGML_VULKAN=on" pip install llama-cpp-python
+ # Or with SYCL acceleration
+ CMAKE_ARGS="-DGGML_SYCL=on -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx" pip install llama-cpp-python
+
+ # On Windows, to set the CMAKE_ARGS variable in PowerShell, follow this format; e.g. for NVidia CUDA:
+ $env:CMAKE_ARGS = "-DGGML_CUDA=on"
+ pip install llama-cpp-python
+ ```
+
+ #### Simple llama-cpp-python example code
+
+ ```python
+ from llama_cpp import Llama
+
+ # Chat Completion API
+
+ llm = Llama(model_path="./Mistral-Nemo-Instruct-2407.IQ4_XS.gguf", n_gpu_layers=41, n_ctx=131072)
+ print(llm.create_chat_completion(
+     messages = [
+         {
+             "role": "user",
+             "content": "Pick a LeetCode challenge and solve it in Python."
+         }
+     ]
+ ))
+ ```
+
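+ The same call can also stream tokens as they are generated, which is usually nicer for interactive use. A minimal sketch, following llama-cpp-python's OpenAI-compatible streaming format:
+
+ ```python
+ from llama_cpp import Llama
+
+ llm = Llama(model_path="./Mistral-Nemo-Instruct-2407.IQ4_XS.gguf", n_gpu_layers=41, n_ctx=131072)
+ # stream=True yields OpenAI-style chunks; print the content deltas as they arrive.
+ for chunk in llm.create_chat_completion(
+     messages = [ { "role": "user", "content": "Write a haiku about quantization." } ],
+     stream = True
+ ):
+     delta = chunk["choices"][0]["delta"]
+     if "content" in delta:
+         print(delta["content"], end="", flush=True)
+ print()
+ ```
+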
+ #### Simple llama-cpp-python example function calling code
+
+ ```python
+ import json
+
+ from llama_cpp import Llama, LlamaGrammar
+
+ # Chat Completion API
+
+ grammar = LlamaGrammar.from_json_schema(json.dumps({
+     "type": "array",
+     "items": {
+         "type": "object",
+         "required": [ "name", "arguments" ],
+         "properties": {
+             "name": {
+                 "type": "string"
+             },
+             "arguments": {
+                 "type": "object"
+             }
+         }
+     }
+ }))
+
+ llm = Llama(model_path="./Mistral-Nemo-Instruct-2407.IQ4_XS.gguf", n_gpu_layers=41, n_ctx=131072)
+ response = llm.create_chat_completion(
+     temperature = 0.0,
+     repeat_penalty = 1.1,
+     messages = [
+         {
+             "role": "user",
+             "content": "What's the weather like in Oslo and Stockholm?"
+         }
+     ],
+     tools=[{
+         "type": "function",
+         "function": {
+             "name": "get_current_weather",
+             "description": "Get the current weather in a given location",
+             "parameters": {
+                 "type": "object",
+                 "properties": {
+                     "location": {
+                         "type": "string",
+                         "description": "The city and state, e.g. San Francisco, CA"
+                     },
+                     "unit": {
+                         "type": "string",
+                         "enum": [ "celsius", "fahrenheit" ]
+                     }
+                 },
+                 "required": [ "location" ]
+             }
+         }
+     }],
+     grammar = grammar
+ )
+ print(json.loads(response["choices"][0]["message"]["content"]))
+
+ print(llm.create_chat_completion(
+     temperature = 0.0,
+     repeat_penalty = 1.1,
+     messages = [
+         {
+             "role": "user",
+             "content": "What's the weather like in Oslo?"
+         },
+         { # The tool_calls entry is from the response to the request above (with tool_choice active)
+             "role": "assistant",
+             "content": None,
+             "tool_calls": [
+                 {
+                     "id": "call__0_get_current_weather_cmpl-..."[:9], # Make sure to truncate the ID (the chat template requires it)
+                     "type": "function",
+                     "function": {
+                         "name": "get_current_weather",
+                         "arguments": '{ "location": "Oslo, NO" ,"unit": "celsius"} '
+                     }
+                 }
+             ]
+         },
+         { # The tool_call_id is from tool_calls and the content is the result of the function call you made
+             "role": "tool",
+             "content": "20",
+             "tool_call_id": "call__0_get_current_weather_cmpl-..."[:9] # Make sure to truncate the ID (the chat template requires it)
+         }
+     ],
+     tools=[{
+         "type": "function",
+         "function": {
+             "name": "get_current_weather",
+             "description": "Get the current weather in a given location",
+             "parameters": {
+                 "type": "object",
+                 "properties": {
+                     "location": {
+                         "type": "string",
+                         "description": "The city and state, e.g. San Francisco, CA"
+                     },
+                     "unit": {
+                         "type": "string",
+                         "enum": [ "celsius", "fahrenheit" ]
+                     }
+                 },
+                 "required": [ "location" ]
+             }
+         }
+     }],
+     #tool_choice={
+     #    "type": "function",
+     #    "function": {
+     #        "name": "get_current_weather"
+     #    }
+     #}
+ ))
+ ```
+
+ <!-- README_GGUF.md-how-to-run end -->