Add grammar example
README.md CHANGED

@@ -178,7 +178,55 @@ from llama_cpp import Llama
 
 # Chat Completion API
 
+grammar = LlamaGrammar.from_json_schema(json.dumps({
+    "type": "array",
+    "items": {
+        "type": "object",
+        "required": [ "name", "arguments" ],
+        "properties": {
+            "name": {
+                "type": "string"
+            },
+            "arguments": {
+                "type": "object"
+            }
+        }
+    }
+}))
+
 llm = Llama(model_path="./Mistral-7B-Instruct-v0.3.IQ4_XS.gguf", n_gpu_layers=33, n_ctx=32768, temperature=0.0, repeat_penalty=1.1)
+response = llm.create_chat_completion(
+    messages = [
+        {
+            "role": "user",
+            "content": "What's the weather like in Oslo and Stockholm?"
+        }
+    ],
+    tools=[{
+        "type": "function",
+        "function": {
+            "name": "get_current_weather",
+            "description": "Get the current weather in a given location",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "The city and state, e.g. San Francisco, CA"
+                    },
+                    "unit": {
+                        "type": "string",
+                        "enum": [ "celsius", "fahrenheit" ]
+                    }
+                },
+                "required": [ "location" ]
+            }
+        }
+    }],
+    grammar = grammar
+)
+print(json.loads(response["choices"][0]["text"]))
+
 print(llm.create_chat_completion(
     messages = [
         {
@@ -201,7 +249,7 @@ print(llm.create_chat_completion(
         },
         { # The tool_call_id is from tool_calls and content is the result from the function call you made
             "role": "tool",
-            "content": 20,
+            "content": "20",
             "tool_call_id": "call__0_get_current_weather_cmpl-..."
         }
     ],
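For context, a brief sketch (not part of the diff above) of how the grammar-constrained output could be consumed downstream: because the JSON schema restricts the completion to an array of {"name", "arguments"} objects, the text can be parsed with json.loads and dispatched to local functions, with the stringified result fed back as the "tool" message shown in the second hunk. The get_current_weather stub and the hard-coded tool_call_id below are placeholders, not part of the commit.

import json

# Sample grammar-constrained completion text: the JSON schema in the diff above
# guarantees an array of {"name", "arguments"} objects.
raw_tool_calls = '[{"name": "get_current_weather", "arguments": {"location": "Oslo", "unit": "celsius"}}]'

# Placeholder tool implementation; a real one would query a weather service.
def get_current_weather(location, unit="celsius"):
    return 20

available_tools = {"get_current_weather": get_current_weather}

# Parse the constrained output, dispatch each call, and build the follow-up
# "tool" message. The result is stringified before being sent back, which is
# what the second hunk's "content": 20 -> "content": "20" fix addresses.
for call in json.loads(raw_tool_calls):
    result = available_tools[call["name"]](**call["arguments"])
    tool_message = {
        "role": "tool",
        "content": str(result),
        "tool_call_id": "call__0_get_current_weather_cmpl-...",  # placeholder id taken from the model's tool_calls
    }
    print(tool_message)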