andreasmadsen nsarrazin HF staff commited on
Commit
447c0ca
1 Parent(s): bea3bcf

Make all prompt templates configurable (#400)

Browse files

* Make all prompt templates configurable

This PR enables all prompts to be configurable using Handlebars
templates as described in #382.

For backward compatibility the old hardcoded templates are
reimplemented as the default templates. The old template parameters
such as `preprompt`, `userMessageToken`, `userMessageEndToken`,
`assistantMessageToken`, `assistantMessageEndToken` are now considered
legacy. They still work as they are exposed as variables to the default
template. However, new prompt configurations should not use these. And
it is recommended that the legacy support is eventually removed.

As an example, this is how the default chat prompt template is
implemented:

```
{{preprompt}}
{{#each messages}}
{{#ifUser}}{{@root.userMessageToken}}{{content}}{{@root.userMessageEndToken}}{{/ifUser}}
{{#ifAssistant}}{{@root.assistantMessageToken}}{{content}}{{@root.assistantMessageEndToken}}{{/ifAssistant}}
{{/each}}
{{assistantMessageToken}}
```

In addition, this PR fixes an issue where the `model` configuration was used
to generate the prompts in WebSearch. However, the `defaultModel` was used
to query. This caused issues when the `model` and `defaultModel` use
different prompt configurations. This has now been changed to always use
the `defaultModel`.

Note, when developing this PR, it has been observed that the WebSearch
prompts can violate typical model assumptions. For example, a query may
be generated as:

```
Assistant: The following context ...
User: content ...
```

```
User: user message 1
User: user message 2
Assistant:
```

Models typically assume the prompts to be User -> Assistant -> User. For
best compatibility with existing configurations, this issue was not
fixed. Instead, the old behavior is maintained with the default
templates. This is also the reason why `defaultModel` was chosen as the
WebSearch model instead of `model`, as `defaultModel` may allow the
WebSearch format, while `model` might not.

This behavior, as well as the overall architecture of chat-ui,
necessitated that the template input maintained the format

```
messages: [{from: 'user' | 'assistant', content: string }]
```

For the template to be able to detect which source a message comes from
an `ifUser` and an `ifAssistant` Handlebars block helper were implemented.
The original proposed format in #382 was:

```
history: [{ user: string, assistant: string }]
```

However, using such format would require significant changes to the
project and would make it impossible to implement the existing
websearch templates.

Finally, there may be minor differences in how truncation is
implemented: in some cases, truncation is now applied to the entire
prompt, rather than part of the prompt.

Fixes: #382

* Add Sagemaker support (#401)


* work on sagemaker support

* fix sagemaker integration

* remove unnecessary deps

* fix default endpoint

* remove unneeded deps, fixed types

* Use conditional validation for endpoints

This was needed because the discriminated union couldn't handle the legacy case where `host` is undefined.

* add note in readme about aws sagemaker

* lint

* -summery +summary

---------

Co-authored-by: Nathan Sarrazin <[email protected]>

README.md CHANGED
@@ -120,9 +120,8 @@ MODELS=`[
120
  "websiteUrl": "https://open-assistant.io",
121
  "userMessageToken": "<|prompter|>", # This does not need to be a token, can be any string
122
  "assistantMessageToken": "<|assistant|>", # This does not need to be a token, can be any string
123
- "messageEndToken": "<|endoftext|>", # This does not need to be a token, can be any string
124
- # "userMessageEndToken": "", # Applies only to user messages, messageEndToken has no effect if specified. Can be any string.
125
- # "assistantMessageEndToken": "", # Applies only to assistant messages, messageEndToken has no effect if specified. Can be any string.
126
  "preprompt": "Below are a series of dialogues between various people and an AI assistant. The AI tries to be helpful, polite, honest, sophisticated, emotionally aware, and humble-but-knowledgeable. The assistant is happy to help with almost anything, and will do its best to understand exactly what is needed. It also tries to avoid giving false or misleading information, and it caveats when it isn't entirely sure about the right answer. That said, the assistant is practical and really does its best, and doesn't let caution get too much in the way of being useful.\n-----\n",
127
  "promptExamples": [
128
  {
@@ -152,7 +151,72 @@ MODELS=`[
152
 
153
  You can change things like the parameters, or customize the preprompt to better suit your needs. You can also add more models by adding more objects to the array, with different preprompts for example.
154
 
155
- ### Running your own models using a custom endpoint
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
 
157
  If you want to, instead of hitting models on the Hugging Face Inference API, you can run your own models locally.
158
 
 
120
  "websiteUrl": "https://open-assistant.io",
121
  "userMessageToken": "<|prompter|>", # This does not need to be a token, can be any string
122
  "assistantMessageToken": "<|assistant|>", # This does not need to be a token, can be any string
123
+ "userMessageEndToken": "<|endoftext|>", # Applies only to user messages. Can be any string.
124
+ "assistantMessageEndToken": "<|endoftext|>", # Applies only to assistant messages. Can be any string.
 
125
  "preprompt": "Below are a series of dialogues between various people and an AI assistant. The AI tries to be helpful, polite, honest, sophisticated, emotionally aware, and humble-but-knowledgeable. The assistant is happy to help with almost anything, and will do its best to understand exactly what is needed. It also tries to avoid giving false or misleading information, and it caveats when it isn't entirely sure about the right answer. That said, the assistant is practical and really does its best, and doesn't let caution get too much in the way of being useful.\n-----\n",
126
  "promptExamples": [
127
  {
 
151
 
152
  You can change things like the parameters, or customize the preprompt to better suit your needs. You can also add more models by adding more objects to the array, with different preprompts for example.
153
 
154
+ #### Custom prompt templates:
155
+
156
+ By default the prompt is constructed using `userMessageToken`, `assistantMessageToken`, `userMessageEndToken`, `assistantMessageEndToken`, `preprompt` parameters and a series of default templates.
157
+
158
+ However, these templates can be modified by setting the `chatPromptTemplate`, `webSearchSummaryPromptTemplate`, and `webSearchQueryPromptTemplate` parameters. Note that if WebSearch is not enabled, only `chatPromptTemplate` needs to be set. The template language is https://handlebarsjs.com. The templates have access to the model's prompt parameters (`preprompt`, etc.). However, if the templates are specified it is recommended to inline the prompt parameters, as using the references (`{{preprompt}}`) is deprecated.
159
+
160
+ For example:
161
+
162
+ ```
163
+ <System>You are an AI, called ChatAI.</System>
164
+ {{#each messages}}
165
+ {{#ifUser}}<User>{{content}}</User>{{/ifUser}}
166
+ {{#ifAssistant}}<Assistant>{{content}}</Assistant>{{/ifAssistant}}
167
+ {{/each}}
168
+ <Assistant>
169
+ ```
170
+
171
+ **chatPromptTemplate**
172
+
173
+ When querying the model for a chat response, the `chatPromptTemplate` template is used. `messages` is an array of chat messages; it has the format `[{ content: string }, ...]`. To identify if a message is a user message or an assistant message the `ifUser` and `ifAssistant` block helpers can be used.
174
+
175
+ The following is the default `chatPromptTemplate`, although newlines and indentation have been added for readability.
176
+
177
+ ```
178
+ {{preprompt}}
179
+ {{#each messages}}
180
+ {{#ifUser}}{{@root.userMessageToken}}{{content}}{{@root.userMessageEndToken}}{{/ifUser}}
181
+ {{#ifAssistant}}{{@root.assistantMessageToken}}{{content}}{{@root.assistantMessageEndToken}}{{/ifAssistant}}
182
+ {{/each}}
183
+ {{assistantMessageToken}}
184
+ ```
185
+
186
+ **webSearchQueryPromptTemplate**
187
+
188
+ When performing a websearch, the search query is constructed using the `webSearchQueryPromptTemplate` template. It is recommended that the prompt instructs the chat model to only return a few keywords.
189
+
190
+ The following is the default `webSearchQueryPromptTemplate`. Note that not all models support consecutive user-messages, which this template uses.
191
+
192
+ ```
193
+ {{userMessageToken}}
194
+ The following messages were written by a user, trying to answer a question.
195
+ {{userMessageEndToken}}
196
+ {{#each messages}}
197
+ {{#ifUser}}{{@root.userMessageToken}}{{content}}{{@root.userMessageEndToken}}{{/ifUser}}
198
+ {{/each}}
199
+ {{userMessageToken}}
200
+ What plain-text english sentence would you input into Google to answer the last question? Answer with a short (10 words max) simple sentence.
201
+ {{userMessageEndToken}}
202
+ {{assistantMessageToken}}Query:
203
+ ```
204
+
205
+ **webSearchSummaryPromptTemplate**
206
+
207
+ The search-engine response (`answer`) is summarized using the following prompt template. However, when `HF_ACCESS_TOKEN` is provided, a dedicated summary model is used instead. Additionally, the model's `query` response to `webSearchQueryPromptTemplate` is also available to this template.
208
+
209
+ The following is the default `webSearchSummaryPromptTemplate`. Note that not all models support consecutive user-messages, which this template uses.
210
+
211
+ ```
212
+ {{userMessageToken}}{{answer}}{{userMessageEndToken}}
213
+ {{userMessageToken}}
214
+ The text above should be summarized to best answer the query: {{query}}.
215
+ {{userMessageEndToken}}
216
+ {{assistantMessageToken}}Summary:
217
+ ```
218
+
219
+ #### Running your own models using a custom endpoint
220
 
221
  If you want to, instead of hitting models on the Hugging Face Inference API, you can run your own models locally.
222
 
package-lock.json CHANGED
@@ -14,6 +14,7 @@
14
  "aws4fetch": "^1.0.17",
15
  "date-fns": "^2.29.3",
16
  "dotenv": "^16.0.3",
 
17
  "highlight.js": "^11.7.0",
18
  "jsdom": "^22.0.0",
19
  "marked": "^4.3.0",
@@ -2671,6 +2672,26 @@
2671
  "integrity": "sha512-bzh50DW9kTPM00T8y4o8vQg89Di9oLJVLW/KaOGIXJWP/iqCN6WKYkbNOF04vFLJhwcpYUh9ydh/+5vpOqV4YQ==",
2672
  "dev": true
2673
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2674
  "node_modules/has": {
2675
  "version": "1.0.3",
2676
  "resolved": "https://registry.npmjs.org/has/-/has-1.0.3.tgz",
@@ -3317,7 +3338,6 @@
3317
  "version": "1.2.8",
3318
  "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz",
3319
  "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==",
3320
- "dev": true,
3321
  "funding": {
3322
  "url": "https://github.com/sponsors/ljharb"
3323
  }
@@ -3449,6 +3469,11 @@
3449
  "integrity": "sha512-Tj+HTDSJJKaZnfiuw+iaF9skdPpTo2GtEly5JHnWV/hfv2Qj/9RKsGISQtLh2ox3l5EAGw487hnBee0sIJ6v2g==",
3450
  "dev": true
3451
  },
 
 
 
 
 
3452
  "node_modules/node-int64": {
3453
  "version": "0.4.0",
3454
  "resolved": "https://registry.npmjs.org/node-int64/-/node-int64-0.4.0.tgz",
@@ -4438,6 +4463,14 @@
4438
  "sorcery": "bin/sorcery"
4439
  }
4440
  },
 
 
 
 
 
 
 
 
4441
  "node_modules/source-map-js": {
4442
  "version": "1.0.2",
4443
  "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.0.2.tgz",
@@ -5042,6 +5075,18 @@
5042
  "integrity": "sha512-TrY6DsjTQQgyS3E3dBaOXf0TpPD8u9FVrVYmKVegJuFw51n/YB9XPt+U6ydzFG5ZIN7+DIjPbNmXoBj9esYhgQ==",
5043
  "dev": true
5044
  },
 
 
 
 
 
 
 
 
 
 
 
 
5045
  "node_modules/undici": {
5046
  "version": "5.22.0",
5047
  "resolved": "https://registry.npmjs.org/undici/-/undici-5.22.0.tgz",
@@ -5441,6 +5486,11 @@
5441
  "node": ">=0.10.0"
5442
  }
5443
  },
 
 
 
 
 
5444
  "node_modules/wrappy": {
5445
  "version": "1.0.2",
5446
  "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
 
14
  "aws4fetch": "^1.0.17",
15
  "date-fns": "^2.29.3",
16
  "dotenv": "^16.0.3",
17
+ "handlebars": "^4.7.8",
18
  "highlight.js": "^11.7.0",
19
  "jsdom": "^22.0.0",
20
  "marked": "^4.3.0",
 
2672
  "integrity": "sha512-bzh50DW9kTPM00T8y4o8vQg89Di9oLJVLW/KaOGIXJWP/iqCN6WKYkbNOF04vFLJhwcpYUh9ydh/+5vpOqV4YQ==",
2673
  "dev": true
2674
  },
2675
+ "node_modules/handlebars": {
2676
+ "version": "4.7.8",
2677
+ "resolved": "https://registry.npmjs.org/handlebars/-/handlebars-4.7.8.tgz",
2678
+ "integrity": "sha512-vafaFqs8MZkRrSX7sFVUdo3ap/eNiLnb4IakshzvP56X5Nr1iGKAIqdX6tMlm6HcNRIkr6AxO5jFEoJzzpT8aQ==",
2679
+ "dependencies": {
2680
+ "minimist": "^1.2.5",
2681
+ "neo-async": "^2.6.2",
2682
+ "source-map": "^0.6.1",
2683
+ "wordwrap": "^1.0.0"
2684
+ },
2685
+ "bin": {
2686
+ "handlebars": "bin/handlebars"
2687
+ },
2688
+ "engines": {
2689
+ "node": ">=0.4.7"
2690
+ },
2691
+ "optionalDependencies": {
2692
+ "uglify-js": "^3.1.4"
2693
+ }
2694
+ },
2695
  "node_modules/has": {
2696
  "version": "1.0.3",
2697
  "resolved": "https://registry.npmjs.org/has/-/has-1.0.3.tgz",
 
3338
  "version": "1.2.8",
3339
  "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz",
3340
  "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==",
 
3341
  "funding": {
3342
  "url": "https://github.com/sponsors/ljharb"
3343
  }
 
3469
  "integrity": "sha512-Tj+HTDSJJKaZnfiuw+iaF9skdPpTo2GtEly5JHnWV/hfv2Qj/9RKsGISQtLh2ox3l5EAGw487hnBee0sIJ6v2g==",
3470
  "dev": true
3471
  },
3472
+ "node_modules/neo-async": {
3473
+ "version": "2.6.2",
3474
+ "resolved": "https://registry.npmjs.org/neo-async/-/neo-async-2.6.2.tgz",
3475
+ "integrity": "sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw=="
3476
+ },
3477
  "node_modules/node-int64": {
3478
  "version": "0.4.0",
3479
  "resolved": "https://registry.npmjs.org/node-int64/-/node-int64-0.4.0.tgz",
 
4463
  "sorcery": "bin/sorcery"
4464
  }
4465
  },
4466
+ "node_modules/source-map": {
4467
+ "version": "0.6.1",
4468
+ "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz",
4469
+ "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==",
4470
+ "engines": {
4471
+ "node": ">=0.10.0"
4472
+ }
4473
+ },
4474
  "node_modules/source-map-js": {
4475
  "version": "1.0.2",
4476
  "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.0.2.tgz",
 
5075
  "integrity": "sha512-TrY6DsjTQQgyS3E3dBaOXf0TpPD8u9FVrVYmKVegJuFw51n/YB9XPt+U6ydzFG5ZIN7+DIjPbNmXoBj9esYhgQ==",
5076
  "dev": true
5077
  },
5078
+ "node_modules/uglify-js": {
5079
+ "version": "3.17.4",
5080
+ "resolved": "https://registry.npmjs.org/uglify-js/-/uglify-js-3.17.4.tgz",
5081
+ "integrity": "sha512-T9q82TJI9e/C1TAxYvfb16xO120tMVFZrGA3f9/P4424DNu6ypK103y0GPFVa17yotwSyZW5iYXgjYHkGrJW/g==",
5082
+ "optional": true,
5083
+ "bin": {
5084
+ "uglifyjs": "bin/uglifyjs"
5085
+ },
5086
+ "engines": {
5087
+ "node": ">=0.8.0"
5088
+ }
5089
+ },
5090
  "node_modules/undici": {
5091
  "version": "5.22.0",
5092
  "resolved": "https://registry.npmjs.org/undici/-/undici-5.22.0.tgz",
 
5486
  "node": ">=0.10.0"
5487
  }
5488
  },
5489
+ "node_modules/wordwrap": {
5490
+ "version": "1.0.0",
5491
+ "resolved": "https://registry.npmjs.org/wordwrap/-/wordwrap-1.0.0.tgz",
5492
+ "integrity": "sha512-gvVzJFlPycKc5dZN4yPkP8w7Dc37BtP1yczEneOb4uq34pXZcvrtRTmWV8W+Ume+XCxKgbjM+nevkyFPMybd4Q=="
5493
+ },
5494
  "node_modules/wrappy": {
5495
  "version": "1.0.2",
5496
  "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
package.json CHANGED
@@ -46,6 +46,7 @@
46
  "aws4fetch": "^1.0.17",
47
  "date-fns": "^2.29.3",
48
  "dotenv": "^16.0.3",
 
49
  "highlight.js": "^11.7.0",
50
  "jsdom": "^22.0.0",
51
  "marked": "^4.3.0",
 
46
  "aws4fetch": "^1.0.17",
47
  "date-fns": "^2.29.3",
48
  "dotenv": "^16.0.3",
49
+ "handlebars": "^4.7.8",
50
  "highlight.js": "^11.7.0",
51
  "jsdom": "^22.0.0",
52
  "marked": "^4.3.0",
src/lib/buildPrompt.ts CHANGED
@@ -13,24 +13,6 @@ export async function buildPrompt(
13
  model: BackendModel,
14
  webSearchId?: string
15
  ): Promise<string> {
16
- const userEndToken = model.userMessageEndToken ?? model.messageEndToken;
17
- const assistantEndToken = model.assistantMessageEndToken ?? model.messageEndToken;
18
-
19
- const prompt =
20
- messages
21
- .map((m) =>
22
- m.from === "user"
23
- ? model.userMessageToken +
24
- m.content +
25
- (m.content.endsWith(userEndToken) ? "" : userEndToken)
26
- : model.assistantMessageToken +
27
- m.content +
28
- (m.content.endsWith(assistantEndToken) ? "" : assistantEndToken)
29
- )
30
- .join("") + model.assistantMessageToken;
31
-
32
- let webPrompt = "";
33
-
34
  if (webSearchId) {
35
  const webSearch = await collections.webSearches.findOne({
36
  _id: new ObjectId(webSearchId),
@@ -39,20 +21,22 @@ export async function buildPrompt(
39
  if (!webSearch) throw new Error("Web search not found");
40
 
41
  if (webSearch.summary) {
42
- webPrompt =
43
- model.assistantMessageToken +
44
- `The following context was found while searching the internet: ${webSearch.summary}` +
45
- model.assistantMessageEndToken;
 
 
 
46
  }
47
  }
48
- const finalPrompt =
49
- model.preprompt +
50
- webPrompt +
51
- prompt
 
52
  .split(" ")
53
  .slice(-(model.parameters?.truncate ?? 0))
54
- .join(" ");
55
-
56
- // Not super precise, but it's truncated in the model's backend anyway
57
- return finalPrompt;
58
  }
 
13
  model: BackendModel,
14
  webSearchId?: string
15
  ): Promise<string> {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  if (webSearchId) {
17
  const webSearch = await collections.webSearches.findOne({
18
  _id: new ObjectId(webSearchId),
 
21
  if (!webSearch) throw new Error("Web search not found");
22
 
23
  if (webSearch.summary) {
24
+ messages = [
25
+ {
26
+ from: "assistant",
27
+ content: `The following context was found while searching the internet: ${webSearch.summary}`,
28
+ },
29
+ ...messages,
30
+ ];
31
  }
32
  }
33
+
34
+ return (
35
+ model
36
+ .chatPromptRender({ messages })
37
+ // Not super precise, but it's truncated in the model's backend anyway
38
  .split(" ")
39
  .slice(-(model.parameters?.truncate ?? 0))
40
+ .join(" ")
41
+ );
 
 
42
  }
src/lib/server/models.ts CHANGED
@@ -1,4 +1,10 @@
1
  import { HF_ACCESS_TOKEN, MODELS, OLD_MODELS } from "$env/static/private";
 
 
 
 
 
 
2
  import { z } from "zod";
3
 
4
  const sagemakerEndpoint = z.object({
@@ -46,13 +52,46 @@ const modelsRaw = z
46
  modelUrl: z.string().url().optional(),
47
  datasetName: z.string().min(1).optional(),
48
  datasetUrl: z.string().url().optional(),
49
- userMessageToken: z.string(),
50
  userMessageEndToken: z.string().default(""),
51
- assistantMessageToken: z.string(),
52
  assistantMessageEndToken: z.string().default(""),
53
  messageEndToken: z.string().default(""),
54
  preprompt: z.string().default(""),
55
  prepromptUrl: z.string().url().optional(),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  promptExamples: z
57
  .array(
58
  z.object({
@@ -80,6 +119,15 @@ export const models = await Promise.all(
80
  ...m,
81
  userMessageEndToken: m?.userMessageEndToken || m?.messageEndToken,
82
  assistantMessageEndToken: m?.assistantMessageEndToken || m?.messageEndToken,
 
 
 
 
 
 
 
 
 
83
  id: m.id || m.name,
84
  displayName: m.displayName || m.name,
85
  preprompt: m.prepromptUrl ? await fetch(m.prepromptUrl).then((r) => r.text()) : m.preprompt,
 
1
  import { HF_ACCESS_TOKEN, MODELS, OLD_MODELS } from "$env/static/private";
2
+ import type {
3
+ ChatTemplateInput,
4
+ WebSearchQueryTemplateInput,
5
+ WebSearchSummaryTemplateInput,
6
+ } from "$lib/types/Template";
7
+ import { compileTemplate } from "$lib/utils/template";
8
  import { z } from "zod";
9
 
10
  const sagemakerEndpoint = z.object({
 
52
  modelUrl: z.string().url().optional(),
53
  datasetName: z.string().min(1).optional(),
54
  datasetUrl: z.string().url().optional(),
55
+ userMessageToken: z.string().default(""),
56
  userMessageEndToken: z.string().default(""),
57
+ assistantMessageToken: z.string().default(""),
58
  assistantMessageEndToken: z.string().default(""),
59
  messageEndToken: z.string().default(""),
60
  preprompt: z.string().default(""),
61
  prepromptUrl: z.string().url().optional(),
62
+ chatPromptTemplate: z
63
+ .string()
64
+ .default(
65
+ "{{preprompt}}" +
66
+ "{{#each messages}}" +
67
+ "{{#ifUser}}{{@root.userMessageToken}}{{content}}{{@root.userMessageEndToken}}{{/ifUser}}" +
68
+ "{{#ifAssistant}}{{@root.assistantMessageToken}}{{content}}{{@root.assistantMessageEndToken}}{{/ifAssistant}}" +
69
+ "{{/each}}" +
70
+ "{{assistantMessageToken}}"
71
+ ),
72
+ webSearchSummaryPromptTemplate: z
73
+ .string()
74
+ .default(
75
+ "{{userMessageToken}}{{answer}}{{userMessageEndToken}}" +
76
+ "{{userMessageToken}}" +
77
+ "The text above should be summarized to best answer the query: {{query}}." +
78
+ "{{userMessageEndToken}}" +
79
+ "{{assistantMessageToken}}Summary: "
80
+ ),
81
+ webSearchQueryPromptTemplate: z
82
+ .string()
83
+ .default(
84
+ "{{userMessageToken}}" +
85
+ "The following messages were written by a user, trying to answer a question." +
86
+ "{{userMessageEndToken}}" +
87
+ "{{#each messages}}" +
88
+ "{{#ifUser}}{{@root.userMessageToken}}{{content}}{{@root.userMessageEndToken}}{{/ifUser}}" +
89
+ "{{/each}}" +
90
+ "{{userMessageToken}}" +
91
+ "What plain-text english sentence would you input into Google to answer the last question? Answer with a short (10 words max) simple sentence." +
92
+ "{{userMessageEndToken}}" +
93
+ "{{assistantMessageToken}}Query: "
94
+ ),
95
  promptExamples: z
96
  .array(
97
  z.object({
 
119
  ...m,
120
  userMessageEndToken: m?.userMessageEndToken || m?.messageEndToken,
121
  assistantMessageEndToken: m?.assistantMessageEndToken || m?.messageEndToken,
122
+ chatPromptRender: compileTemplate<ChatTemplateInput>(m.chatPromptTemplate, m),
123
+ webSearchSummaryPromptRender: compileTemplate<WebSearchSummaryTemplateInput>(
124
+ m.webSearchSummaryPromptTemplate,
125
+ m
126
+ ),
127
+ webSearchQueryPromptRender: compileTemplate<WebSearchQueryTemplateInput>(
128
+ m.webSearchQueryPromptTemplate,
129
+ m
130
+ ),
131
  id: m.id || m.name,
132
  displayName: m.displayName || m.name,
133
  preprompt: m.prepromptUrl ? await fetch(m.prepromptUrl).then((r) => r.text()) : m.preprompt,
src/lib/server/websearch/generateQuery.ts CHANGED
@@ -1,21 +1,9 @@
1
  import type { Message } from "$lib/types/Message";
2
  import { generateFromDefaultEndpoint } from "../generateFromDefaultEndpoint";
3
- import type { BackendModel } from "../models";
4
-
5
- export async function generateQuery(messages: Message[], model: BackendModel) {
6
- const promptSearchQuery =
7
- model.userMessageToken +
8
- "The following messages were written by a user, trying to answer a question." +
9
- model.userMessageEndToken +
10
- messages
11
- .filter((message) => message.from === "user")
12
- .map((message) => model.userMessageToken + message.content + model.userMessageEndToken) +
13
- model.userMessageToken +
14
- "What plain-text english sentence would you input into Google to answer the last question? Answer with a short (10 words max) simple sentence." +
15
- model.userMessageEndToken +
16
- model.assistantMessageToken +
17
- "Query: ";
18
 
 
 
19
  const searchQuery = await generateFromDefaultEndpoint(promptSearchQuery).then((query) => {
20
  const arr = query.split(/\r?\n/);
21
  return arr[0].length > 0 ? arr[0] : arr[1];
 
1
  import type { Message } from "$lib/types/Message";
2
  import { generateFromDefaultEndpoint } from "../generateFromDefaultEndpoint";
3
+ import { defaultModel } from "../models";
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
+ export async function generateQuery(messages: Message[]) {
6
+ const promptSearchQuery = defaultModel.webSearchQueryPromptRender({ messages });
7
  const searchQuery = await generateFromDefaultEndpoint(promptSearchQuery).then((query) => {
8
  const arr = query.split(/\r?\n/);
9
  return arr[0].length > 0 ? arr[0] : arr[1];
src/lib/server/websearch/summarizeWeb.ts CHANGED
@@ -1,7 +1,8 @@
1
  import { HF_ACCESS_TOKEN } from "$env/static/private";
2
  import { HfInference } from "@huggingface/inference";
3
- import { generateFromDefaultEndpoint } from "../generateFromDefaultEndpoint";
4
  import type { BackendModel } from "../models";
 
5
 
6
  export async function summarizeWeb(content: string, query: string, model: BackendModel) {
7
  // if HF_ACCESS_TOKEN is set, we use a HF dedicated endpoint for summarization
@@ -23,19 +24,13 @@ export async function summarizeWeb(content: string, query: string, model: Backen
23
  }
24
 
25
  // else we use the LLM to generate a summary
26
- const summaryPrompt =
27
- model.userMessageToken +
28
- content
29
  .split(" ")
30
  .slice(0, model.parameters?.truncate ?? 0)
31
- .join(" ") +
32
- model.userMessageEndToken +
33
- model.userMessageToken +
34
- `The text above should be summarized to best answer the query: ${query}.` +
35
- model.userMessageEndToken +
36
- model.assistantMessageToken +
37
- "Summary: ";
38
-
39
  const summary = await generateFromDefaultEndpoint(summaryPrompt).then((txt: string) =>
40
  txt.trim()
41
  );
 
1
  import { HF_ACCESS_TOKEN } from "$env/static/private";
2
  import { HfInference } from "@huggingface/inference";
3
+ import { defaultModel } from "$lib/server/models";
4
  import type { BackendModel } from "../models";
5
+ import { generateFromDefaultEndpoint } from "../generateFromDefaultEndpoint";
6
 
7
  export async function summarizeWeb(content: string, query: string, model: BackendModel) {
8
  // if HF_ACCESS_TOKEN is set, we use a HF dedicated endpoint for summarization
 
24
  }
25
 
26
  // else we use the LLM to generate a summary
27
+ const summaryPrompt = defaultModel.webSearchSummaryPromptRender({
28
+ answer: content
 
29
  .split(" ")
30
  .slice(0, model.parameters?.truncate ?? 0)
31
+ .join(" "),
32
+ query: query,
33
+ });
 
 
 
 
 
34
  const summary = await generateFromDefaultEndpoint(summaryPrompt).then((txt: string) =>
35
  txt.trim()
36
  );
src/lib/types/Template.ts ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { Message } from "./Message";
2
+
3
+ export type LegacyParamatersTemplateInput = {
4
+ preprompt: string;
5
+ userMessageToken: string;
6
+ userMessageEndToken: string;
7
+ assistantMessageToken: string;
8
+ assistantMessageEndToken: string;
9
+ };
10
+
11
+ export type ChatTemplateInput = {
12
+ messages: Pick<Message, "from" | "content">[];
13
+ };
14
+
15
+ export type WebSearchSummaryTemplateInput = {
16
+ answer: string;
17
+ query: string;
18
+ };
19
+
20
+ export type WebSearchQueryTemplateInput = {
21
+ messages: Pick<Message, "from" | "content">[];
22
+ };
src/lib/utils/template.ts ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { Message } from "$lib/types/Message";
2
+ import type { LegacyParamatersTemplateInput } from "$lib/types/Template";
3
+ import Handlebars from "handlebars";
4
+
5
+ Handlebars.registerHelper("ifUser", function (this: Pick<Message, "from" | "content">, options) {
6
+ if (this.from == "user") return options.fn(this);
7
+ });
8
+
9
+ Handlebars.registerHelper(
10
+ "ifAssistant",
11
+ function (this: Pick<Message, "from" | "content">, options) {
12
+ if (this.from == "assistant") return options.fn(this);
13
+ }
14
+ );
15
+
16
+ export function compileTemplate<T>(input: string, model: LegacyParamatersTemplateInput) {
17
+ const template = Handlebars.compile<T & LegacyParamatersTemplateInput>(input, {
18
+ knownHelpers: { ifUser: true, ifAssistant: true },
19
+ knownHelpersOnly: true,
20
+ noEscape: true,
21
+ strict: true,
22
+ preventIndent: true,
23
+ });
24
+
25
+ return function render(inputs: T, options?: RuntimeOptions) {
26
+ return template({ ...model, ...inputs }, options);
27
+ };
28
+ }
src/routes/conversation/[id]/web-search/+server.ts CHANGED
@@ -69,7 +69,7 @@ export async function GET({ params, locals, url }) {
69
 
70
  try {
71
  appendUpdate("Generating search query");
72
- webSearch.searchQuery = await generateQuery(messages, model);
73
 
74
  appendUpdate("Searching Google", [webSearch.searchQuery]);
75
  const results = await searchWeb(webSearch.searchQuery);
 
69
 
70
  try {
71
  appendUpdate("Generating search query");
72
+ webSearch.searchQuery = await generateQuery(messages);
73
 
74
  appendUpdate("Searching Google", [webSearch.searchQuery]);
75
  const results = await searchWeb(webSearch.searchQuery);