chat-ui

Build error

App Files Files Community

nsarrazin HF staff committed on Sep 20, 2023

Commit

e943a05

•

1 Parent(s): aa07e29

Backend refactor for streaming endpoints (#444)

Browse files

* wip: complete refactor of streaming backend

* working refactoring

* fix missing first token & perf regression in output quality

* lint

* Fix websearch loading from db

* fix loading

* fix invalidate

* remove logs

* fix SSR error

* typo: paragraphs

* fixed save on abort

* lint

* lint

* remove debug log in console

* lint for real

Files changed (20) hide show

src/lib/buildPrompt.ts +18 -33
src/lib/components/OpenWebSearchResults.svelte +5 -5
src/lib/components/chat/ChatMessage.svelte +15 -10
src/lib/components/chat/ChatMessages.svelte +5 -17
src/lib/components/chat/ChatWindow.svelte +2 -4
src/lib/server/getInit.ts +58 -0
src/lib/server/websearch/parseWeb.ts +1 -1
src/lib/server/websearch/runWebSearch.ts +112 -0
src/lib/types/Message.ts +5 -1
src/lib/types/MessageUpdate.ts +39 -0
src/lib/types/WebSearch.ts +7 -33
src/routes/conversation/[id]/+page.server.ts +0 -19
src/routes/conversation/[id]/+page.svelte +78 -129
src/routes/conversation/[id]/+server.ts +181 -173
src/routes/conversation/[id]/share/+server.ts +1 -1
src/routes/conversation/[id]/summarize/+server.ts +1 -1
src/routes/conversation/[id]/web-search/+server.ts +0 -165
src/routes/r/[id]/+page.server.ts +0 -20
src/routes/r/[id]/+page.svelte +0 -1
src/routes/search/[id]/+server.ts +1 -1

src/lib/buildPrompt.ts CHANGED Viewed

@@ -1,9 +1,7 @@
 import type { BackendModel } from "./server/models";
 import type { Message } from "./types/Message";
-import { collections } from "$lib/server/database";
-import { ObjectId } from "mongodb";
-import { authCondition } from "./server/auth";
 import { format } from "date-fns";
 /**
  * Convert [{user: "assistant", content: "hi"}, {user: "user", content: "hello"}] to:
  *
@@ -14,44 +12,31 @@ interface buildPromptOptions {
 	messages: Pick<Message, "from" | "content">[];
 	model: BackendModel;
 	locals?: App.Locals;
-	webSearchId?: string;
 	preprompt?: string;
 }
 export async function buildPrompt({
 	messages,
 	model,
-	locals,
-	webSearchId,
 	preprompt,
 }: buildPromptOptions): Promise<string> {
-	if (webSearchId) {
-		const webSearch = await collections.webSearches.findOne({
-			_id: new ObjectId(webSearchId),
-		});
-		if (!webSearch) throw new Error("Web search not found");
-		if (!locals) throw new Error("User not authenticated");
-		const conversation = await collections.conversations.findOne({
-			_id: webSearch.convId,
-			...authCondition(locals),
-		});
-		if (!conversation) throw new Error("Conversation not found");
-		if (webSearch.context) {
-			const messagesWithoutLastUsrMsg = messages.slice(0, -1);
-			const lastUserMsg = messages.slice(-1)[0];
-			const currentDate = format(new Date(), "MMMM d, yyyy");
-			messages = [
-				...messagesWithoutLastUsrMsg,
-				{
-					from: "user",
-					content: `Please answer my question "${lastUserMsg.content}" using the supplied context below (paragrpahs from various websites). For the context, today is ${currentDate}: \n=====================\n${webSearch.context}\n=====================\nSo my question is "${lastUserMsg.content}"`,
-				},
-			];
-		}
 	}
 	return (

 import type { BackendModel } from "./server/models";
 import type { Message } from "./types/Message";
 import { format } from "date-fns";
+import type { WebSearch } from "./types/WebSearch";
 /**
  * Convert [{user: "assistant", content: "hi"}, {user: "user", content: "hello"}] to:
  *
 	messages: Pick<Message, "from" | "content">[];
 	model: BackendModel;
 	locals?: App.Locals;
+	webSearch?: WebSearch;
 	preprompt?: string;
 }
 export async function buildPrompt({
 	messages,
 	model,
+	webSearch,
 	preprompt,
 }: buildPromptOptions): Promise<string> {
+	if (webSearch && webSearch.context) {
+		const messagesWithoutLastUsrMsg = messages.slice(0, -1);
+		const lastUserMsg = messages.slice(-1)[0];
+		const currentDate = format(new Date(), "MMMM d, yyyy");
+		messages = [
+			...messagesWithoutLastUsrMsg,
+			{
+				from: "user",
+				content: `Please answer my question "${lastUserMsg.content}" using the supplied context below (paragraphs from various websites). For the context, today is ${currentDate}:
+				=====================
+				${webSearch.context}
+				=====================
+				So my question is "${lastUserMsg.content}"`,
+			},
+		];
 	}
 	return (

src/lib/components/OpenWebSearchResults.svelte CHANGED Viewed

@@ -1,5 +1,5 @@
 <script lang="ts">
-	import type { WebSearchMessage } from "$lib/types/WebSearch";
 	import CarbonCaretRight from "~icons/carbon/caret-right";
 	import CarbonCheckmark from "~icons/carbon/checkmark-filled";
@@ -9,11 +9,11 @@
 	export let loading = false;
 	export let classNames = "";
-	export let webSearchMessages: WebSearchMessage[] = [];
 	let detailsOpen: boolean;
 	let error: boolean;
-	$: error = webSearchMessages[webSearchMessages.length - 2]?.type === "error";
 </script>
 <details
@@ -46,7 +46,7 @@
 		{:else}
 			<ol>
 				{#each webSearchMessages as message}
-					{#if message.type === "update"}
 						<li class="group border-l pb-6 last:!border-transparent last:pb-0 dark:border-gray-800">
 							<div class="flex items-start">
 								<div
@@ -64,7 +64,7 @@
 								</p>
 							{/if}
 						</li>
-					{:else if message.type === "error"}
 						<li class="group border-l pb-6 last:!border-transparent last:pb-0 dark:border-gray-800">
 							<div class="flex items-start">
 								<CarbonError

 <script lang="ts">
+	import type { WebSearchUpdate } from "$lib/types/MessageUpdate";
 	import CarbonCaretRight from "~icons/carbon/caret-right";
 	import CarbonCheckmark from "~icons/carbon/checkmark-filled";
 	export let loading = false;
 	export let classNames = "";
+	export let webSearchMessages: WebSearchUpdate[] = [];
 	let detailsOpen: boolean;
 	let error: boolean;
+	$: error = webSearchMessages[webSearchMessages.length - 1]?.messageType === "error";
 </script>
 <details
 		{:else}
 			<ol>
 				{#each webSearchMessages as message}
+					{#if message.messageType === "update"}
 						<li class="group border-l pb-6 last:!border-transparent last:pb-0 dark:border-gray-800">
 							<div class="flex items-start">
 								<div
 								</p>
 							{/if}
 						</li>
+					{:else if message.messageType === "error"}
 						<li class="group border-l pb-6 last:!border-transparent last:pb-0 dark:border-gray-800">
 							<div class="flex items-start">
 								<CarbonError

src/lib/components/chat/ChatMessage.svelte CHANGED Viewed

@@ -14,9 +14,9 @@
 	import CarbonThumbsDown from "~icons/carbon/thumbs-down";
 	import { PUBLIC_SEP_TOKEN } from "$lib/constants/publicSepToken";
 	import type { Model } from "$lib/types/Model";
-	import type { WebSearchMessage, WebSearchMessageSources } from "$lib/types/WebSearch";
 	import OpenWebSearchResults from "../OpenWebSearchResults.svelte";
 	function sanitizeMd(md: string) {
 		let ret = md
@@ -48,7 +48,7 @@
 	export let readOnly = false;
 	export let isTapped = false;
-	export let webSearchMessages: WebSearchMessage[] = [];
 	const dispatch = createEventDispatcher<{
 		retry: { content: string; id: Message["id"] };
@@ -104,18 +104,23 @@
 		}
 	});
 	$: downloadLink =
 		message.from === "user" ? `${$page.url.pathname}/message/${message.id}/prompt` : undefined;
 	let webSearchIsDone = true;
 	$: webSearchIsDone =
-		webSearchMessages.length > 0 &&
-		webSearchMessages[webSearchMessages.length - 1].type === "result";
-	$: webSearchSources = (
-		webSearchMessages.filter(({ type }) => type === "sources")?.[0] as WebSearchMessageSources
-	)?.sources;
 </script>
 {#if message.from === "assistant"}
@@ -132,11 +137,11 @@
 		<div
 			class="relative min-h-[calc(2rem+theme(spacing[3.5])*2)] min-w-[60px] break-words rounded-2xl border border-gray-100 bg-gradient-to-br from-gray-50 px-5 py-3.5 text-gray-600 prose-pre:my-2 dark:border-gray-800 dark:from-gray-800/40 dark:text-gray-300"
 		>
-			{#if webSearchMessages && webSearchMessages.length > 0}
 				<OpenWebSearchResults
 					classNames={tokens.length ? "mb-3.5" : ""}
-					{webSearchMessages}
-					loading={!webSearchIsDone}
 				/>
 			{/if}
 			{#if !message.content && (webSearchIsDone || (webSearchMessages && webSearchMessages.length === 0))}

 	import CarbonThumbsDown from "~icons/carbon/thumbs-down";
 	import { PUBLIC_SEP_TOKEN } from "$lib/constants/publicSepToken";
 	import type { Model } from "$lib/types/Model";
 	import OpenWebSearchResults from "../OpenWebSearchResults.svelte";
+	import type { WebSearchUpdate } from "$lib/types/MessageUpdate";
 	function sanitizeMd(md: string) {
 		let ret = md
 	export let readOnly = false;
 	export let isTapped = false;
+	export let webSearchMessages: WebSearchUpdate[];
 	const dispatch = createEventDispatcher<{
 		retry: { content: string; id: Message["id"] };
 		}
 	});
+	let searchUpdates: WebSearchUpdate[] = [];
+	$: searchUpdates = ((webSearchMessages.length > 0
+		? webSearchMessages
+		: message.updates?.filter(({ type }) => type === "webSearch")) ?? []) as WebSearchUpdate[];
 	$: downloadLink =
 		message.from === "user" ? `${$page.url.pathname}/message/${message.id}/prompt` : undefined;
 	let webSearchIsDone = true;
 	$: webSearchIsDone =
+		searchUpdates.length > 0 && searchUpdates[searchUpdates.length - 1].messageType === "sources";
+	$: webSearchSources =
+		searchUpdates &&
+		searchUpdates?.filter(({ messageType }) => messageType === "sources")?.[0]?.sources;
 </script>
 {#if message.from === "assistant"}
 		<div
 			class="relative min-h-[calc(2rem+theme(spacing[3.5])*2)] min-w-[60px] break-words rounded-2xl border border-gray-100 bg-gradient-to-br from-gray-50 px-5 py-3.5 text-gray-600 prose-pre:my-2 dark:border-gray-800 dark:from-gray-800/40 dark:text-gray-300"
 		>
+			{#if searchUpdates && searchUpdates.length > 0}
 				<OpenWebSearchResults
 					classNames={tokens.length ? "mb-3.5" : ""}
+					webSearchMessages={searchUpdates}
+					loading={!(searchUpdates[searchUpdates.length - 1]?.messageType === "sources")}
 				/>
 			{/if}
 			{#if !message.content && (webSearchIsDone || (webSearchMessages && webSearchMessages.length === 0))}

src/lib/components/chat/ChatMessages.svelte CHANGED Viewed

@@ -8,7 +8,8 @@
 	import type { LayoutData } from "../../../routes/$types";
 	import ChatIntroduction from "./ChatIntroduction.svelte";
 	import ChatMessage from "./ChatMessage.svelte";
-	import type { WebSearchMessage } from "$lib/types/WebSearch";
 	export let messages: Message[];
 	export let loading: boolean;
@@ -18,12 +19,10 @@
 	export let settings: LayoutData["settings"];
 	export let models: Model[];
 	export let readOnly: boolean;
-	export let searches: Record<string, WebSearchMessage[]>;
-	let webSearchArray: Array<WebSearchMessage[] | undefined> = [];
 	let chatContainer: HTMLElement;
-	export let webSearchMessages: WebSearchMessage[] = [];
 	async function scrollToBottom() {
 		await tick();
@@ -31,20 +30,9 @@
 	}
 	// If last message is from user, scroll to bottom
-	$: if (messages[messages.length - 1]?.from === "user") {
 		scrollToBottom();
 	}
-	$: messages,
-		(webSearchArray = messages.map((message, idx) => {
-			if (message.webSearchId) {
-				return searches[message.webSearchId] ?? [];
-			} else if (idx === messages.length - 1) {
-				return webSearchMessages;
-			} else {
-				return [];
-			}
-		}));
 </script>
 <div
@@ -60,7 +48,7 @@
 				{isAuthor}
 				{readOnly}
 				model={currentModel}
-				webSearchMessages={webSearchArray[i]}
 				on:retry
 				on:vote
 			/>

 	import type { LayoutData } from "../../../routes/$types";
 	import ChatIntroduction from "./ChatIntroduction.svelte";
 	import ChatMessage from "./ChatMessage.svelte";
+	import type { WebSearchUpdate } from "$lib/types/MessageUpdate";
+	import { browser } from "$app/environment";
 	export let messages: Message[];
 	export let loading: boolean;
 	export let settings: LayoutData["settings"];
 	export let models: Model[];
 	export let readOnly: boolean;
 	let chatContainer: HTMLElement;
+	export let webSearchMessages: WebSearchUpdate[] = [];
 	async function scrollToBottom() {
 		await tick();
 	}
 	// If last message is from user, scroll to bottom
+	$: if (browser && messages[messages.length - 1]?.from === "user") {
 		scrollToBottom();
 	}
 </script>
 <div
 				{isAuthor}
 				{readOnly}
 				model={currentModel}
+				webSearchMessages={i === messages.length - 1 ? webSearchMessages : []}
 				on:retry
 				on:vote
 			/>

src/lib/components/chat/ChatWindow.svelte CHANGED Viewed

@@ -13,8 +13,8 @@
 	import type { Model } from "$lib/types/Model";
 	import type { LayoutData } from "../../../routes/$types";
 	import WebSearchToggle from "../WebSearchToggle.svelte";
-	import type { WebSearchMessage } from "$lib/types/WebSearch";
 	import LoginModal from "../LoginModal.svelte";
 	export let messages: Message[] = [];
 	export let loading = false;
@@ -23,8 +23,7 @@
 	export let currentModel: Model;
 	export let models: Model[];
 	export let settings: LayoutData["settings"];
-	export let webSearchMessages: WebSearchMessage[] = [];
-	export let searches: Record<string, WebSearchMessage[]> = {};
 	export let loginRequired = false;
 	$: isReadOnly = !models.some((model) => model.id === currentModel.id);
@@ -60,7 +59,6 @@
 		readOnly={isReadOnly}
 		isAuthor={!shared}
 		{webSearchMessages}
-		{searches}
 		on:message
 		on:vote
 		on:retry={(ev) => {

 	import type { Model } from "$lib/types/Model";
 	import type { LayoutData } from "../../../routes/$types";
 	import WebSearchToggle from "../WebSearchToggle.svelte";
 	import LoginModal from "../LoginModal.svelte";
+	import type { WebSearchUpdate } from "$lib/types/MessageUpdate";
 	export let messages: Message[] = [];
 	export let loading = false;
 	export let currentModel: Model;
 	export let models: Model[];
 	export let settings: LayoutData["settings"];
+	export let webSearchMessages: WebSearchUpdate[] = [];
 	export let loginRequired = false;
 	$: isReadOnly = !models.some((model) => model.id === currentModel.id);
 		readOnly={isReadOnly}
 		isAuthor={!shared}
 		{webSearchMessages}
 		on:message
 		on:vote
 		on:retry={(ev) => {

src/lib/server/getInit.ts ADDED Viewed

	@@ -0,0 +1,58 @@

+import type { Options, RequestArgs } from "@huggingface/inference";
+const HF_INFERENCE_API_BASE_URL = "https://api-inference.huggingface.co/models/";
+/**
+ * Helper that prepares request arguments
+ *
+ * Builds the target URL and the fetch `RequestInit` for a POST request.
+ *
+ * @param args - Request arguments. `model` and `accessToken` are pulled out of it; when a
+ *   truthy `data` payload is present the request is treated as binary and `data` becomes
+ *   the body, otherwise the remaining args are JSON-encoded together with `options`.
+ * @param options - Optional flags. On the binary path `wait_for_model`, `use_cache` and
+ *   `dont_load_model` are translated into request headers; `includeCredentials` switches
+ *   `credentials` from "same-origin" to "include".
+ * @returns `url` (the model string itself when it is an http(s) URL or starts with "/",
+ *   otherwise the model appended to `HF_INFERENCE_API_BASE_URL`) and `info`, the
+ *   `RequestInit` to pass to fetch.
+ */
+export function makeRequestOptions(
+	args: RequestArgs & {
+		data?: Blob | ArrayBuffer;
+		stream?: boolean;
+	},
+	options?: Options & {
+		/** For internal HF use, which is why it's not exposed in {@link Options} */
+		includeCredentials?: boolean;
+	}
+): { url: string; info: RequestInit } {
+	const { model, accessToken, ...otherArgs } = args;
+	const headers: Record<string, string> = {};
+	if (accessToken) {
+		headers["Authorization"] = `Bearer ${accessToken}`;
+	}
+	const binary = "data" in args && !!args.data; // a truthy `data` payload selects the binary body path
+	if (!binary) {
+		headers["Content-Type"] = "application/json";
+	} else { // binary path only: on the JSON path `options` is serialized into the body below instead
+		if (options?.wait_for_model) {
+			headers["X-Wait-For-Model"] = "true";
+		}
+		if (options?.use_cache === false) {
+			headers["X-Use-Cache"] = "false";
+		}
+		if (options?.dont_load_model) {
+			headers["X-Load-Model"] = "0";
+		}
+	}
+	const url =
+		/^http(s?):/.test(model) || model.startsWith("/") // absolute http(s) URLs and root-relative paths are used verbatim
+			? model
+			: `${HF_INFERENCE_API_BASE_URL}${model}`;
+	const info: RequestInit = {
+		headers,
+		method: "POST",
+		body: binary
+			? args.data
+			: JSON.stringify({
+					...otherArgs,
+					options,
+			  }),
+		credentials: options?.includeCredentials ? "include" : "same-origin",
+	};
+	return { url, info };
+}

src/lib/server/websearch/parseWeb.ts CHANGED Viewed

@@ -5,7 +5,7 @@ export async function parseWeb(url: string) {
 	setTimeout(() => abortController.abort(), 10000);
 	const htmlString = await fetch(url, { signal: abortController.signal })
 		.then((response) => response.text())
-		.catch((err) => console.log(err));
 	const virtualConsole = new VirtualConsole();
 	virtualConsole.on("error", () => {

 	setTimeout(() => abortController.abort(), 10000);
 	const htmlString = await fetch(url, { signal: abortController.signal })
 		.then((response) => response.text())
+		.catch();
 	const virtualConsole = new VirtualConsole();
 	virtualConsole.on("error", () => {

src/lib/server/websearch/runWebSearch.ts ADDED Viewed

	@@ -0,0 +1,112 @@

+import { searchWeb } from "$lib/server/websearch/searchWeb";
+import type { Message } from "$lib/types/Message";
+import type { WebSearch, WebSearchSource } from "$lib/types/WebSearch";
+import { generateQuery } from "$lib/server/websearch/generateQuery";
+import { parseWeb } from "$lib/server/websearch/parseWeb";
+import { chunk } from "$lib/utils/chunk";
+import { findSimilarSentences } from "$lib/server/websearch/sentenceSimilarity";
+import type { Conversation } from "$lib/types/Conversation";
+import type { MessageUpdate } from "$lib/types/MessageUpdate";
+const MAX_N_PAGES_SCRAPE = 10 as const;
+const MAX_N_PAGES_EMBED = 5 as const;
+/**
+ * Runs a web search for `prompt` in the context of `conv`, reporting progress via `updatePad`.
+ *
+ * Steps: generate a search query from the conversation plus the new prompt, fetch the
+ * organic results, scrape the pages, chunk their text, and keep the chunks most similar
+ * to the prompt as `context`. Errors raised while searching are reported as an "error"
+ * update instead of being rethrown, so a (possibly partially filled) `WebSearch` is returned.
+ *
+ * @param conv - Conversation whose messages give context for query generation.
+ * @param prompt - The new user prompt to search for.
+ * @param updatePad - Callback receiving every progress, error and sources update.
+ * @returns The `WebSearch` record built so far; `context` and `contextSources` stay empty
+ *   when the search failed before they were computed.
+ */
+export async function runWebSearch(
+	conv: Conversation,
+	prompt: string,
+	updatePad: (upd: MessageUpdate) => void
+) {
+	const messages = (() => {
+		return [...conv.messages, { content: prompt, from: "user", id: crypto.randomUUID() }];
+	})() satisfies Message[];
+	const webSearch: WebSearch = {
+		prompt: prompt,
+		searchQuery: "",
+		results: [],
+		context: "",
+		contextSources: [],
+		createdAt: new Date(),
+		updatedAt: new Date(),
+	};
+	// Shorthand for the plain progress/error updates; defaults to messageType "update".
+	function appendUpdate(message: string, args?: string[], type?: "error" | "update") {
+		updatePad({ type: "webSearch", messageType: type ?? "update", message: message, args: args });
+	}
+	try {
+		webSearch.searchQuery = await generateQuery(messages);
+		appendUpdate("Searching Google", [webSearch.searchQuery]);
+		const results = await searchWeb(webSearch.searchQuery);
+		webSearch.results =
+			(results.organic_results &&
+				results.organic_results.map((el: { title: string; link: string }) => {
+					const { title, link } = el;
+					const { hostname } = new URL(link);
+					return { title, link, hostname };
+				})) ??
+			[];
+		webSearch.results = webSearch.results
+			.filter(({ link }) => !link.includes("youtube.com")) // filter out youtube links
+			.slice(0, MAX_N_PAGES_SCRAPE); // limit to first 10 links only
+		let paragraphChunks: { source: WebSearchSource; text: string }[] = [];
+		if (webSearch.results.length > 0) {
+			appendUpdate("Browsing results");
+			const promises = webSearch.results.map(async (result) => {
+				const { link } = result;
+				let text = "";
+				try {
+					text = await parseWeb(link);
+					appendUpdate("Browsing webpage", [link]);
+				} catch (e) {
+					// A page that cannot be parsed contributes no chunks but does not abort the search.
+					console.error(`Error parsing webpage "${link}"`, e);
+				}
+				const CHUNK_CAR_LEN = 512;
+				const MAX_N_CHUNKS = 100;
+				const texts = chunk(text, CHUNK_CAR_LEN).slice(0, MAX_N_CHUNKS);
+				return texts.map((t) => ({ source: result, text: t }));
+			});
+			// All pages are scraped, but only the first MAX_N_PAGES_EMBED results are kept for embedding.
+			const nestedParagraphChunks = (await Promise.all(promises)).slice(0, MAX_N_PAGES_EMBED);
+			paragraphChunks = nestedParagraphChunks.flat();
+			if (!paragraphChunks.length) {
+				throw new Error("No text found on the first 5 results");
+			}
+		} else {
+			throw new Error("No results found for this search query");
+		}
+		appendUpdate("Extracting relevant information");
+		const topKClosestParagraphs = 8;
+		const texts = paragraphChunks.map(({ text }) => text);
+		const indices = await findSimilarSentences(prompt, texts, {
+			topK: topKClosestParagraphs,
+		});
+		webSearch.context = indices.map((idx) => texts[idx]).join("");
+		// Collect each source page once, in order of first appearance among the selected chunks.
+		const usedSources = new Set<string>();
+		for (const idx of indices) {
+			const { source } = paragraphChunks[idx];
+			if (!usedSources.has(source.link)) {
+				usedSources.add(source.link);
+				webSearch.contextSources.push(source);
+			}
+		}
+		// Send the sources exactly once, after the list is complete: consumers read the first
+		// "sources" update and treat a trailing "sources" update as "search finished", so a
+		// partial list must never be emitted and an empty one must still be announced.
+		updatePad({
+			type: "webSearch",
+			messageType: "sources",
+			message: "sources",
+			sources: webSearch.contextSources,
+		});
+	} catch (searchError) {
+		// Report every failure, including non-Error throwables, so the UI is never left waiting.
+		const reason = searchError instanceof Error ? searchError.message : String(searchError);
+		appendUpdate("An error occurred with the web search", [JSON.stringify(reason)], "error");
+	}
+	return webSearch;
+}

src/lib/types/Message.ts CHANGED Viewed

@@ -1,9 +1,13 @@
 import type { Timestamps } from "./Timestamps";
 export type Message = Partial<Timestamps> & {
 	from: "user" | "assistant";
 	id: ReturnType<typeof crypto.randomUUID>;
 	content: string;
-	webSearchId?: string;
 	score?: -1 | 0 | 1;
 };

+import type { MessageUpdate } from "./MessageUpdate";
 import type { Timestamps } from "./Timestamps";
+import type { WebSearch } from "./WebSearch";
 export type Message = Partial<Timestamps> & {
 	from: "user" | "assistant";
 	id: ReturnType<typeof crypto.randomUUID>;
 	content: string;
+	updates?: MessageUpdate[];
+	webSearchId?: WebSearch["_id"]; // legacy version
+	webSearch?: WebSearch;
 	score?: -1 | 0 | 1;
 };

src/lib/types/MessageUpdate.ts ADDED Viewed

	@@ -0,0 +1,39 @@

+import type { WebSearchSource } from "./WebSearch";
+export type FinalAnswer = { // NOTE(review): presumably the complete answer sent once generation ends — confirm against the endpoint
+	type: "finalAnswer";
+	text: string;
+};
+export type TextStreamUpdate = { // NOTE(review): presumably one generated token per update — confirm against the endpoint
+	type: "stream";
+	token: string;
+};
+export type AgentUpdate = {
+	type: "agent";
+	agent: string;
+	content: string;
+	binary?: Blob;
+};
+export type WebSearchUpdate = { // progress, error and sources notifications emitted while a web search runs
+	type: "webSearch";
+	messageType: "update" | "error" | "sources"; // "sources" updates carry the pages used as context
+	message: string;
+	args?: string[]; // extra detail for the message, e.g. the search query or a page link
+	sources?: WebSearchSource[]; // set on "sources" updates
+};
+export type StatusUpdate = {
+	type: "status";
+	status: "started" | "pending" | "finished" | "error";
+	message?: string;
+};
+export type MessageUpdate = // discriminated union: narrow on the `type` field
+	| FinalAnswer
+	| TextStreamUpdate
+	| AgentUpdate
+	| WebSearchUpdate
+	| StatusUpdate;

src/lib/types/WebSearch.ts CHANGED Viewed

@@ -2,16 +2,9 @@ import type { ObjectId } from "mongodb";
 import type { Conversation } from "./Conversation";
 import type { Timestamps } from "./Timestamps";
-export interface WebSearchSource {
-	title: string;
-	link: string;
-	hostname: string;
-}
 export interface WebSearch extends Timestamps {
-	_id: ObjectId;
-	convId: Conversation["_id"];
 	prompt: string;
@@ -19,34 +12,15 @@ export interface WebSearch extends Timestamps {
 	results: WebSearchSource[];
 	context: string;
 	contextSources: WebSearchSource[];
-	messages: WebSearchMessage[];
 }
-export type WebSearchMessageUpdate = {
-	type: "update";
-	message: string;
-	args?: string[];
-};
-export type WebSearchMessageError = {
-	type: "error";
-	message: string;
-	args?: string[];
-};
-export type WebSearchMessageResult = {
-	type: "result";
-	id: string;
-};
 export type WebSearchMessageSources = {
 	type: "sources";
 	sources: WebSearchSource[];
 };
-export type WebSearchMessage =
-	| WebSearchMessageUpdate
-	| WebSearchMessageResult
-	| WebSearchMessageError
-	| WebSearchMessageSources;

 import type { Conversation } from "./Conversation";
 import type { Timestamps } from "./Timestamps";
 export interface WebSearch extends Timestamps {
+	_id?: ObjectId;
+	convId?: Conversation["_id"];
 	prompt: string;
 	results: WebSearchSource[];
 	context: string;
 	contextSources: WebSearchSource[];
 }
+export interface WebSearchSource {
+	title: string;
+	link: string;
+	hostname: string;
+}
 export type WebSearchMessageSources = {
 	type: "sources";
 	sources: WebSearchSource[];
 };

src/routes/conversation/[id]/+page.server.ts CHANGED Viewed

@@ -2,7 +2,6 @@ import { collections } from "$lib/server/database";
 import { ObjectId } from "mongodb";
 import { error } from "@sveltejs/kit";
 import { authCondition } from "$lib/server/auth";
-import type { WebSearchMessageResult, WebSearchMessageSources } from "$lib/types/WebSearch";
 import { UrlDependency } from "$lib/types/UrlDependency";
 export const load = async ({ params, depends, locals }) => {
@@ -30,27 +29,9 @@ export const load = async ({ params, depends, locals }) => {
 		throw error(404, "Conversation not found.");
 	}
-	const webSearchesId = conversation.messages
-		.filter((message) => message.webSearchId)
-		.map((message) => new ObjectId(message.webSearchId));
-	const results = await collections.webSearches.find({ _id: { $in: webSearchesId } }).toArray();
-	const searches = Object.fromEntries(
-		results.map((x) => [
-			x._id.toString(),
-			[
-				...x.messages,
-				{ type: "sources", sources: x.contextSources ?? [] } satisfies WebSearchMessageSources,
-				{ type: "result", id: x._id.toString() } satisfies WebSearchMessageResult,
-			],
-		])
-	);
 	return {
 		messages: conversation.messages,
 		title: conversation.title,
 		model: conversation.model,
-		searches,
 	};
 };

 import { ObjectId } from "mongodb";
 import { error } from "@sveltejs/kit";
 import { authCondition } from "$lib/server/auth";
 import { UrlDependency } from "$lib/types/UrlDependency";
 export const load = async ({ params, depends, locals }) => {
 		throw error(404, "Conversation not found.");
 	}
 	return {
 		messages: conversation.messages,
 		title: conversation.title,
 		model: conversation.model,
 	};
 };

src/routes/conversation/[id]/+page.svelte CHANGED Viewed

@@ -4,7 +4,6 @@
 	import { pendingMessageIdToRetry } from "$lib/stores/pendingMessageIdToRetry";
 	import { onMount } from "svelte";
 	import { page } from "$app/stores";
-	import { textGenerationStream, type Options } from "@huggingface/inference";
 	import { invalidate } from "$app/navigation";
 	import { base } from "$app/paths";
 	import { shareConversation } from "$lib/shareConversation";
@@ -13,9 +12,9 @@
 	import { randomUUID } from "$lib/utils/randomUuid";
 	import { findCurrentModel } from "$lib/utils/models";
 	import { webSearchParameters } from "$lib/stores/webSearchParameters";
-	import type { WebSearchMessage } from "$lib/types/WebSearch";
 	import type { Message } from "$lib/types/Message";
 	import { PUBLIC_APP_DISCLAIMER } from "$env/static/public";
 	export let data;
@@ -23,7 +22,7 @@
 	let lastLoadedMessages = data.messages;
 	let isAborted = false;
-	let webSearchMessages: WebSearchMessage[] = [];
 	// Since we modify the messages array locally, we don't want to reset it if an old version is passed
 	$: if (data.messages !== lastLoadedMessages) {
@@ -35,91 +34,13 @@
 	let pending = false;
 	let loginRequired = false;
-	async function getTextGenerationStream(
-		inputs: string,
-		messageId: string,
-		isRetry = false,
-		webSearchId?: string
-	) {
-		let conversationId = $page.params.id;
-		const responseId = randomUUID();
-		const response = textGenerationStream(
-			{
-				model: $page.url.href,
-				inputs,
-				parameters: {
-					...data.models.find((m) => m.id === data.model)?.parameters,
-					return_full_text: false,
-				},
-			},
-			{
-				id: messageId,
-				response_id: responseId,
-				is_retry: isRetry,
-				use_cache: false,
-				web_search_id: webSearchId,
-			} as Options
-		);
-		for await (const output of response) {
-			pending = false;
-			if (!output) {
-				break;
-			}
-			if (conversationId !== $page.params.id) {
-				fetch(`${base}/conversation/${conversationId}/stop-generating`, {
-					method: "POST",
-				}).catch(console.error);
-				break;
-			}
-			if (isAborted) {
-				isAborted = false;
-				fetch(`${base}/conversation/${conversationId}/stop-generating`, {
-					method: "POST",
-				}).catch(console.error);
-				break;
-			}
-			// final message
-			if (output.generated_text) {
-				const lastMessage = messages[messages.length - 1];
-				if (lastMessage) {
-					lastMessage.content = output.generated_text;
-					lastMessage.webSearchId = webSearchId;
-					messages = [...messages];
-				}
-				break;
-			}
-			if (!output.token.special) {
-				const lastMessage = messages[messages.length - 1];
-				if (lastMessage?.from !== "assistant") {
-					// First token has a space at the beginning, trim it
-					messages = [
-						...messages,
-						// id doesn't match the backend id but it's not important for assistant messages
-						{ from: "assistant", content: output.token.text.trimStart(), id: responseId },
-					];
-				} else {
-					lastMessage.content += output.token.text;
-					messages = [...messages];
-				}
-			}
-		}
-	}
 	async function summarizeTitle(id: string) {
 		await fetch(`${base}/conversation/${id}/summarize`, {
 			method: "POST",
 		});
 	}
 	async function writeMessage(message: string, messageId = randomUUID()) {
 		if (!message.trim()) return;
@@ -128,76 +49,105 @@
 			loading = true;
 			pending = true;
 			let retryMessageIndex = messages.findIndex((msg) => msg.id === messageId);
 			const isRetry = retryMessageIndex !== -1;
 			if (!isRetry) {
 				retryMessageIndex = messages.length;
 			}
 			messages = [
 				...messages.slice(0, retryMessageIndex),
 				{ from: "user", content: message, id: messageId },
 			];
-			let searchResponseId: string | null = "";
-			if ($webSearchParameters.useSearch) {
-				webSearchMessages = [];
-				const res = await fetch(
-					`${base}/conversation/${$page.params.id}/web-search?` +
-						new URLSearchParams({ prompt: message }),
-					{
-						method: "GET",
-					}
-				);
-				// required bc linting doesn't see TextDecoderStream for some reason?
-				// eslint-disable-next-line no-undef
-				const encoder = new TextDecoderStream();
-				const reader = res?.body?.pipeThrough(encoder).getReader();
-				while (searchResponseId === "") {
-					await new Promise((r) => setTimeout(r, 25));
-					if (isAborted) {
-						reader?.cancel();
 						return;
 					}
-					reader
-						?.read()
-						.then(async ({ done, value }) => {
-							if (done) {
-								reader.cancel();
-								return;
-							}
-							try {
-								webSearchMessages = (JSON.parse(value) as { messages: WebSearchMessage[] })
-									.messages;
-							} catch (parseError) {
-								// in case of parsing error we wait for the next message
-								return;
-							}
-							const lastSearchMessage = webSearchMessages[webSearchMessages.length - 1];
-							if (lastSearchMessage.type === "result") {
-								searchResponseId = lastSearchMessage.id;
-								reader.cancel();
-								data.searches[searchResponseId] = [...webSearchMessages];
-								return;
 							}
-						})
-						.catch(() => {
-							searchResponseId = null;
-						});
-				}
 			}
-			await getTextGenerationStream(message, messageId, isRetry, searchResponseId ?? undefined);
 			webSearchMessages = [];
 			if (messages.filter((m) => m.from === "user").length === 1) {
 				summarizeTitle($page.params.id)
 					.then(() => invalidate(UrlDependency.ConversationList))
@@ -283,7 +233,6 @@
 	{pending}
 	{messages}
 	bind:webSearchMessages
-	searches={{ ...data.searches }}
 	on:message={(event) => writeMessage(event.detail)}
 	on:retry={(event) => writeMessage(event.detail.content, event.detail.id)}
 	on:vote={(event) => voteMessage(event.detail.score, event.detail.id)}

 	import { pendingMessageIdToRetry } from "$lib/stores/pendingMessageIdToRetry";
 	import { onMount } from "svelte";
 	import { page } from "$app/stores";
 	import { invalidate } from "$app/navigation";
 	import { base } from "$app/paths";
 	import { shareConversation } from "$lib/shareConversation";
 	import { randomUUID } from "$lib/utils/randomUuid";
 	import { findCurrentModel } from "$lib/utils/models";
 	import { webSearchParameters } from "$lib/stores/webSearchParameters";
 	import type { Message } from "$lib/types/Message";
 	import { PUBLIC_APP_DISCLAIMER } from "$env/static/public";
+	import type { MessageUpdate, WebSearchUpdate } from "$lib/types/MessageUpdate";
 	export let data;
 	let lastLoadedMessages = data.messages;
 	let isAborted = false;
+	let webSearchMessages: WebSearchUpdate[] = [];
 	// Since we modify the messages array locally, we don't want to reset it if an old version is passed
 	$: if (data.messages !== lastLoadedMessages) {
 	let pending = false;
 	let loginRequired = false;
 	/**
 	 * POST to the `summarize` endpoint of the conversation with the given id.
 	 * Resolves once the request has completed; the response itself is not read.
 	 * (Presumably the server uses this to generate the conversation title; confirm against the endpoint.)
 	 */
 	async function summarizeTitle(id: string) {
 		const summarizeUrl = `${base}/conversation/${id}/summarize`;
 		const requestOptions = { method: "POST" };
 		await fetch(summarizeUrl, requestOptions);
 	}
+	// this function sends a new message to the backend
 	async function writeMessage(message: string, messageId = randomUUID()) {
 		if (!message.trim()) return;
 			loading = true;
 			pending = true;
+			// first we check if the messageId already exists, indicating a retry
 			let retryMessageIndex = messages.findIndex((msg) => msg.id === messageId);
 			const isRetry = retryMessageIndex !== -1;
+			// if it's not a retry we just use the whole array
 			if (!isRetry) {
 				retryMessageIndex = messages.length;
 			}
+			// slice up to the point of the retry
 			messages = [
 				...messages.slice(0, retryMessageIndex),
 				{ from: "user", content: message, id: messageId },
 			];
+			const responseId = randomUUID();
+			const response = await fetch(`${base}/conversation/${$page.params.id}`, {
+				method: "POST",
+				headers: { "Content-Type": "application/json" },
+				body: JSON.stringify({
+					inputs: message,
+					id: messageId,
+					response_id: responseId,
+					is_retry: isRetry,
+					web_search: $webSearchParameters.useSearch,
+				}),
+			});
+			if (!response.body) {
+				throw new Error("Body not defined");
+			}
+			// eslint-disable-next-line no-undef
+			const encoder = new TextDecoderStream();
+			const reader = response?.body?.pipeThrough(encoder).getReader();
+			let finalAnswer = "";
+			// this is a bit ugly
+			// we read the stream until we get the final answer
+			while (finalAnswer === "") {
+				// await new Promise((r) => setTimeout(r, 25));
+				// check for abort
+				if (isAborted) {
+					reader?.cancel();
+					break;
+				}
+				// if there is something to read
+				await reader?.read().then(async ({ done, value }) => {
+					// we read, if it's done we cancel
+					if (done) {
+						reader.cancel();
 						return;
 					}
+					if (!value) {
+						return;
+					}
+					// if it's not done we parse the value, which contains all messages
+					const inputs = value.split("\n");
+					inputs.forEach((el: string) => {
+						try {
+							let update = JSON.parse(el) as MessageUpdate;
+							if (update.type === "finalAnswer") {
+								finalAnswer = update.text;
+								invalidate(UrlDependency.Conversation);
+							} else if (update.type === "stream") {
+								pending = false;
+								let lastMessage = messages[messages.length - 1];
+								if (lastMessage.from !== "assistant") {
+									messages = [
+										...messages,
+										{ from: "assistant", id: randomUUID(), content: update.token },
+									];
+								} else {
+									lastMessage.content += update.token;
+									messages = [...messages];
+								}
+							} else if (update.type === "webSearch") {
+								webSearchMessages = [...webSearchMessages, update];
 							}
+						} catch (parseError) {
+							// in case of parsing error we wait for the next message
+							return;
+						}
+					});
+				});
 			}
+			// reset the websearchmessages
 			webSearchMessages = [];
+			// do title summarization
+			// TODO: we should change this to wait until there is an assistant response.
 			if (messages.filter((m) => m.from === "user").length === 1) {
 				summarizeTitle($page.params.id)
 					.then(() => invalidate(UrlDependency.ConversationList))
 	{pending}
 	{messages}
 	bind:webSearchMessages
 	on:message={(event) => writeMessage(event.detail)}
 	on:retry={(event) => writeMessage(event.detail.content, event.detail.id)}
 	on:vote={(event) => voteMessage(event.detail.score, event.detail.id)}

src/routes/conversation/[id]/+server.ts CHANGED Viewed

@@ -1,34 +1,37 @@
-import { MESSAGES_BEFORE_LOGIN, RATE_LIMIT } from "$env/static/private";
 import { buildPrompt } from "$lib/buildPrompt";
 import { PUBLIC_SEP_TOKEN } from "$lib/constants/publicSepToken";
-import { abortedGenerations } from "$lib/server/abortedGenerations";
 import { authCondition, requiresUser } from "$lib/server/auth";
 import { collections } from "$lib/server/database";
 import { modelEndpoint } from "$lib/server/modelEndpoint";
 import { models } from "$lib/server/models";
-import { ERROR_MESSAGES } from "$lib/stores/errors.js";
 import type { Message } from "$lib/types/Message";
-import { concatUint8Arrays } from "$lib/utils/concatUint8Arrays";
-import { streamToAsyncIterable } from "$lib/utils/streamToAsyncIterable";
 import { trimPrefix } from "$lib/utils/trimPrefix";
 import { trimSuffix } from "$lib/utils/trimSuffix";
-import type { TextGenerationStreamOutput } from "@huggingface/inference";
 import { error } from "@sveltejs/kit";
 import { ObjectId } from "mongodb";
 import { z } from "zod";
 import { AwsClient } from "aws4fetch";
 export async function POST({ request, fetch, locals, params, getClientAddress }) {
 	const id = z.string().parse(params.id);
 	const convId = new ObjectId(id);
-	const date = new Date();
 	const userId = locals.user?._id ?? locals.sessionId;
 	if (!userId) {
 		throw error(401, "Unauthorized");
 	}
 	const conv = await collections.conversations.findOne({
 		_id: convId,
 		...authCondition(locals),
@@ -38,12 +41,14 @@ export async function POST({ request, fetch, locals, params, getClientAddress })
 		throw error(404, "Conversation not found");
 	}
 	await collections.messageEvents.insertOne({
 		userId: userId,
 		createdAt: new Date(),
 		ip: getClientAddress(),
 	});
 	if (
 		!locals.user?._id &&
 		requiresUser &&
@@ -52,6 +57,7 @@ export async function POST({ request, fetch, locals, params, getClientAddress })
 		throw error(429, "Exceeded number of messages before login");
 	}
 	const nEvents = Math.max(
 		await collections.messageEvents.countDocuments({ userId }),
 		await collections.messageEvents.countDocuments({ ip: getClientAddress() })
@@ -61,6 +67,7 @@ export async function POST({ request, fetch, locals, params, getClientAddress })
 		throw error(429, ERROR_MESSAGES.rateLimited);
 	}
 	const model = models.find((m) => m.id === conv.model);
 	const settings = await collections.settings.findOne(authCondition(locals));
@@ -68,24 +75,30 @@ export async function POST({ request, fetch, locals, params, getClientAddress })
 		throw error(410, "Model not available anymore");
 	}
 	const json = await request.json();
 	const {
 		inputs: newPrompt,
-		options: { id: messageId, is_retry, web_search_id, response_id: responseId },
 	} = z
 		.object({
 			inputs: z.string().trim().min(1),
-			options: z.object({
-				id: z.optional(z.string().uuid()),
-				response_id: z.optional(z.string().uuid()),
-				is_retry: z.optional(z.boolean()),
-				web_search_id: z.ostring(),
-			}),
 		})
 		.parse(json);
-	const messages = (() => {
 		if (is_retry && messageId) {
 			let retryMessageIdx = conv.messages.findIndex((message) => message.id === messageId);
 			if (retryMessageIdx === -1) {
 				retryMessageIdx = conv.messages.length;
@@ -94,7 +107,8 @@ export async function POST({ request, fetch, locals, params, getClientAddress })
 				...conv.messages.slice(0, retryMessageIdx),
 				{ content: newPrompt, from: "user", id: messageId as Message["id"], updatedAt: new Date() },
 			];
-		}
 		return [
 			...conv.messages,
 			{
@@ -107,109 +121,171 @@ export async function POST({ request, fetch, locals, params, getClientAddress })
 		];
 	})() satisfies Message[];
-	const prompt = await buildPrompt({
-		messages,
-		model,
-		webSearchId: web_search_id,
-		preprompt: settings?.customPrompts?.[model.id] ?? model.preprompt,
-		locals: locals,
-	});
-	const randomEndpoint = modelEndpoint(model);
-	const abortController = new AbortController();
-	let resp: Response;
-	if (randomEndpoint.host === "sagemaker") {
-		const requestParams = JSON.stringify({
-			...json,
-			inputs: prompt,
-		});
-		const aws = new AwsClient({
-			accessKeyId: randomEndpoint.accessKey,
-			secretAccessKey: randomEndpoint.secretKey,
-			sessionToken: randomEndpoint.sessionToken,
-			service: "sagemaker",
-		});
-		resp = await aws.fetch(randomEndpoint.url, {
-			method: "POST",
-			body: requestParams,
-			signal: abortController.signal,
-			headers: {
-				"Content-Type": "application/json",
-			},
-		});
-	} else {
-		resp = await fetch(randomEndpoint.url, {
-			headers: {
-				"Content-Type": request.headers.get("Content-Type") ?? "application/json",
-				Authorization: randomEndpoint.authorization,
-			},
-			method: "POST",
-			body: JSON.stringify({
-				...json,
-				inputs: prompt,
-			}),
-			signal: abortController.signal,
-		});
-	}
-	if (!resp.body) {
-		throw new Error("Response body is empty");
-	}
-	const [stream1, stream2] = resp.body.tee();
-	async function saveMessage() {
-		let generated_text = await parseGeneratedText(stream2, convId, date, abortController);
-		// We could also check if PUBLIC_ASSISTANT_MESSAGE_TOKEN is present and use it to slice the text
-		if (generated_text.startsWith(prompt)) {
-			generated_text = generated_text.slice(prompt.length);
-		}
-		generated_text = trimSuffix(
-			trimPrefix(generated_text, "<|startoftext|>"),
-			PUBLIC_SEP_TOKEN
-		).trimEnd();
-		for (const stop of [...(model?.parameters?.stop ?? []), "<|endoftext|>"]) {
-			if (generated_text.endsWith(stop)) {
-				generated_text = generated_text.slice(0, -stop.length).trimEnd();
 			}
-		}
-		messages.push({
-			from: "assistant",
-			content: generated_text,
-			webSearchId: web_search_id,
-			id: (responseId as Message["id"]) || crypto.randomUUID(),
-			createdAt: new Date(),
-			updatedAt: new Date(),
-		});
-		await collections.conversations.updateOne(
-			{
-				_id: convId,
-			},
-			{
-				$set: {
-					messages,
-					updatedAt: new Date(),
 				},
 			}
-		);
-	}
-	saveMessage().catch(console.error);
 	// Todo: maybe we should wait for the message to be saved before ending the response - in case of errors
-	return new Response(stream1, {
-		headers: Object.fromEntries(resp.headers.entries()),
-		status: resp.status,
-		statusText: resp.statusText,
-	});
 }
 export async function DELETE({ locals, params }) {
@@ -229,74 +305,6 @@ export async function DELETE({ locals, params }) {
 	return new Response();
 }
-async function parseGeneratedText(
-	stream: ReadableStream,
-	conversationId: ObjectId,
-	promptedAt: Date,
-	abortController: AbortController
-): Promise<string> {
-	const inputs: Uint8Array[] = [];
-	for await (const input of streamToAsyncIterable(stream)) {
-		inputs.push(input);
-		const date = abortedGenerations.get(conversationId.toString());
-		if (date && date > promptedAt) {
-			abortController.abort("Cancelled by user");
-			const completeInput = concatUint8Arrays(inputs);
-			const lines = new TextDecoder()
-				.decode(completeInput)
-				.split("\n")
-				.filter((line) => line.startsWith("data:"));
-			const tokens = lines.map((line) => {
-				try {
-					const json: TextGenerationStreamOutput = JSON.parse(line.slice("data:".length));
-					return json.token.text;
-				} catch {
-					return "";
-				}
-			});
-			return tokens.join("");
-		}
-	}
-	// Merge inputs into a single Uint8Array
-	const completeInput = concatUint8Arrays(inputs);
-	// Get last line starting with "data:" and parse it as JSON to get the generated text
-	const message = new TextDecoder().decode(completeInput);
-	let lastIndex = message.lastIndexOf("\ndata:");
-	if (lastIndex === -1) {
-		lastIndex = message.indexOf("data");
-	}
-	if (lastIndex === -1) {
-		console.error("Could not parse last message", message);
-	}
-	let lastMessage = message.slice(lastIndex).trim().slice("data:".length);
-	if (lastMessage.includes("\n")) {
-		lastMessage = lastMessage.slice(0, lastMessage.indexOf("\n"));
-	}
-	const lastMessageJSON = JSON.parse(lastMessage);
-	if (lastMessageJSON.error) {
-		throw new Error(lastMessageJSON.error);
-	}
-	const res = lastMessageJSON.generated_text;
-	if (typeof res !== "string") {
-		throw new Error("Could not parse generated text");
-	}
-	return res;
-}
 export async function PATCH({ request, locals, params }) {
 	const { title } = z
 		.object({ title: z.string().trim().min(1).max(100) })

+import { HF_ACCESS_TOKEN, MESSAGES_BEFORE_LOGIN, RATE_LIMIT } from "$env/static/private";
 import { buildPrompt } from "$lib/buildPrompt";
 import { PUBLIC_SEP_TOKEN } from "$lib/constants/publicSepToken";
 import { authCondition, requiresUser } from "$lib/server/auth";
 import { collections } from "$lib/server/database";
 import { modelEndpoint } from "$lib/server/modelEndpoint";
 import { models } from "$lib/server/models";
+import { ERROR_MESSAGES } from "$lib/stores/errors";
 import type { Message } from "$lib/types/Message";
 import { trimPrefix } from "$lib/utils/trimPrefix";
 import { trimSuffix } from "$lib/utils/trimSuffix";
+import { textGenerationStream } from "@huggingface/inference";
 import { error } from "@sveltejs/kit";
 import { ObjectId } from "mongodb";
 import { z } from "zod";
 import { AwsClient } from "aws4fetch";
+import type { MessageUpdate } from "$lib/types/MessageUpdate";
+import { runWebSearch } from "$lib/server/websearch/runWebSearch";
+import type { WebSearch } from "$lib/types/WebSearch";
+import { abortedGenerations } from "$lib/server/abortedGenerations.js";
 export async function POST({ request, fetch, locals, params, getClientAddress }) {
 	const id = z.string().parse(params.id);
 	const convId = new ObjectId(id);
+	const promptedAt = new Date();
 	const userId = locals.user?._id ?? locals.sessionId;
+	// check user
 	if (!userId) {
 		throw error(401, "Unauthorized");
 	}
+	// check if the user has access to the conversation
 	const conv = await collections.conversations.findOne({
 		_id: convId,
 		...authCondition(locals),
 		throw error(404, "Conversation not found");
 	}
+	// register the event for ratelimiting
 	await collections.messageEvents.insertOne({
 		userId: userId,
 		createdAt: new Date(),
 		ip: getClientAddress(),
 	});
+	// make sure an anonymous user can't post more than one message
 	if (
 		!locals.user?._id &&
 		requiresUser &&
 		throw error(429, "Exceeded number of messages before login");
 	}
+	// check if the user is rate limited
 	const nEvents = Math.max(
 		await collections.messageEvents.countDocuments({ userId }),
 		await collections.messageEvents.countDocuments({ ip: getClientAddress() })
 		throw error(429, ERROR_MESSAGES.rateLimited);
 	}
+	// fetch the model
 	const model = models.find((m) => m.id === conv.model);
 	const settings = await collections.settings.findOne(authCondition(locals));
 		throw error(410, "Model not available anymore");
 	}
+	// finally parse the content of the request
 	const json = await request.json();
 	const {
 		inputs: newPrompt,
+		response_id: responseId,
+		id: messageId,
+		is_retry,
+		web_search: webSearch,
 	} = z
 		.object({
 			inputs: z.string().trim().min(1),
+			id: z.optional(z.string().uuid()),
+			response_id: z.optional(z.string().uuid()),
+			is_retry: z.optional(z.boolean()),
+			web_search: z.optional(z.boolean()),
 		})
 		.parse(json);
+	// get the list of messages
+	// while checking for retries
+	let messages = (() => {
 		if (is_retry && messageId) {
+			// if the message is a retry, replace the message and remove the messages after it
 			let retryMessageIdx = conv.messages.findIndex((message) => message.id === messageId);
 			if (retryMessageIdx === -1) {
 				retryMessageIdx = conv.messages.length;
 				...conv.messages.slice(0, retryMessageIdx),
 				{ content: newPrompt, from: "user", id: messageId as Message["id"], updatedAt: new Date() },
 			];
+		} // else append the message at the bottom
 		return [
 			...conv.messages,
 			{
 		];
 	})() satisfies Message[];
+	// Build the response stream. Every update is sent to the client as one JSON object per line.
+	// `start` runs the optional web search, builds the prompt and forwards the generated tokens;
+	// `cancel` persists the messages as they stand when the consumer cancels the stream.
+	const stream = new ReadableStream({
+		async start(controller) {
+			// non-token updates are collected here and attached to the assistant message
+			const updates: MessageUpdate[] = [];
+			// Serialize an update as a JSON line and enqueue it on the stream.
+			// Token ("stream") updates are sent but not recorded in `updates`.
+			function update(newUpdate: MessageUpdate) {
+				if (newUpdate.type !== "stream") {
+					updates.push(newUpdate);
+				}
+				controller.enqueue(JSON.stringify(newUpdate) + "\n");
+			}
+			update({ type: "status", status: "started" });
+			let webSearchResults: WebSearch | undefined;
+			if (webSearch) {
+				// runWebSearch receives `update` so it can report progress on the same stream
+				webSearchResults = await runWebSearch(conv, newPrompt, update);
+			}
+			// we can now build the prompt using the messages
+			const prompt = await buildPrompt({
+				messages,
+				model,
+				webSearch: webSearchResults,
+				preprompt: settings?.customPrompts?.[model.id] ?? model.preprompt,
+				locals: locals,
+			});
+			// fetch the endpoint
+			const randomEndpoint = modelEndpoint(model);
+			let usedFetch = fetch;
+			if (randomEndpoint.host === "sagemaker") {
+				// sagemaker endpoints go through the AwsClient's fetch instead of the request fetch
+				const aws = new AwsClient({
+					accessKeyId: randomEndpoint.accessKey,
+					secretAccessKey: randomEndpoint.secretKey,
+					sessionToken: randomEndpoint.sessionToken,
+					service: "sagemaker",
+				});
+				usedFetch = aws.fetch.bind(aws) as typeof fetch;
+			}
+			// Clean up `generated_text`, store it as the content of the last message,
+			// persist the conversation and send the `finalAnswer` update to the client.
+			// Does nothing when there is no last message.
+			async function saveLast(generated_text: string) {
+				const lastMessage = messages[messages.length - 1];
+				if (lastMessage) {
+					// We could also check if PUBLIC_ASSISTANT_MESSAGE_TOKEN is present and use it to slice the text
+					if (generated_text.startsWith(prompt)) {
+						generated_text = generated_text.slice(prompt.length);
+					}
+					generated_text = trimSuffix(
+						trimPrefix(generated_text, "<|startoftext|>"),
+						PUBLIC_SEP_TOKEN
+					).trimEnd();
+					// remove the stop tokens
+					for (const stop of [...(model?.parameters?.stop ?? []), "<|endoftext|>"]) {
+						if (generated_text.endsWith(stop)) {
+							generated_text = generated_text.slice(0, -stop.length).trimEnd();
+						}
+					}
+					lastMessage.content = generated_text;
+					await collections.conversations.updateOne(
+						{
+							_id: convId,
+						},
+						{
+							$set: {
+								messages,
+								updatedAt: new Date(),
+							},
+						}
+					);
+					update({
+						type: "finalAnswer",
+						text: generated_text,
+					});
+				}
+			}
+			const tokenStream = textGenerationStream(
+				{
+					parameters: {
+						...models.find((m) => m.id === conv.model)?.parameters,
+						return_full_text: false,
+					},
+					model: randomEndpoint.url,
+					inputs: prompt,
+					accessToken: randomEndpoint.host === "sagemaker" ? undefined : HF_ACCESS_TOKEN,
+				},
+				{
+					use_cache: false,
+					fetch: usedFetch,
+				}
+			);
+			for await (const output of tokenStream) {
+				// if generated_text is not set, the generation is not done yet
+				if (!output.generated_text) {
+					// special tokens are neither streamed nor stored
+					if (!output.token.special) {
+						const lastMessage = messages[messages.length - 1];
+						if (lastMessage?.from === "assistant") {
+							// An abort registered after this request started stops the generation:
+							// save what we have, wait for the save to finish, then leave the loop.
+							// (Previously the save was fired without await on every following token
+							// and the loop kept consuming the token stream.)
+							const abortedAt = abortedGenerations.get(convId.toString());
+							if (abortedAt && abortedAt > promptedAt) {
+								await saveLast(lastMessage.content);
+								break;
+							}
+						}
+						update({
+							type: "stream",
+							token: output.token.text,
+						});
+						// if the last message is not from assistant, it means this is the first token
+						if (lastMessage?.from !== "assistant") {
+							// so we create a new message
+							messages = [
+								...messages,
+								// id doesn't match the backend id but it's not important for assistant messages
+								// First token has a space at the beginning, trim it
+								{
+									from: "assistant",
+									content: output.token.text.trimStart(),
+									webSearch: webSearchResults,
+									updates: updates,
+									id: (responseId as Message["id"]) || crypto.randomUUID(),
+									createdAt: new Date(),
+									updatedAt: new Date(),
+								},
+							];
+						} else {
+							// otherwise we just concatenate tokens
+							lastMessage.content += output.token.text;
+						}
+					}
+				} else {
+					// last chunk: wait for the save so a failure rejects start() instead of being lost
+					await saveLast(output.generated_text);
+				}
+			}
+			// NOTE(review): the controller is never closed in this block, so the response presumably
+			// stays open until the client disconnects. Consider calling controller.close() here once
+			// the client is confirmed to handle a `done` read without a final answer.
+		},
+		// Called when the consumer cancels the stream: persist the messages as they currently stand.
+		async cancel() {
+			await collections.conversations.updateOne(
+				{
+					_id: convId,
+				},
+				{
+					$set: {
+						messages,
+						updatedAt: new Date(),
+					},
+				}
+			);
+		},
+	});
 	// Todo: maybe we should wait for the message to be saved before ending the response - in case of errors
+	return new Response(stream);
 }
 export async function DELETE({ locals, params }) {
 	return new Response();
 }
 export async function PATCH({ request, locals, params }) {
 	const { title } = z
 		.object({ title: z.string().trim().min(1).max(100) })

src/routes/conversation/[id]/share/+server.ts CHANGED Viewed

@@ -3,7 +3,7 @@ import { PUBLIC_ORIGIN, PUBLIC_SHARE_PREFIX } from "$env/static/public";
 import { authCondition } from "$lib/server/auth";
 import { collections } from "$lib/server/database";
 import type { SharedConversation } from "$lib/types/SharedConversation";
-import { hashConv } from "$lib/utils/hashConv.js";
 import { error } from "@sveltejs/kit";
 import { ObjectId } from "mongodb";
 import { nanoid } from "nanoid";

 import { authCondition } from "$lib/server/auth";
 import { collections } from "$lib/server/database";
 import type { SharedConversation } from "$lib/types/SharedConversation";
+import { hashConv } from "$lib/utils/hashConv";
 import { error } from "@sveltejs/kit";
 import { ObjectId } from "mongodb";
 import { nanoid } from "nanoid";

src/routes/conversation/[id]/summarize/+server.ts CHANGED Viewed

@@ -4,7 +4,7 @@ import { authCondition } from "$lib/server/auth";
 import { collections } from "$lib/server/database";
 import { generateFromDefaultEndpoint } from "$lib/server/generateFromDefaultEndpoint";
 import { defaultModel } from "$lib/server/models";
-import { ERROR_MESSAGES } from "$lib/stores/errors.js";
 import { error } from "@sveltejs/kit";
 import { ObjectId } from "mongodb";

 import { collections } from "$lib/server/database";
 import { generateFromDefaultEndpoint } from "$lib/server/generateFromDefaultEndpoint";
 import { defaultModel } from "$lib/server/models";
+import { ERROR_MESSAGES } from "$lib/stores/errors";
 import { error } from "@sveltejs/kit";
 import { ObjectId } from "mongodb";

src/routes/conversation/[id]/web-search/+server.ts DELETED Viewed

@@ -1,165 +0,0 @@
-import { authCondition } from "$lib/server/auth";
-import { collections } from "$lib/server/database";
-import { searchWeb } from "$lib/server/websearch/searchWeb";
-import type { Message } from "$lib/types/Message";
-import { error } from "@sveltejs/kit";
-import { ObjectId } from "mongodb";
-import { z } from "zod";
-import type { WebSearch, WebSearchSource } from "$lib/types/WebSearch";
-import { generateQuery } from "$lib/server/websearch/generateQuery";
-import { parseWeb } from "$lib/server/websearch/parseWeb";
-import { chunk } from "$lib/utils/chunk";
-import { findSimilarSentences } from "$lib/server/websearch/sentenceSimilarity";
-import { RATE_LIMIT } from "$env/static/private";
-import { ERROR_MESSAGES } from "$lib/stores/errors.js";
-const MAX_N_PAGES_SCRAPE = 10 as const;
-const MAX_N_PAGES_EMBED = 5 as const;
-export async function GET({ params, locals, url, getClientAddress }) {
-	const convId = new ObjectId(params.id);
-	const searchId = new ObjectId();
-	const conv = await collections.conversations.findOne({
-		_id: convId,
-		...authCondition(locals),
-	});
-	if (!conv) {
-		throw error(404, "Conversation not found");
-	}
-	const userId = locals.user?._id ?? locals.sessionId;
-	await collections.messageEvents.insertOne({
-		userId: userId,
-		createdAt: new Date(),
-		ip: getClientAddress(),
-	});
-	const nEvents = Math.max(
-		await collections.messageEvents.countDocuments({ userId }),
-		await collections.messageEvents.countDocuments({ ip: getClientAddress() })
-	);
-	if (RATE_LIMIT != "" && nEvents > parseInt(RATE_LIMIT)) {
-		throw error(429, ERROR_MESSAGES.rateLimited);
-	}
-	const prompt = z.string().trim().min(1).parse(url.searchParams.get("prompt"));
-	const messages = (() => {
-		return [...conv.messages, { content: prompt, from: "user", id: crypto.randomUUID() }];
-	})() satisfies Message[];
-	const stream = new ReadableStream({
-		async start(controller) {
-			const webSearch: WebSearch = {
-				_id: searchId,
-				convId: convId,
-				prompt: prompt,
-				searchQuery: "",
-				results: [],
-				context: "",
-				contextSources: [],
-				messages: [],
-				createdAt: new Date(),
-				updatedAt: new Date(),
-			};
-			function appendUpdate(message: string, args?: string[], type?: "error" | "update") {
-				webSearch.messages.push({
-					type: type ?? "update",
-					message,
-					args,
-				});
-				controller.enqueue(JSON.stringify({ messages: webSearch.messages }));
-			}
-			try {
-				appendUpdate("Generating search query");
-				webSearch.searchQuery = await generateQuery(messages);
-				appendUpdate("Searching Google", [webSearch.searchQuery]);
-				const results = await searchWeb(webSearch.searchQuery);
-				webSearch.results =
-					(results.organic_results &&
-						results.organic_results.map((el: { title: string; link: string }) => {
-							const { title, link } = el;
-							const { hostname } = new URL(link);
-							return { title, link, hostname };
-						})) ??
-					[];
-				webSearch.results = webSearch.results
-					.filter(({ link }) => !link.includes("youtube.com")) // filter out youtube links
-					.slice(0, MAX_N_PAGES_SCRAPE); // limit to first 10 links only
-				let paragraphChunks: { source: WebSearchSource; text: string }[] = [];
-				if (webSearch.results.length > 0) {
-					appendUpdate("Browsing results");
-					const promises = webSearch.results.map(async (result) => {
-						const { link } = result;
-						let text = "";
-						try {
-							text = await parseWeb(link);
-							appendUpdate("Browsing webpage", [link]);
-						} catch (e) {
-							console.error(`Error parsing webpage "${link}"`, e);
-						}
-						const CHUNK_CAR_LEN = 512;
-						const MAX_N_CHUNKS = 100;
-						const texts = chunk(text, CHUNK_CAR_LEN).slice(0, MAX_N_CHUNKS);
-						return texts.map((t) => ({ source: result, text: t }));
-					});
-					const nestedParagraphChunks = (await Promise.all(promises)).slice(0, MAX_N_PAGES_EMBED);
-					paragraphChunks = nestedParagraphChunks.flat();
-					if (!paragraphChunks.length) {
-						throw new Error("No text found on the first 5 results");
-					}
-				} else {
-					throw new Error("No results found for this search query");
-				}
-				appendUpdate("Extracting relevant information");
-				const topKClosestParagraphs = 8;
-				const texts = paragraphChunks.map(({ text }) => text);
-				const indices = await findSimilarSentences(prompt, texts, {
-					topK: topKClosestParagraphs,
-				});
-				webSearch.context = indices.map((idx) => texts[idx]).join("");
-				const usedSources = new Set<string>();
-				for (const idx of indices) {
-					const { source } = paragraphChunks[idx];
-					if (!usedSources.has(source.link)) {
-						usedSources.add(source.link);
-						webSearch.contextSources.push(source);
-					}
-				}
-				appendUpdate("Injecting relevant information");
-			} catch (searchError) {
-				if (searchError instanceof Error) {
-					webSearch.messages.push({
-						type: "error",
-						message: "An error occurred with the web search",
-						args: [JSON.stringify(searchError.message)],
-					});
-				}
-			}
-			const res = await collections.webSearches.insertOne(webSearch);
-			webSearch.messages.push({
-				type: "sources",
-				sources: webSearch.contextSources,
-			});
-			webSearch.messages.push({
-				type: "result",
-				id: res.insertedId.toString(),
-			});
-			controller.enqueue(JSON.stringify({ messages: webSearch.messages }));
-		},
-	});
-	return new Response(stream, { headers: { "Content-Type": "application/json" } });
-}

src/routes/r/[id]/+page.server.ts CHANGED Viewed

@@ -1,8 +1,6 @@
 import type { PageServerLoad } from "./$types";
 import { collections } from "$lib/server/database";
 import { error } from "@sveltejs/kit";
-import { ObjectId } from "mongodb";
-import type { WebSearchMessageResult, WebSearchMessageSources } from "$lib/types/WebSearch";
 export const load: PageServerLoad = async ({ params }) => {
 	const conversation = await collections.sharedConversations.findOne({
@@ -13,27 +11,9 @@ export const load: PageServerLoad = async ({ params }) => {
 		throw error(404, "Conversation not found");
 	}
-	const webSearchesId = conversation.messages
-		.filter((message) => message.webSearchId)
-		.map((message) => new ObjectId(message.webSearchId));
-	const results = await collections.webSearches.find({ _id: { $in: webSearchesId } }).toArray();
-	const searches = Object.fromEntries(
-		results.map((x) => [
-			x._id.toString(),
-			[
-				...x.messages,
-				{ type: "sources", sources: x.contextSources ?? [] } satisfies WebSearchMessageSources,
-				{ type: "result", id: x._id.toString() } satisfies WebSearchMessageResult,
-			],
-		])
-	);
 	return {
 		messages: conversation.messages,
 		title: conversation.title,
 		model: conversation.model,
-		searches,
 	};
 };

 import type { PageServerLoad } from "./$types";
 import { collections } from "$lib/server/database";
 import { error } from "@sveltejs/kit";
 export const load: PageServerLoad = async ({ params }) => {
 	const conversation = await collections.sharedConversations.findOne({
 		throw error(404, "Conversation not found");
 	}
 	return {
 		messages: conversation.messages,
 		title: conversation.title,
 		model: conversation.model,
 	};
 };

src/routes/r/[id]/+page.svelte CHANGED Viewed

@@ -60,7 +60,6 @@
 	{loading}
 	shared={true}
 	messages={data.messages}
-	searches={data.searches}
 	on:message={(ev) =>
 		createConversation()
 			.then((convId) => {

 	{loading}
 	shared={true}
 	messages={data.messages}
 	on:message={(ev) =>
 		createConversation()
 			.then((convId) => {

src/routes/search/[id]/+server.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import { collections } from "$lib/server/database";
-import { hashConv } from "$lib/utils/hashConv.js";
 import { error } from "@sveltejs/kit";
 import { ObjectId } from "mongodb";

 import { collections } from "$lib/server/database";
+import { hashConv } from "$lib/utils/hashConv";
 import { error } from "@sveltejs/kit";
 import { ObjectId } from "mongodb";