Spaces:
Build error
Build error
import { authCondition } from "$lib/server/auth"; | |
import { collections } from "$lib/server/database"; | |
import { generateFromDefaultEndpoint } from "$lib/server/generateFromDefaultEndpoint.js"; | |
import { defaultModel } from "$lib/server/models"; | |
import { searchWeb } from "$lib/server/searchWeb.js"; | |
import type { Message } from "$lib/types/Message.js"; | |
import { error } from "@sveltejs/kit"; | |
import { ObjectId } from "mongodb"; | |
import { z } from "zod"; | |
import { JSDOM, VirtualConsole } from "jsdom"; | |
import type { WebSearch } from "$lib/types/WebSearch.js"; | |
/**
 * Removes `<script>` and `<style>` elements (and their contents) from the
 * subtree rooted at `node`. The tree is modified in place.
 *
 * If `node` itself is a SCRIPT or STYLE element, all of its children are
 * removed.
 *
 * @param node - Root of the subtree to clean.
 */
function removeTags(node: Node) {
	const isStripped = (candidate: Node) =>
		candidate.nodeName === "SCRIPT" || candidate.nodeName === "STYLE";

	// Iterate over a snapshot: `childNodes` is a live list, so removing entries
	// while walking it directly would skip the sibling that shifts into place.
	for (const childNode of Array.from(node.childNodes)) {
		if (isStripped(node) || isStripped(childNode)) {
			node.removeChild(childNode);
		} else {
			removeTags(childNode);
		}
	}
}
/**
 * Recursively collects the text of a DOM subtree.
 *
 * Each direct text child contributes its own text, each element child
 * contributes the result of a recursive call, and every other node type
 * contributes an empty string. The pieces are joined with `"\n"`.
 *
 * @param node - Root of the subtree to read.
 * @returns The concatenated text, one child per line.
 */
function naiveInnerText(node: Node): string {
	// The DOM `Node` constructor is not a Node.js global, so the node-type
	// constants are read from the instance, which exposes them as well.
	const { TEXT_NODE, ELEMENT_NODE } = node;

	return Array.from(node.childNodes)
		.map((childNode) => {
			switch (childNode.nodeType) {
				case TEXT_NODE:
					// Use the child's own text; the parent's `textContent` would
					// repeat the whole subtree once per text child.
					return childNode.textContent ?? "";
				case ELEMENT_NODE:
					return naiveInnerText(childNode);
				default:
					return "";
			}
		})
		.join("\n");
}
/**
 * Loose shape for arbitrarily nested data whose values are not known ahead of
 * time (the union with `unknown` means every value must be narrowed before use).
 */
interface GenericObject {
	[key: string]: GenericObject | unknown;
}

/**
 * Deletes every property whose name ends with `"link"`, at any depth.
 *
 * The object is modified in place and also returned. Values for which
 * `typeof` yields `"object"` are visited recursively; this includes arrays
 * and `null` (for which the loop simply has nothing to iterate).
 *
 * @param obj - Object to clean.
 * @returns The same `obj` reference, after cleaning.
 */
function removeLinks(obj: GenericObject) {
	for (const key in obj) {
		if (key.endsWith("link")) {
			delete obj[key];
			continue;
		}

		const value = obj[key];
		if (typeof value === "object") {
			removeLinks(value as GenericObject);
		}
	}

	return obj;
}
/**
 * Runs a web search for a conversation and streams its progress to the client.
 *
 * The conversation is looked up by `params.id` combined with
 * `authCondition(locals)`; a 404 is thrown when none matches. The `prompt`
 * query parameter must be a non-empty string after trimming (the zod `parse`
 * throws otherwise).
 *
 * The response body is a stream of JSON strings of the form `{ messages }`,
 * where `messages` is the full progress log so far. The steps are:
 *  1. ask the default model for a search query based on the user messages,
 *  2. run the search,
 *  3. use the knowledge graph if present, otherwise fetch the first result
 *     and extract its text,
 *  4. ask the model to summarize that text.
 *
 * Any failure in these steps is recorded as an `"error"` message rather than
 * aborting. The search record is then inserted into `collections.webSearches`,
 * a final `"result"` message carrying the inserted id is sent, and the stream
 * is closed.
 *
 * @returns A streaming `Response` with `Content-Type: application/json`.
 * @throws A 404 error if the conversation is not found.
 */
export async function GET({ params, locals, url }) {
	const model = defaultModel;
	const convId = new ObjectId(params.id);
	const searchId = new ObjectId();

	const conv = await collections.conversations.findOne({
		_id: convId,
		...authCondition(locals),
	});

	if (!conv) {
		throw error(404, "Conversation not found");
	}

	const prompt = z.string().trim().min(1).parse(url.searchParams.get("prompt"));

	const messages = [
		...conv.messages,
		{ content: prompt, from: "user", id: crypto.randomUUID() },
	] satisfies Message[];

	const stream = new ReadableStream({
		async start(controller) {
			const webSearch: WebSearch = {
				_id: searchId,
				convId: convId,
				prompt: prompt,
				searchQuery: "",
				knowledgeGraph: "",
				results: [],
				summary: "",
				messages: [],
				createdAt: new Date(),
				updatedAt: new Date(),
			};

			// Sends the whole progress log accumulated so far as one chunk.
			const sendMessages = () =>
				controller.enqueue(JSON.stringify({ messages: webSearch.messages }));

			try {
				webSearch.messages.push({
					type: "update",
					message: "Generating search query",
				});
				sendMessages();

				const promptSearchQuery =
					model.userMessageToken +
					"The following messages were written by a user, trying to answer a question." +
					model.messageEndToken +
					messages
						.filter((message) => message.from === "user")
						.map((message) => model.userMessageToken + message.content + model.messageEndToken)
						// join explicitly: concatenating the array itself would put
						// a comma between consecutive messages
						.join("") +
					model.userMessageToken +
					"What plain-text english sentence would you input into Google to answer the last question? Answer with a short (10 words max) simple sentence." +
					model.messageEndToken +
					model.assistantMessageToken +
					"Query: ";

				// the model has a tendency to continue answering even when we tell it not to, so the split makes
				// sure we only get the first line of the response (or the second one if the first is empty)
				const generatedQuery = await generateFromDefaultEndpoint(promptSearchQuery);
				const [firstLine = "", secondLine = ""] = generatedQuery.split(/\r?\n/);
				webSearch.searchQuery = firstLine.length > 0 ? firstLine : secondLine;

				webSearch.messages.push({
					type: "update",
					message: "Searching Google",
					args: [webSearch.searchQuery],
				});
				sendMessages();

				const results = await searchWeb(webSearch.searchQuery);

				let text = "";
				webSearch.results =
					(results.organic_results &&
						results.organic_results.map((el: { link: string }) => el.link)) ??
					[];

				if (results.knowledge_graph) {
					// if google returns a knowledge graph, we use it
					webSearch.knowledgeGraph = JSON.stringify(removeLinks(results.knowledge_graph));
					text = webSearch.knowledgeGraph;

					webSearch.messages.push({
						type: "update",
						message: "Found a Google knowledge page",
					});
					sendMessages();
				} else if (webSearch.results.length > 0) {
					// otherwise we use the top result from search
					const topUrl = webSearch.results[0];

					webSearch.messages.push({
						type: "update",
						message: "Browsing first result",
						args: [JSON.stringify(topUrl)],
					});
					sendMessages();

					// fetch the webpage, giving up after 10 seconds
					const abortController = new AbortController();
					const timeoutId = setTimeout(() => abortController.abort(), 10000);
					let htmlString: string | undefined;
					try {
						const response = await fetch(topUrl, { signal: abortController.signal });
						htmlString = await response.text();
					} catch (err) {
						// Best effort: a failed download is logged and treated as an
						// empty page, which is reported below as missing text.
						console.log(err);
					} finally {
						// do not leave the abort timer pending once the fetch has settled
						clearTimeout(timeoutId);
					}

					const virtualConsole = new VirtualConsole();
					virtualConsole.on("error", () => {
						// No-op to skip console errors.
					});

					// put the html string into a DOM
					const dom = new JSDOM(htmlString ?? "", {
						virtualConsole,
					});

					const body = dom.window.document.querySelector("body");
					if (!body) throw new Error("body of the webpage is null");

					removeTags(body);

					// recursively extract text content from the body and then remove newlines and multiple spaces
					text = (naiveInnerText(body) ?? "").replace(/ {2}|\r\n|\n|\r/gm, "");

					if (!text) throw new Error("text of the webpage is null");
				} else {
					throw new Error("No results found for this search query");
				}

				webSearch.messages.push({
					type: "update",
					message: "Creating summary",
				});
				sendMessages();

				// Limit the text to `truncate` words when the model defines that
				// parameter; without it the text is kept whole instead of being
				// cut down to nothing.
				const maxWords = model.parameters?.truncate;
				const words = text.split(" ");
				const truncatedText = (maxWords == null ? words : words.slice(0, maxWords)).join(" ");

				const summaryPrompt =
					model.userMessageToken +
					truncatedText +
					model.messageEndToken +
					model.userMessageToken +
					`The text above should be summarized to best answer the query: ${webSearch.searchQuery}.` +
					model.messageEndToken +
					model.assistantMessageToken +
					"Summary: ";

				const summary: string = await generateFromDefaultEndpoint(summaryPrompt);
				webSearch.summary = summary.trim();

				webSearch.messages.push({
					type: "update",
					message: "Injecting summary",
					args: [JSON.stringify(webSearch.summary)],
				});
				sendMessages();
			} catch (searchError) {
				// Report every failure, including thrown values that are not Error instances.
				const detail = searchError instanceof Error ? searchError.message : String(searchError);
				webSearch.messages.push({
					type: "error",
					message: "An error occurred with the web search",
					args: [JSON.stringify(detail)],
				});
			}

			const res = await collections.webSearches.insertOne(webSearch);

			webSearch.messages.push({
				type: "result",
				id: res.insertedId.toString(),
			});
			sendMessages();

			// Nothing more will be sent: close the stream so the response can end.
			controller.close();
		},
	});

	return new Response(stream, { headers: { "Content-Type": "application/json" } });
}