import OpenAI from "openai";
import type { Stream } from "openai/streaming.mjs";
import type { Connect, PreviewServer, ViteDevServer } from "vite";
import { verifyTokenAndRateLimit } from "./verifyTokenAndRateLimit";
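
/**
 * Registers a middleware on the Vite dev or preview server that proxies
 * POST bodies for "/inference" to an internal OpenAI-compatible API and
 * relays the completion back to the client as server-sent events. The
 * upstream base URL, API key, and model come from environment variables,
 * so they never reach the browser.
 *
 * Usage sketch (assumed wiring, not part of this file) from vite.config.ts:
 *
 *   import { defineConfig } from "vite";
 *   import { internalApiEndpointServerHook } from "./internalApiEndpointServerHook";
 *
 *   export default defineConfig({
 *     plugins: [
 *       {
 *         name: "internal-api-endpoint",
 *         configureServer: internalApiEndpointServerHook,
 *         configurePreviewServer: internalApiEndpointServerHook,
 *       },
 *     ],
 *   });
 */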
export function internalApiEndpointServerHook<
T extends ViteDevServer | PreviewServer,
>(server: T) {
  server.middlewares.use(async (request, response, next) => {
    // Only handle requests for the inference endpoint; let Vite serve the rest.
    // request.url can be undefined on Connect's IncomingMessage, so use
    // optional chaining rather than calling startsWith unconditionally.
    if (!request.url?.startsWith("/inference")) return next();
    // Extract the bearer token from the Authorization header, if present,
    // then verify it and apply rate limiting before doing any work.
    const authHeader = request.headers.authorization;
    const tokenPrefix = "Bearer ";
    const token = authHeader?.startsWith(tokenPrefix)
      ? authHeader.slice(tokenPrefix.length)
      : null;
    const authResult = await verifyTokenAndRateLimit(token);
    if (!authResult.isAuthorized) {
      response.statusCode = authResult.statusCode;
      response.end(authResult.error);
      return;
    }
    // Point the OpenAI client at the internal OpenAI-compatible API;
    // the credentials stay server-side.
    const openai = new OpenAI({
      baseURL: process.env.INTERNAL_OPENAI_COMPATIBLE_API_BASE_URL,
      apiKey: process.env.INTERNAL_OPENAI_COMPATIBLE_API_KEY,
    });
    try {
      const requestBody = await getRequestBody(request);
      // Forward the client's parameters but force the configured model
      // and streaming mode regardless of what the client requested.
      const completion = await openai.chat.completions.create({
        ...requestBody,
        model: process.env.INTERNAL_OPENAI_COMPATIBLE_API_MODEL,
        stream: true,
      });
response.setHeader("Content-Type", "text/event-stream");
response.setHeader("Cache-Control", "no-cache");
response.setHeader("Connection", "keep-alive");
const stream = OpenAIStream(completion);
stream.pipeTo(
new WritableStream({
write(chunk) {
response.write(chunk);
},
close() {
response.end();
},
}),
);
    } catch (error) {
      console.error("Error in internal API endpoint:", error);
      // Once streaming has begun the status line can no longer be changed,
      // so only set it if no headers have been sent yet.
      if (!response.headersSent) {
        response.statusCode = 500;
        response.setHeader("Content-Type", "application/json");
      }
      response.end(JSON.stringify({ error: "Internal server error" }));
    }
});
}
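
/*
 * Client-side consumption sketch (assumed caller, not part of this file):
 * POST the chat parameters with the bearer token, then read SSE frames
 * from the response body until the "[DONE]" sentinel arrives.
 *
 *   const response = await fetch("/inference", {
 *     method: "POST",
 *     headers: { Authorization: `Bearer ${token}` },
 *     body: JSON.stringify({ messages }),
 *   });
 *   const reader = response.body!.getReader();
 *   const decoder = new TextDecoder();
 *   while (true) {
 *     const { done, value } = await reader.read();
 *     if (done) break;
 *     console.log(decoder.decode(value)); // "data: {...}\n\n" frames
 *   }
 */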
// Buffers the incoming request body and parses it as JSON. The parsed
// object is trusted to match the chat completion parameter shape; no
// runtime validation is performed here.
async function getRequestBody(
  request: Connect.IncomingMessage,
): Promise<OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming> {
  return new Promise((resolve, reject) => {
    let body = "";
    // Node streams emit Buffer chunks by default, so convert explicitly.
    request.on("data", (chunk: Buffer) => {
      body += chunk.toString();
    });
    request.on("end", () => {
      try {
        resolve(JSON.parse(body));
      } catch (error) {
        reject(error);
      }
    });
    request.on("error", reject);
  });
}
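
/*
 * Each content delta is re-encoded as one OpenAI-style SSE frame, e.g.
 * (illustrative values):
 *
 *   data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1700000000,"model":"internal-model","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}
 *
 *   data: [DONE]
 */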
// Converts the OpenAI SDK's chunk stream into a byte stream of SSE frames.
function OpenAIStream(
  completion: Stream<OpenAI.Chat.Completions.ChatCompletionChunk>,
) {
  const encoder = new TextEncoder();
  return new ReadableStream({
    async start(controller) {
      for await (const chunk of completion) {
        const content = chunk.choices[0]?.delta?.content || "";
        // Chunks without content (e.g. role-only or finish-reason-only
        // deltas) are dropped; clients should rely on the [DONE] sentinel.
        if (content) {
          const payload = {
            id: chunk.id,
            object: "chat.completion.chunk",
            created: chunk.created,
            model: chunk.model,
            choices: [
              {
                index: 0,
                delta: { content },
                finish_reason: chunk.choices[0].finish_reason,
              },
            ],
          };
          controller.enqueue(
            encoder.encode(`data: ${JSON.stringify(payload)}\n\n`),
          );
        }
      }
      // Signal end of stream with the conventional OpenAI sentinel.
      controller.enqueue(encoder.encode("data: [DONE]\n\n"));
      controller.close();
    },
  });
}