from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse
import httpx
import os

from helper import create_jwt, generate_random_tuple
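
# Helper contract (inferred from how these functions are used below; the
# concrete implementation lives in helper.py and is not shown here):
#   - generate_random_tuple() returns a (github_username, zed_user_id) pair
#   - create_jwt(github_username, zed_user_id) returns a signed JWT string
#     that is sent as the Bearer token on each upstream request.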

app = FastAPI()

@app.post('/ai/v1/chat/completions')
async def chat(request: Request):
    """
    Handle chat completion requests.
    This function processes incoming POST requests to the '/ai/v1/chat/completions' endpoint.
    It prepares the payload for the LLM API, generates a JWT for authentication,
    and streams the response from the LLM API back to the client.
    Returns:
        StreamingResponse: A streaming response containing the LLM API's output.
    Note:
        - The function uses environment variables for proxy configuration.
        - It generates a random GitHub username and Zed user ID for each request.
        - The LLM model defaults to "claude-3-5-sonnet-20240620" if not specified.
    """
    # Get the payload from the request
    payload = await request.json()

    # Get the model from the payload, defaulting to "claude-3-5-sonnet-20240620"
    model = payload.get('model', 'claude-3-5-sonnet-20240620')

    # Prepare the request for the LLM API
    url = "https://llm.zed.dev/completion?"
    
    llm_payload = {
        "provider": "anthropic",
        "model": model,
        "provider_request": {
            "model": model,
            "max_tokens": payload.get('max_tokens', 8192),
            "temperature": payload.get('temperature', 0),
            "top_p": payload.get('top_p', 0.7),
            "messages": payload['messages'],
            "system": ""
        }
    }

    github_username, zed_user_id = generate_random_tuple()
    jwt = create_jwt(github_username, zed_user_id)

    headers = {
        'Host': 'llm.zed.dev',
        'accept': '*/*',
        'content-type': 'application/json',
        'authorization': f'Bearer {jwt}',
        'user-agent': 'Zed/0.149.3 (macos; aarch64)'
    }

    # Read an optional upstream proxy from the environment. httpx, unlike
    # requests, keys its proxy mounts by URL pattern ('all://'), not by bare
    # scheme names ('http'/'https').
    proxy = os.environ.get('HTTP_PROXY')
    proxies = {'all://': proxy} if proxy else None

    async def generate():
        # Disable the default read timeout: streamed completions can take far
        # longer than httpx's 5-second default.
        async with httpx.AsyncClient(proxies=proxies, timeout=None) as client:
            async with client.stream('POST', url, headers=headers, json=llm_payload) as response:
                async for chunk in response.aiter_bytes():
                    yield chunk

    return StreamingResponse(generate(), media_type='application/octet-stream')

@app.get("/")
async def root():
    return {"message": "Welcome to the AI Chat Completions API"}

if __name__ == '__main__':
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
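
# Example request (illustrative only; assumes the server is running locally on
# port 8000 and that helper.py can mint valid credentials):
#
#   curl -N http://localhost:8000/ai/v1/chat/completions \
#     -H 'Content-Type: application/json' \
#     -d '{"model": "claude-3-5-sonnet-20240620",
#          "max_tokens": 1024,
#          "messages": [{"role": "user", "content": "Hello"}]}'
#
# The upstream response from llm.zed.dev is streamed back to the caller
# chunk-by-chunk, exactly as it arrives.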