# RAG chatbot: Voyage AI embeddings + Pinecone retrieval + a Fireworks-hosted
# Llama 3 model, served through a streaming Gradio chat interface.
import os

from langchain_voyageai import VoyageAIEmbeddings
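# Query-time embedding model; it must match the model that was used to embed the
# documents stored in the Pinecone index.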
embed_model = VoyageAIEmbeddings(
    voyage_api_key=os.getenv('VOYAGE_API_KEY'),
    model="voyage-large-2-instruct",
    # input_type="document",
)

from pinecone import Pinecone

pc = Pinecone(api_key=os.getenv('PINECONE_API_KEY2'))
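# Pre-built index that already holds the embedded document chunks.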
pc_256 = pc.Index('subset1-voyage-large-2-instruct-cs256')

from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.core import VectorStoreIndex

vector_store = PineconeVectorStore(pinecone_index=pc_256)
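# from_vector_store builds an index over data that is already embedded and stored
# in Pinecone; embed_model here is only used to embed incoming queries.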
vindex = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)

from llama_index.llms.fireworks import Fireworks

fireworks_model = 'accounts/fireworks/models/llama-v3-70b-instruct'
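# Llama 3 70B Instruct served via the Fireworks AI API, kept at a low temperature
# with replies capped at 512 tokens.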
llm = Fireworks(
    api_key=os.getenv('FIREWORKS_API_KEY'),
    model=fireworks_model,
    max_tokens=512,
    temperature=0.1,
)

import gradio as gr
from llama_index.core.memory import ChatMemoryBuffer

def get_chat_engine():
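    """Build a fresh context-mode chat engine: retrieval over the Pinecone index,
    the Fireworks LLM, and a token-limited conversation memory buffer."""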
    memory = ChatMemoryBuffer.from_defaults(token_limit=5000)
    return vindex.as_chat_engine(
        chat_mode="context",
        llm=llm,
        memory=memory,
        system_prompt="You are a chatbot, able to have normal interactions, as well as talk about news events.",
    )

with gr.Blocks() as demo:
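    # Minimal chat UI: a chat window, a message box, and a Clear button.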
    chatbot = gr.Chatbot(height="80vh")
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    # Per-session chat engine so concurrent users don't share conversation memory.
    chat_engine_var = gr.State(get_chat_engine)

    def user(user_message, history):
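        # Echo the user's message into the chat history and clear the input box.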
        return "", history + [[user_message, None]]

    def bot(history, chat_engine):
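        # Ask the chat engine about the latest user message and stream the reply
        # token by token into the last history entry.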
        response = chat_engine.stream_chat(history[-1][0])
        history[-1][1] = ""
        for token in response.response_gen:
            history[-1][1] += token
            yield history

    # On submit: record the user's turn immediately, then stream the bot's reply.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, chat_engine_var], chatbot
    )
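    # Clear resets the chat engine's memory; reset() returns None, which also
    # empties the Chatbot display.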
    clear.click(lambda x: x.reset(), chat_engine_var, chatbot, queue=False)

if __name__ == "__main__":
    demo.queue()
    demo.launch()