"""Gradio chat demo backed by a Pinecone vector index and a Fireworks LLM.

The script wires together:

* VoyageAI embeddings (``voyage-large-2-instruct``),
* an existing Pinecone index exposed through a LlamaIndex ``VectorStoreIndex``,
* a Fireworks-hosted Llama 3 70B instruct model used as the chat LLM, and
* a Gradio ``Blocks`` UI that streams the model's answer token by token.

API keys are read from the ``VOYAGE_API_KEY``, ``PINECONE_API_KEY2`` and
``FIREWORKS_API_KEY`` environment variables.
"""

import os

import gradio as gr
from langchain_voyageai import VoyageAIEmbeddings
from llama_index.core import VectorStoreIndex
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.llms.fireworks import Fireworks
from llama_index.vector_stores.pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec

# --- Embeddings -------------------------------------------------------------
embed_model = VoyageAIEmbeddings(
    voyage_api_key=os.getenv('VOYAGE_API_KEY'),
    model="voyage-large-2-instruct",
    # input_type="document",
)

# --- Vector store -----------------------------------------------------------
pc = Pinecone(api_key=os.getenv('PINECONE_API_KEY2'))
pc_256 = pc.Index('subset1-voyage-large-2-instruct-cs256')

vector_store = PineconeVectorStore(pinecone_index=pc_256)
vindex = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)

# --- LLM --------------------------------------------------------------------
fireworks_model = 'accounts/fireworks/models/llama-v3-70b-instruct'
llm = Fireworks(
    api_key=os.getenv('FIREWORKS_API_KEY'),
    model=fireworks_model,
    max_tokens=512,
    temperature=0.1,
)


def get_chat_engine():
    """Build a fresh context-mode chat engine with its own memory buffer.

    Each call creates a new ``ChatMemoryBuffer`` (token limit 5000), so
    engines returned by separate calls do not share conversation history.

    Returns:
        The chat engine produced by ``vindex.as_chat_engine``.
    """
    memory = ChatMemoryBuffer.from_defaults(token_limit=5000)
    return vindex.as_chat_engine(
        chat_mode="context",
        llm=llm,
        memory=memory,
        system_prompt="You are a chatbot, able to have normal interactions, as well as talk about news events.",
    )


# --- UI ---------------------------------------------------------------------
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(height="80vh")
    msg = gr.Textbox()
    clear = gr.Button("Clear")
    # The factory itself (not its result) is passed as the initial value.
    # NOTE(review): Gradio is expected to call it on each app load so every
    # session gets its own engine and memory -- confirm for the pinned version.
    chat_engine_var = gr.State(get_chat_engine)

    def user(user_message, history):
        """Append the submitted message to the history and clear the textbox.

        Args:
            user_message: Text the user just submitted.
            history: Current chatbot history as ``[user, bot]`` pairs; a
                ``None`` history is treated as empty.

        Returns:
            A tuple of the new textbox value (empty string) and the history
            extended with ``[user_message, None]``; the ``None`` slot is
            filled in later by ``bot``.
        """
        return "", (history or []) + [[user_message, None]]

    def bot(history, chat_engine):
        """Stream the engine's reply into the last history entry.

        Args:
            history: Chatbot history whose last entry holds the pending user
                message and an empty bot slot.
            chat_engine: The session's chat engine.

        Yields:
            The history after each streamed token, so the UI can update
            incrementally.
        """
        response = chat_engine.stream_chat(history[-1][0])
        history[-1][1] = ""
        for token in response.response_gen:
            history[-1][1] += token
            yield history

    # First record the user message without queueing, then stream the answer.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, chat_engine_var], chatbot
    )
    # The lambda's return value (whatever ``reset()`` returns) is sent to the
    # chatbot output. NOTE(review): presumably ``None``, which Gradio renders
    # as an empty chat -- confirm.
    clear.click(lambda x: x.reset(), chat_engine_var, chatbot, queue=False)

if __name__ == "__main__":
    demo.queue()
    demo.launch()