"""Gradio chat UI for an OpenRouter-hosted model via LangChain.

Wires a ChatOpenAI client (pointed at OpenRouter's OpenAI-compatible API)
into a ConversationChain with buffer memory, and exposes it through a
gr.ChatInterface. Requires OPENROUTER_API_KEY in the environment (or .env).
"""

import os
from os import getenv

import gradio as gr
from dotenv import load_dotenv
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain_community.callbacks import get_openai_callback
# NOTE(review): langchain_community.chat_models.ChatOpenAI and
# ConversationChain are deprecated in recent LangChain releases in favor of
# langchain-openai's ChatOpenAI and RunnableWithMessageHistory — confirm the
# pinned langchain version before migrating.
from langchain_community.chat_models import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Load environment variables (OPENROUTER_API_KEY) from a local .env file.
load_dotenv()

# ChatOpenAI works against OpenRouter because OpenRouter exposes an
# OpenAI-compatible API; only the base URL and key differ.
chat = ChatOpenAI(
    openai_api_base="https://openrouter.ai/api/v1",
    openai_api_key=getenv("OPENROUTER_API_KEY"),
    model_name="nousresearch/hermes-3-llama-3.1-405b:free",
    temperature=0.7,
)

# return_messages=True stores history as message objects, which is what the
# MessagesPlaceholder below expects (rather than a flat string transcript).
memory = ConversationBufferMemory(return_messages=True)

# Prompt: fixed system message, then the running history, then the new turn.
# The placeholder's variable_name ("history") matches ConversationBufferMemory's
# default memory key.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful AI assistant."),
        MessagesPlaceholder(variable_name="history"),
        ("human", "{input}"),
    ]
)

# Chain that threads memory + prompt through the model for each turn.
conversation = ConversationChain(
    llm=chat,
    memory=memory,
    prompt=prompt,
)


def chat_function(message: str, history) -> str:
    """Run one chat turn and append the token usage to the reply.

    Args:
        message: The user's latest message.
        history: Gradio-managed chat history (unused — the LangChain memory
            object tracks conversation state independently).

    Returns:
        The model's reply with a trailing "[Token count: N]" annotation.
    """
    # get_openai_callback tallies token usage for calls made inside the block.
    # NOTE(review): token accounting may be incomplete for non-OpenAI backends
    # such as OpenRouter — verify cb.total_tokens is populated in practice.
    with get_openai_callback() as cb:
        response = conversation.predict(input=message)
        token_count = cb.total_tokens

    return f"{response}\n\n[Token count: {token_count}]"


# Build the Gradio chat UI around chat_function.
# NOTE(review): retry_btn/undo_btn/clear_btn were removed from
# gr.ChatInterface in Gradio 5.x — confirm the pinned gradio version.
iface = gr.ChatInterface(
    chat_function,
    chatbot=gr.Chatbot(height=400),
    textbox=gr.Textbox(
        placeholder="Type your message here...", container=False, scale=7
    ),
    title="AI Chat with hermes-3-llama-3.1-405b",
    description="Chat with the hermes-3-llama-3.1-405b model using OpenRouter and LangChain Community.",
    theme="soft",
    examples=[
        "Tell me about yourself.",
        "What's the weather like today?",
        "Can you explain quantum computing?",
    ],
    cache_examples=False,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
)

if __name__ == "__main__":
    iface.launch()