Chat-with-Hermes-3-405B-Instruct

Running

File size: 2,215 Bytes

import os
import gradio as gr
from dotenv import load_dotenv
from langchain_community.chat_models import ChatOpenAI
from langchain_community.callbacks import get_openai_callback
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain
from langchain.prompts import PromptTemplate
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder


from os import getenv

# Read configuration (notably OPENROUTER_API_KEY) via python-dotenv before
# anything below looks at the environment.
load_dotenv()

# Chat model client: the OpenAI-style LangChain wrapper pointed at the
# OpenRouter base URL, authenticated with the key taken from the environment.
chat = ChatOpenAI(
    model_name="nousresearch/hermes-3-llama-3.1-405b:free",
    temperature=0.7,
    openai_api_key=getenv("OPENROUTER_API_KEY"),
    openai_api_base="https://openrouter.ai/api/v1",
)

# Conversation buffer; return_messages=True is required because the prompt
# below consumes the history through a MessagesPlaceholder.
memory = ConversationBufferMemory(return_messages=True)

# Prompt layout: fixed system instruction, then the stored history, then the
# newest user input.
_prompt_layout = [
    ("system", "You are a helpful AI assistant."),
    MessagesPlaceholder(variable_name="history"),
    ("human", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(_prompt_layout)

# Tie model, memory and prompt together into one conversation chain.
conversation = ConversationChain(
    prompt=prompt,
    memory=memory,
    llm=chat,
)


# Define the chat function
def chat_function(message, history):
    """Answer *message* using only the calling session's own chat history.

    The conversation context is rebuilt on every call from the ``history``
    that Gradio passes in, instead of being read from a module-level memory
    object. That keeps separate browser sessions from seeing each other's
    turns, and makes the UI's "Clear" / "Delete Previous" actions actually
    change what the model is shown.

    Args:
        message: The text the user just submitted.
        history: Prior turns of this chat as supplied by Gradio. Both the
            pair style (``[user_text, assistant_text]``) and the dict style
            (``{"role": ..., "content": ...}``) are accepted; ``None`` or an
            empty list means a fresh conversation.

    Returns:
        The model's reply followed by a ``[Token count: N]`` footer.
    """
    # A chain per call is cheap (no network work happens at construction) and
    # guarantees no state is shared between sessions.
    session_chain = ConversationChain(
        llm=chat,
        memory=_memory_from_history(history),
        prompt=prompt,
    )

    with get_openai_callback() as cb:
        response = session_chain.predict(input=message)
        token_count = cb.total_tokens

    # Add token count to the response
    return f"{response}\n\n[Token count: {token_count}]"


def _memory_from_history(history):
    """Build a fresh ConversationBufferMemory holding the turns in *history*."""
    session_memory = ConversationBufferMemory(return_messages=True)
    transcript = session_memory.chat_memory

    for turn in history or []:
        if isinstance(turn, dict):
            # Dict-style entry: one message per item.
            content = turn.get("content")
            if not isinstance(content, str):
                continue  # non-text payloads cannot be replayed as text
            if turn.get("role") == "user":
                transcript.add_user_message(content)
            elif turn.get("role") == "assistant":
                transcript.add_ai_message(_strip_token_footer(content))
            continue

        # Pair-style entry: (user_text, assistant_text); either side may be
        # missing or non-text, in which case it is skipped.
        try:
            user_text, ai_text = turn
        except (TypeError, ValueError):
            continue
        if isinstance(user_text, str):
            transcript.add_user_message(user_text)
        if isinstance(ai_text, str):
            transcript.add_ai_message(_strip_token_footer(ai_text))

    return session_memory


def _strip_token_footer(text):
    """Return *text* without the trailing ``[Token count: N]`` footer, if any."""
    # The footer is display-only; feeding it back would pollute the model's
    # context and inflate the token count of later turns.
    body, marker, tail = text.rpartition("\n\n[Token count: ")
    if marker and tail.endswith("]") and tail[:-1].isdigit():
        return body
    return text


# Build the individual widgets first, then hand them to the chat interface.
_chat_window = gr.Chatbot(height=400)
_message_box = gr.Textbox(
    placeholder="Type your message here...",
    container=False,
    scale=7,
)

# Starter prompts offered in the UI.
_sample_prompts = [
    "Tell me about yourself.",
    "What's the weather like today?",
    "Can you explain quantum computing?",
]

# Chat UI wired to chat_function; examples are not pre-computed
# (cache_examples=False), the retry button is disabled (retry_btn=None), and
# the undo / clear buttons carry custom labels.
iface = gr.ChatInterface(
    chat_function,
    chatbot=_chat_window,
    textbox=_message_box,
    title="AI Chat with hermes-3-llama-3.1-405b",
    description="Chat with the hermes-3-llama-3.1-405b model using OpenRouter and LangChain Community.",
    theme="soft",
    examples=_sample_prompts,
    cache_examples=False,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()