|
import os |
|
import gradio as gr |
|
from dotenv import load_dotenv |
|
from langchain_community.chat_models import ChatOpenAI |
|
from langchain_community.callbacks import get_openai_callback |
|
from langchain.memory import ConversationBufferMemory |
|
from langchain.chains import ConversationChain |
|
from langchain.prompts import PromptTemplate |
|
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder |
|
|
|
|
|
from os import getenv |
|
|
|
|
|
# Load environment settings first so the API key lookup below can see values
# supplied via a .env file (presumably where OPENROUTER_API_KEY lives — confirm).
load_dotenv()

# OpenRouter endpoint and the model served through it.
_OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
_OPENROUTER_MODEL = "nousresearch/hermes-3-llama-3.1-405b:free"

# Chat model client shared by the rest of the module.
chat = ChatOpenAI(
    model_name=_OPENROUTER_MODEL,
    temperature=0.7,
    openai_api_base=_OPENROUTER_BASE_URL,
    openai_api_key=getenv("OPENROUTER_API_KEY"),
)
|
|
|
|
|
# Conversation store handed to the chain below. return_messages=True is
# presumably what lets the stored turns fill the MessagesPlaceholder — confirm.
memory = ConversationBufferMemory(return_messages=True)

# Prompt layout: fixed system instruction, then the stored turns, then the
# newest user input.
_prompt_messages = [
    ("system", "You are a helpful AI assistant."),
    MessagesPlaceholder(variable_name="history"),
    ("human", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(_prompt_messages)

# Chain that ties the model, the memory and the prompt together.
conversation = ConversationChain(llm=chat, memory=memory, prompt=prompt)
|
|
|
|
|
|
|
# Text that chat_function puts in front of the token total it appends to replies.
_TOKEN_FOOTER_PREFIX = "\n\n[Token count: "


def _strip_token_footer(reply):
    """Return ``reply`` without the trailing "[Token count: N]" footer, if any."""
    body, marker, tail = reply.rpartition(_TOKEN_FOOTER_PREFIX)
    if marker and tail.endswith("]") and tail[:-1].isdigit():
        return body
    return reply


def _restore_memory(history):
    """Reset the shared ``memory`` so it holds exactly the turns in ``history``."""
    memory.clear()
    for turn in history or []:
        if isinstance(turn, dict):
            # Message-style history: one {"role": ..., "content": ...} per entry.
            entries = [(turn.get("role"), turn.get("content"))]
        elif len(turn) == 2:
            # Pair-style history: one (user_message, bot_message) per turn.
            entries = [("user", turn[0]), ("assistant", turn[1])]
        else:
            continue
        for role, content in entries:
            if not isinstance(content, str):
                # Skip empty slots and non-text payloads; only text is replayed.
                continue
            if role == "user":
                memory.chat_memory.add_user_message(content)
            elif role == "assistant":
                # The footer is UI decoration, not something the model said.
                memory.chat_memory.add_ai_message(_strip_token_footer(content))


def chat_function(message, history):
    """Answer ``message`` and report the tokens used for the call.

    The chain's memory is rebuilt from ``history`` before every call, so the
    model's context always matches the transcript the caller passes in. Without
    this, turns removed in the UI (the "Delete Previous" and "Clear" buttons)
    would stay in the model's context, and separate sessions would bleed into
    one another through the module-level ``memory``.

    Args:
        message: The new user message.
        history: Previous turns as supplied by the chat UI, either as
            (user, bot) pairs or as role/content dicts. May be empty or None.

    Returns:
        The model's reply followed by a blank line and "[Token count: N]".
    """
    # NOTE: ``memory`` is still a single shared object; resyncing per call keeps
    # sequential use correct but does not make overlapping calls safe.
    _restore_memory(history)

    with get_openai_callback() as cb:
        response = conversation.predict(input=message)
        token_count = cb.total_tokens

    return f"{response}\n\n[Token count: {token_count}]"
|
|
|
|
|
|
|
# Widgets are built first so the interface definition below stays short.
_chat_window = gr.Chatbot(height=400)
_message_box = gr.Textbox(
    placeholder="Type your message here...",
    container=False,
    scale=7,
)

# Starter prompts offered in the UI.
_example_prompts = [
    "Tell me about yourself.",
    "What's the weather like today?",
    "Can you explain quantum computing?",
]

# Chat UI wired to chat_function; retry is disabled, undo and clear are relabelled.
iface = gr.ChatInterface(
    chat_function,
    chatbot=_chat_window,
    textbox=_message_box,
    title="AI Chat with hermes-3-llama-3.1-405b",
    description="Chat with the hermes-3-llama-3.1-405b model using OpenRouter and LangChain Community.",
    theme="soft",
    examples=_example_prompts,
    cache_examples=False,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
)


# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()
|
|