import os

import gradio as gr
from langchain_community.llms import LlamaCpp
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
from langchain_core.prompts import PromptTemplate
from loguru import logger

import spaces  # Hugging Face Spaces SDK; on ZeroGPU it must be imported before any CUDA work

# Create a directory for logs if it doesn't exist
os.makedirs("logs", exist_ok=True)

# Configure the logger to write to a rotating log file
log_file = "logs/file_{time}.log"
logger.add(log_file, rotation="500 MB")
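
# Note: loguru expands the {time} placeholder when the sink is added, producing
# a timestamped name such as logs/file_2025-01-01_12-00-00_000000.log (the exact
# stamp depends on startup time); rotation starts a fresh file once the current
# one reaches 500 MB.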
template = """Question: {question} | |
Answer: Let's work this out in a step by step way to be sure we have the right answer.""" | |
prompt = PromptTemplate.from_template(template) | |
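
# For reference (the sample question below is hypothetical), formatting the
# template fills the {question} slot, so prompt.format(question="What is 7 * 6?")
# renders to:
#
#   Question: What is 7 * 6?
#
#   Answer: Let's work this out in a step by step way to be sure we have the right answer.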

# Callbacks support token-wise streaming
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Make sure the model path is correct for your system!
llm = LlamaCpp(
    model_path="/home/user/app/models/Phi-3-mini-4k-instruct-q4.gguf",
    # n_gpu_layers=-1,  # Layers to offload to the GPU; -1 moves all layers, the rest stay on the CPU.
    # n_batch=512,      # Should be between 1 and n_ctx; consider the amount of VRAM in your GPU.
    callback_manager=callback_manager,
    verbose=True,  # Verbose is required to pass to the callback manager
)

llm_chain = prompt | llm
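
# The pipe operator builds a LangChain Expression Language (LCEL) RunnableSequence:
# llm_chain.invoke({"question": q}) formats the prompt and feeds the result to the
# model, roughly equivalent to llm.invoke(prompt.format(question=q)).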

@spaces.GPU  # presumably why `spaces` is imported: requests GPU time on ZeroGPU Spaces (a no-op elsewhere)
def greet(name):
    question = name
    response = llm_chain.invoke({"question": question})
    logger.info(f"Response --> {response}")
    return response  # return the answer so Gradio can display it in the output box

demo = gr.Interface(fn=greet, inputs="text", outputs="text")
demo.launch()
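
# --- Usage sketch (run as a separate script; the question is illustrative) ---
# A minimal client-side test using gradio_client, Gradio's official Python
# client, assuming the app is reachable at the default local address:
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860")
#   answer = client.predict("What is 7 * 6?", api_name="/predict")
#   print(answer)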