Spaces:

rajivmehtapy
/

hf-smug-demo

Paused

App Files Files Community

hf-smug-demo / app.py

rajivmehtapy

Add gpu support without parameter.

a0b7dce 5 months ago

raw

history blame contribute delete

1.59 kB

	import gradio as gr
	import os
	from loguru import logger
	from langchain_community.llms import LlamaCpp
	from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
	from langchain_core.prompts import PromptTemplate
	import spaces
	import json
	# Make sure the log directory exists. exist_ok=True turns this into a no-op
	# when the directory is already there, so no separate existence check (and
	# no check-then-create race) is needed.
	os.makedirs('logs', exist_ok=True)

	# Log file path template handed to the logger below.
	log_file = 'logs/file_{time}.log'

	# Register the file as a logger sink, with rotation set to "500 MB".
	logger.add(log_file, rotation="500 MB")

	# Callback manager wired to a stdout handler so tokens can be streamed
	# one at a time as they are generated.
	callback_manager = CallbackManager(
	    [StreamingStdOutCallbackHandler()]
	)

	# Prompt text with a single {question} placeholder.
	template = """Question: {question}

	Answer: Let's work this out in a step by step way to be sure we have the right answer."""

	# Build the reusable prompt object from the template string.
	prompt = PromptTemplate.from_template(template)

	# Optional LlamaCpp tuning knobs (not set here):
	#   n_gpu_layers = -1  # The number of layers to put on the GPU. The rest will
	#                      # be on the CPU. If you don't know how many layers there
	#                      # are, you can use -1 to move all to GPU.
	#   n_batch = 512      # Should be between 1 and n_ctx, consider the amount of
	#                      # VRAM in your GPU.

	# Make sure the model path is correct for your system!
	llm = LlamaCpp(
	    model_path="/home/user/app/models/Phi-3-mini-4k-instruct-q4.gguf",
	    callback_manager=callback_manager,
	    verbose=True,  # Verbose is required to pass to the callback manager
	)

	# Compose prompt and model into one runnable: the formatted prompt is piped
	# into the LLM. The pipe must be a plain `|`; a backslash in front of it is
	# a syntax error.
	llm_chain = prompt | llm

	@spaces.GPU()
	def greet(name: str) -> str:
	    """Answer the submitted text with the LLM chain.

	    Args:
	        name: Text received from the Gradio input; it is passed to the
	            chain as the ``question`` prompt variable.

	    Returns:
	        The response produced by ``llm_chain.invoke``.
	    """
	    response = llm_chain.invoke({"question": name})
	    logger.info(f"Response --> {response}")
	    # Hand the response back to the caller; a bare ``return`` would give the
	    # interface's "text" output nothing to display.
	    return response

	# One text input, one text output, both served by greet().
	demo = gr.Interface(
	    fn=greet,
	    inputs="text",
	    outputs="text",
	)
	# Start the Gradio app.
	demo.launch()