"""Gradio front end for running LLM inference through PowerInfer.

When executed, the script clones the PowerInfer repository, installs its
Python requirements, builds it with CMake, downloads the ReluLLaMA-7B GGUF
weights and then launches a Gradio text interface that forwards every
question to the compiled ``main`` binary.

All set-up steps are safe to repeat: an existing checkout, model directory
or model file is reused instead of causing a failure or a second download.
"""

import os
import subprocess
import sys
import urllib.request

import gradio as gr


def clone_power_infer():
    """Clone the PowerInfer repository into ``./PowerInfer``.

    The clone is skipped when a checkout already exists, because
    ``git clone`` refuses to write into a non-empty directory.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits non-zero.
    """
    repo_url = "https://github.com/SJTU-IPADS/PowerInfer.git"
    if os.path.isdir(os.path.join("PowerInfer", ".git")):
        return
    subprocess.run(["git", "clone", repo_url], check=True)


def install_requirements():
    """Install ``requirements.txt`` from the current working directory.

    pip is invoked through the running interpreter so the packages end up
    in the same environment that executes this script.

    Raises:
        subprocess.CalledProcessError: If pip exits non-zero.
    """
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"],
        check=True,
    )


def cmake_builds():
    """Configure and build the project in ``./build`` (Release config).

    Raises:
        subprocess.CalledProcessError: If either CMake step exits non-zero.
    """
    # Configure: generate the build system in ./build.
    subprocess.run(["cmake", "-S", ".", "-B", "build"], check=True)
    # Build: equivalent to `cmake --build build --config Release`.
    subprocess.run(
        ["cmake", "--build", "build", "--config", "Release"],
        check=True,
    )


def _download_model(source_url, target_path):
    """Download *source_url* to *target_path* unless the file already exists."""
    if os.path.exists(target_path):
        return
    parent_dir = os.path.dirname(target_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    # Download to a temporary name first so that an interrupted transfer is
    # never mistaken for a complete model file on the next start.
    partial_path = target_path + ".part"
    urllib.request.urlretrieve(source_url, partial_path)
    os.replace(partial_path, target_path)


clone_power_infer()
# Every relative path below is resolved inside the checkout.
os.chdir("PowerInfer")
install_requirements()
cmake_builds()

os.makedirs("ReluLLaMA-7B-PowerInfer-GGUF", exist_ok=True)

# URL to download the model file from.
url = "https://huggingface.co/PowerInfer/ReluLLaMA-7B-PowerInfer-GGUF/resolve/main/llama-7b-relu.powerinfer.gguf"

# Destination path of the downloaded model file.
destination_path = "ReluLLaMA-7B-PowerInfer-GGUF/llama-7b-relu.powerinfer.gguf"

_download_model(url, destination_path)


def chat(question):
    """Run the PowerInfer binary on *question* and return its output.

    Args:
        question: Prompt text passed to the binary through ``-p``.

    Returns:
        The captured standard output of the binary, or a string starting
        with ``"Error: "`` followed by the captured standard error when the
        binary exits with a non-zero status.
    """
    command = [
        "./build/bin/main",
        "-m", "./ReluLLaMA-7B-PowerInfer-GGUF/llama-7b-relu.powerinfer.gguf",
        "-n", "128",
        "-t", "8",
        "-p", question,
    ]
    # Decode explicitly as UTF-8 and replace undecodable bytes: generation
    # stops after a fixed number of tokens, so the output may end in the
    # middle of a multi-byte character.
    result = subprocess.run(
        command,
        capture_output=True,
        text=True,
        encoding="utf-8",
        errors="replace",
    )
    if result.returncode != 0:
        return "Error: " + result.stderr
    return result.stdout


screen = gr.Interface(
    fn=chat,
    inputs=gr.Textbox(lines=10, placeholder="Enter your question here 👉"),
    outputs=gr.Textbox(lines=10, placeholder="Your answer will be here soon 🚀"),
    title="Inference with Powerinfer 👩🏻‍💻📓✍🏻💡",
    description="This app aims to facilitate the inference of LLMs using Powerinfer💡",
    theme="soft",
)

screen.launch()