File size: 1,684 Bytes
c3593fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import subprocess
import os
import urllib.request
import gradio as gr


def clone_power_infer():
    """Clone the PowerInfer repository into the current working directory.

    The clone is skipped when ``PowerInfer/.git`` already exists, so the
    script can be restarted without ``git clone`` failing on the existing
    checkout.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits with a
            non-zero status.
    """
    repo_url = "https://github.com/SJTU-IPADS/PowerInfer.git"

    # A previous run already produced the checkout; nothing to do.
    if os.path.isdir(os.path.join("PowerInfer", ".git")):
        return

    # check=True surfaces a failed clone here instead of at a later step.
    subprocess.run(["git", "clone", repo_url], check=True)

def install_requirements():
    """Install the packages listed in ``requirements.txt`` of the current directory.

    pip is invoked as ``<current interpreter> -m pip`` so the packages are
    installed for the interpreter running this script rather than for
    whichever ``pip`` executable happens to be first on ``PATH``.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    import sys

    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"],
        check=True,
    )

def cmake_builds():
    """Configure and build the project in the current directory with CMake.

    Runs ``cmake -S . -B build`` followed by
    ``cmake --build build --config Release``.

    Raises:
        subprocess.CalledProcessError: If either CMake invocation exits with
            a non-zero status. The build step is not attempted when the
            configure step fails.
    """
    # Configure: generate the build tree in ./build.
    subprocess.run(["cmake", "-S", ".", "-B", "build"], check=True)

    # Build: compile the generated tree in Release configuration.
    subprocess.run(["cmake", "--build", "build", "--config", "Release"], check=True)

# Fetch and build PowerInfer. Every later relative path (requirements.txt,
# build/, the model folder) is resolved from inside the checkout.
clone_power_infer()
os.chdir("PowerInfer")
install_requirements()
cmake_builds()

# exist_ok=True so restarting the app does not crash on the existing folder.
os.makedirs("ReluLLaMA-7B-PowerInfer-GGUF", exist_ok=True)


# URL to download the file from
url = "https://huggingface.co/PowerInfer/ReluLLaMA-7B-PowerInfer-GGUF/resolve/main/llama-7b-relu.powerinfer.gguf"

# Destination path to save the downloaded file. This must be a file path
# (urlretrieve opens it for writing), and it must match the model path that
# chat() passes to the PowerInfer binary.
destination_path = "ReluLLaMA-7B-PowerInfer-GGUF/llama-7b-relu.powerinfer.gguf"

# Download the file unless a previous run already fetched it. The data is
# written to a temporary ".part" name first and renamed afterwards, so an
# interrupted download is never mistaken for a complete model file.
if not os.path.exists(destination_path):
    partial_path = destination_path + ".part"
    urllib.request.urlretrieve(url, partial_path)
    os.replace(partial_path, destination_path)

def chat(question):
    """Run the PowerInfer binary on a prompt and return the text it prints.

    Args:
        question: Prompt text; it is passed to the binary as the value of
            its ``-p`` option.

    Returns:
        str: The captured standard output of the process on success.
        Otherwise a human-readable message: a hint when the prompt is empty,
        or an error description when the binary cannot be started or exits
        with a non-zero status.
    """
    # Nothing to ask: avoid launching the model on an empty prompt.
    if not question or not question.strip():
        return "Please enter a question."

    command = [
        "./build/bin/main",
        "-m", "./ReluLLaMA-7B-PowerInfer-GGUF/llama-7b-relu.powerinfer.gguf",
        "-n", "128",
        "-t", "8",
        "-p", question,
    ]

    # Capture the output so it can be returned to the caller; without this
    # the text only goes to the server's console and the caller gets None.
    try:
        completed = subprocess.run(
            command,
            capture_output=True,
            encoding="utf-8",
            errors="replace",  # tolerate undecodable bytes in the output
        )
    except OSError as err:
        # E.g. the binary has not been built or is not executable.
        return f"Failed to start PowerInfer: {err}"

    if completed.returncode != 0:
        return (
            f"PowerInfer exited with status {completed.returncode}:\n"
            f"{completed.stderr}"
        )

    return completed.stdout

# Gradio UI: a single text box in, a single text box out, served by chat().
# The emoji below are written as real Unicode characters; the previous text
# was UTF-8 that had been decoded with the wrong codepage.
screen = gr.Interface(
    fn=chat,
    inputs=gr.Textbox(lines=10, placeholder="Enter your question here 👉"),
    outputs=gr.Textbox(lines=10, placeholder="Your answer will be here soon 🚀"),
    title="Inference with Powerinfer 👩🏻‍💻📓✍🏻💡",
    description="This app aims to facilitate the inference of LLMs using Powerinfer💡",
    theme="soft",
    # examples=["Hello", "what is the speed of human nerve impulses?"],
)

screen.launch()