chat-with-saiga-7b

Sleeping

App Files Files Community

Federico Galatolo committed on Oct 27, 2023

Commit

34e2eaa

•

1 Parent(s): 7b2cfb0

first commit

Browse files

Files changed (2) hide show

app.py +67 -0
requirements.txt +2 -0

app.py ADDED Viewed

	@@ -0,0 +1,67 @@

+import os
+import gradio as gr
+import copy
+import time
+import llama_cpp
+from llama_cpp import Llama
+from huggingface_hub import hf_hub_download
+llm = Llama(
+    model_path=hf_hub_download(
+        repo_id="galatolo/cerbero-7b-gguf",
+        filename="ggml-model-Q8_0.gguf",
+    ),
+    n_ctx=4086,
+)
+history = []
+def generate_text(message, history):
+    temp = ""
+    input_prompt = "Conversazione tra umano ed un assistente AI di nome cerbero-7b\n"
+    for interaction in history:
+        input_prompt += "[|Umano|] " + interaction[0] + "\n"
+        input_prompt += "[|AI|]" + interaction[1]
+    input_prompt += "[|Umano|] " + message + "\n[|AI|]"
+    print(input_prompt)
+    output = llm(
+        input_prompt,
+        temperature=0.15,
+        top_p=0.1,
+        top_k=40,
+        repeat_penalty=1.1,
+        max_tokens=1024,
+        stop=[
+            "[|Umano|]",
+            "[|Human|]",
+            "[|AI|]",
+        ],
+        stream=True,
+    )
+    for out in output:
+        stream = copy.deepcopy(out)
+        temp += stream["choices"][0]["text"]
+        yield temp
+    history = ["init", input_prompt]
+demo = gr.ChatInterface(
+    generate_text,
+    title="cerbero-7b running on CPU (quantized)",
+    description="This is a quantized version of cerbero-7b running on CPU. It is less powerful than the original version, but it is much faster and it can even run on a Raspberry Pi 4.",
+    examples=[
+        "Dammi 3 idee di ricette che posso fare con i pistacchi",
+        "Prepara un piano di esercizi da poter fare a casa",
+        "Scrivi una poesia sulla nuova AI chiamata cerbero-7b"
+    ],
+    cache_examples=False,
+    retry_btn=None,
+    undo_btn="Delete Previous",
+    clear_btn="Clear",
+)
+demo.queue(concurrency_count=1, max_size=5)
+demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ llama-cpp-python
2	+ huggingface-hub