import os

import gradio as gr
from gradio_client import Client
from huggingface_hub import InferenceClient

# Model used to generate answers.
model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
inference_client = InferenceClient(model_id, token=os.environ["HF_TOKEN"])

# Space that serves embeddings-based retrieval over the Hugging Face docs.
docs_embeddings_space_id = "huggingchat/hf-docs"
gradio_client = Client(docs_embeddings_space_id)

# from https://huggingface.co/chat/settings/assistants/65f33e95d854946bb3f88dde
SYSTEM_PROMPT = (
    "You are a Hugging Face AI expert. Use the provided context to answer user "
    "questions. If the request is not related to Hugging Face Hub or Hugging Face "
    'open source libraries, you MUST respond with: "I can only chat about Hugging '
    'Face" and STOP answering.'
)


def generate(prompt, history):
    """Stream an answer to *prompt*, grounded in retrieved HF docs excerpts.

    Parameters
    ----------
    prompt : str
        The user's current question.
    history : list[tuple[str, str]]
        Prior (user, assistant) message pairs, as provided by gr.ChatInterface.

    Yields partial answers (for streaming in the Gradio UI), each with a
    "sources" footer appended. Raises gr.Error on any failure so the UI
    shows the message instead of a stack trace.
    """
    try:
        # step 1: get relevant docs excerpts from the embeddings Space
        rag_content, sourced_md = gradio_client.predict(
            query_text=prompt,
            output_option="RAG-friendly",
            api_name="/predict",
        )

        # step 2: generate answer with the retrieved context inlined
        processed_prompt = f'''Answer the question: "{prompt}"\
Here are relevant extract from docs that you can use to generate the answer:
=====================
{rag_content}
====================='''

        messages = [{"role": "system", "content": SYSTEM_PROMPT}]
        for user_msg, assistant_msg in history:
            # Strip the sources footer we appended to earlier answers so it
            # is not fed back to the model as part of the conversation.
            assistant_msg = assistant_msg.split("\n\nsources:")[0]
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})
        messages.append({"role": "user", "content": processed_prompt})

        output = ""
        for token in inference_client.chat_completion(messages, stream=True):
            # delta.content can be None (e.g. on the final chunk of a
            # stream) — guard against concatenating None.
            output += token.choices[0].delta.content or ""
            yield output + f"\n\nsources: {sourced_md}"
        return output + f"\n\nsources: {sourced_md}"
    except Exception as e:
        # Surface the failure in the UI; chain the cause for server logs.
        raise gr.Error(str(e)) from e


examples = [
    "How do I upload a model?",
    "Can I change the color of my Space?",
    "How do I finetune Stable Diffusion with Lora?",
    "How do I run a model found on the Hugging Face Hub?",
]

demo = gr.ChatInterface(
    fn=generate,
    chatbot=gr.Chatbot(
        show_label=False,
        show_share_button=False,
        show_copy_button=True,
        likeable=True,
        layout="panel",
    ),
    title="HF Docs Bot 🤗",
    examples=examples,
    concurrency_limit=400,
    stop_btn=None,
    retry_btn=None,
    undo_btn=None,
    clear_btn=None,
    cache_examples=False,
)
demo.launch(show_api=False)