"""FastAPI service that proxies text-generation requests to the Hugging Face
Inference API (Mixtral-8x7B-Instruct-v0.1)."""

from fastapi import FastAPI
from pydantic import BaseModel
from huggingface_hub import InferenceClient
import uvicorn

app = FastAPI()

# Hugging Face Inference API endpoint for the target model.
API_URL = "https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B-Instruct-v0.1"


class Item(BaseModel):
    # Request payload for POST /generate/.
    prompt: str
    history: list  # NOTE(review): accepted but currently unused by generate()
    system_prompt: str  # NOTE(review): accepted but currently unused by generate()
    temperature: float = 0.0
    max_new_tokens: int = 1048
    top_p: float = 0.15
    repetition_penalty: float = 1.0
    instructions: str = ""  # optional custom instructions prepended to the prompt
    api: str = ""  # caller-supplied Hugging Face API token


def format_prompt(message, custom_instructions=None):
    """Build a Mixtral-style ``[INST] ... [/INST]`` prompt.

    If *custom_instructions* is truthy, it is emitted as a leading
    instruction block before the user message.
    """
    prompt = ""
    if custom_instructions:
        prompt += f"[INST] {custom_instructions} [/INST]"
    prompt += f"[INST] {message} [/INST]"
    return prompt


def generate(item: Item):
    """Run a remote text-generation call for *item*.

    Returns ``{"response": text}`` on success or ``{"error": message}``
    on failure — errors are surfaced to the HTTP caller rather than
    raised.
    """
    try:
        # Sampling backends reject temperature == 0; clamp to a small
        # positive floor so do_sample=True remains valid.
        temperature = float(item.temperature)
        if temperature < 1e-2:
            temperature = 1e-2
        top_p = float(item.top_p)

        generate_kwargs = dict(
            temperature=temperature,
            max_new_tokens=item.max_new_tokens,
            top_p=top_p,
            repetition_penalty=item.repetition_penalty,
            do_sample=True,
            seed=42,  # fixed seed for reproducible sampling
        )

        # BUG FIX: the original `print(item)` logged the whole request
        # payload — including the caller's API token — to stdout.
        # Removed to avoid leaking credentials into logs.
        formatted_prompt = format_prompt(item.prompt, item.instructions)

        headers = {"Authorization": f"Bearer {item.api}"}
        client = InferenceClient(API_URL, headers=headers)
        response = client.text_generation(formatted_prompt, **generate_kwargs)
        return {"response": response}
    except Exception as e:
        # Boundary handler: report the failure in the JSON body instead of
        # letting FastAPI return an opaque 500.
        return {"error": str(e)}


@app.post("/generate/")
async def generate_text(item: Item):
    """HTTP entry point for text generation."""
    # BUG FIX: generate() already returns {"response": ...}; the original
    # wrapped that dict again, yielding {"response": {"response": ...}}
    # (and {"response": {"error": ...}} on failure). Return it directly.
    return generate(item)