"""FastAPI service that proxies text-generation requests to the Hugging Face
Inference API (Mixtral-8x7B-Instruct-v0.1)."""

from fastapi import FastAPI
from pydantic import BaseModel
from huggingface_hub import InferenceClient
import uvicorn

app = FastAPI()

# Hugging Face Inference API endpoint for the target model.
API_URL = "https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B-Instruct-v0.1"


class Item(BaseModel):
    # Request payload for POST /generate/.
    prompt: str
    history: list  # NOTE(review): accepted but currently unused by generate()
    system_prompt: str  # NOTE(review): accepted but currently unused by generate()
    temperature: float = 0.0
    max_new_tokens: int = 1048
    top_p: float = 0.15
    repetition_penalty: float = 1.0
    instructions: str = ""  # optional custom instructions prepended to the prompt
    api: str = ""  # caller-supplied Hugging Face API token


def format_prompt(message, custom_instructions=None):
    """Build a Mixtral-style ``[INST] ... [/INST]`` prompt.

    If *custom_instructions* is truthy, it is emitted as a leading
    instruction block before the user message.
    """
    prompt = ""
    if custom_instructions:
        prompt += f"[INST] {custom_instructions} [/INST]"
    prompt += f"[INST] {message} [/INST]"
    return prompt


def generate(item: Item):
    """Run a remote text-generation call for *item*.

    Returns ``{"response": text}`` on success or ``{"error": message}``
    on failure — errors are surfaced to the HTTP caller rather than
    raised.
    """
    try:
        # Sampling backends reject temperature == 0; clamp to a small
        # positive floor so do_sample=True remains valid.
        temperature = float(item.temperature)
        if temperature < 1e-2:
            temperature = 1e-2
        top_p = float(item.top_p)

        generate_kwargs = dict(
            temperature=temperature,
            max_new_tokens=item.max_new_tokens,
            top_p=top_p,
            repetition_penalty=item.repetition_penalty,
            do_sample=True,
            seed=42,  # fixed seed for reproducible sampling
        )

        # BUG FIX: the original `print(item)` logged the whole request
        # payload — including the caller's API token — to stdout.
        # Removed to avoid leaking credentials into logs.
        formatted_prompt = format_prompt(item.prompt, item.instructions)

        headers = {"Authorization": f"Bearer {item.api}"}
        client = InferenceClient(API_URL, headers=headers)
        response = client.text_generation(formatted_prompt, **generate_kwargs)
        return {"response": response}
    except Exception as e:
        # Boundary handler: report the failure in the JSON body instead of
        # letting FastAPI return an opaque 500.
        return {"error": str(e)}


@app.post("/generate/")
async def generate_text(item: Item):
    """HTTP entry point for text generation."""
    # BUG FIX: generate() already returns {"response": ...}; the original
    # wrapped that dict again, yielding {"response": {"response": ...}}
    # (and {"response": {"error": ...}} on failure). Return it directly.
    return generate(item)