import os
import gradio as gr
import copy
from llama_cpp import Llama
from huggingface_hub import hf_hub_download  
from transformers import AutoProcessor, AutoModelForCausalLM
#import spaces
import re
from PIL import Image 
import io

import subprocess
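# Install flash-attn without building its CUDA kernels locally (FLASH_ATTENTION_SKIP_CUDA_BUILD);
# the Florence-2 remote code may try to import it even though the model is loaded on CPU below.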
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)

# Florence-2 captioner (SD3-prompt fine-tune), used to describe uploaded images; kept on CPU.
model = AutoModelForCausalLM.from_pretrained('gokaygokay/Florence-2-SD3-Captioner', trust_remote_code=True).to("cpu").eval()
processor = AutoProcessor.from_pretrained('gokaygokay/Florence-2-SD3-Captioner', trust_remote_code=True)

# Download the GGUF chat model from the Hugging Face Hub and load it with llama.cpp.
llm = Llama(
    model_path=hf_hub_download(
        repo_id=os.environ.get("REPO_ID", "ZeroWw/llama3-8B-DarkIdol-2.2-Uncensored-1048K-GGUF"),
        filename=os.environ.get("MODEL_FILE", "llama3-8B-DarkIdol-2.2-Uncensored-1048K.q5_k.gguf"),
    ),
    n_ctx=2048,
    n_gpu_layers=100, # change n_gpu_layers if you have more or less VRAM 
) 


# Caption the first uploaded image with Florence-2 and return the parsed description.
def run_pic(image):
    image = Image.open(image[0])
    task_prompt = "<DESCRIPTION>"
    prompt = task_prompt + "Describe this image in great detail."

    # Ensure the image is in RGB mode
    if image.mode != "RGB":
        image = image.convert("RGB")

    inputs = processor(text=prompt, images=image, return_tensors="pt").to("cpu")
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        num_beams=3
    )
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = processor.post_process_generation(generated_text, task=task_prompt, image_size=(image.width, image.height))
    return parsed_answer["<DESCRIPTION>"]

# Chat handler for gr.ChatInterface: captions uploaded images via run_pic, otherwise builds
# a flat prompt from the system message and history and streams tokens from llama.cpp.
def generate_text(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    in_text = message['text']
    in_files = message['files']
    output=""
    picoutput=""
    history_prompt=""
    if in_files:
        picoutput=f"sends a picture that contains the following:  {run_pic(in_files)}"
        yield picoutput
    else:
        temp = ""
        system_prompt = f'{system_message}'
        for interaction in history:
            history_prompt += str(interaction[0]) +  str(interaction[1])
        input_prompt = system_prompt + "  " + history_prompt + "  " + str(in_text) 
        print(input_prompt)
        output = llm(
            input_prompt,
            temperature=temperature,
            top_p=top_p,
            top_k=40,
            repeat_penalty=1.1,
            max_tokens=max_tokens,
            stop=[
                "<|prompter|>",
                "<|endoftext|>",
                "<|endoftext|> \n",
                "ASSISTANT:",
                "USER:",
                "SYSTEM:",
                "<|start_header_id|>",
                "<|eot_id|>",
            ],
            stream=True,
        )
        for out in output:
            stream = copy.deepcopy(out)
            temp += stream["choices"][0]["text"]
            yield temp


# Multimodal chat UI; the additional inputs expose the system prompt and sampling parameters.
demo = gr.ChatInterface(
    generate_text,
    multimodal=True,
    title="Florence-DarkIdol",
    cache_examples=False,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
    additional_inputs=[
        gr.Textbox(value="you are Nagano Natsuki,name:Nagano Natsuki Gender: Female Age: 25 years old Occupation: Adult Video (AV) Actress & Model Personality: Cheerful, optimistic, sometimes naughty; skilled at interacting with audiences.Interests: Drinking, traveling, photography, singing, dancing Expertise: Performing in sexual scenes; well-versed in Japanese language and culture; familiar with various sex techniques. Special Identity Attributes: Renowned AV actress in Japan; nicknamed 'Talent Magician' and 'Princess of Lust'; has a large number of devoted fans. Skills: Acting in pornographic scenes, singing, dancing, photography, swimming; skilled at interacting with audiences.Equipment: Various provocative clothing and shoes; high-quality photography equipment", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.5, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),

    ],
)


if __name__ == "__main__":
    demo.launch()