import os
import copy
import logging

import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
from transformers import AutoProcessor, AutoModelForCausalLM
#import spaces
from PIL import Image

# Set up logging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

import subprocess
# Install flash-attn at startup, skipping the CUDA build; merge os.environ so
# pip and its dependencies stay on PATH inside the subprocess.
subprocess.run(
    'pip install flash-attn --no-build-isolation',
    env={**os.environ, 'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
    shell=True,
)

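# Florence-2 captioner: turns uploaded images into detailed text descriptions.
# It runs on CPU here; move it to "cuda" if a GPU is available.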
model = AutoModelForCausalLM.from_pretrained('gokaygokay/Florence-2-SD3-Captioner', trust_remote_code=True).to("cpu").eval()
processor = AutoProcessor.from_pretrained('gokaygokay/Florence-2-SD3-Captioner', trust_remote_code=True)

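# Download the GGUF weights from the Hub (repo and file overridable via the
# REPO_ID and MODEL_FILE env vars) and load them with llama.cpp using the
# Llama-3 chat template.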
llm = Llama(
    model_path=hf_hub_download(
        repo_id=os.environ.get("REPO_ID", "ZeroWw/llama3-8B-DarkIdol-2.2-Uncensored-1048K-GGUF"),
        filename=os.environ.get("MODEL_FILE", "llama3-8B-DarkIdol-2.2-Uncensored-1048K.q5_k.gguf"),
    ),
    n_ctx=2048,
    n_gpu_layers=100, # change n_gpu_layers if you have more or less VRAM 
    chat_format="llama-3",
) 


def run_pic(files):
    """Caption the first uploaded image with Florence-2 and return the text."""
    image = Image.open(files[0])
    task_prompt = "<DESCRIPTION>"
    prompt = task_prompt + "Describe this image in great detail."

    # Ensure the image is in RGB mode
    if image.mode != "RGB":
        image = image.convert("RGB")

    inputs = processor(text=prompt, images=image, return_tensors="pt").to("cpu")
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        num_beams=3
    )
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = processor.post_process_generation(generated_text, task=task_prompt, image_size=(image.width, image.height))
    return parsed_answer["<DESCRIPTION>"]

def generate_text(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
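    """Stream a llama.cpp chat completion; if an image is attached, caption it instead."""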
    in_text = message['text']
    in_files = message['files']
    if in_files:
        try:
            # Describe the image so the character can react to it in chat.
            picoutput = f"sends a picture that contains the following: {run_pic(in_files)}"
            yield picoutput
        except Exception as e:
            logger.error(f"Failed to caption the uploaded image: {e}")
            yield "Sorry, I could not process that picture."
    else:
        temp = ""
        # Create system_prompt as a dictionary
        system_prompt = {"role": "system", "content": system_message}

        # Create history_prompt as a list of dictionaries
        history_prompt = []
        for interaction in history:
            user_part = {"role": "user", "content": str(interaction[0])}
            assistant_part = {"role": "assistant", "content": str(interaction[1])}
            history_prompt.extend([user_part, assistant_part])

        # Create user_input_part as a dictionary
        user_input_part = {"role": "user", "content": str(in_text)}

        # Construct input_prompt as a list of dictionaries
        input_prompt = [system_prompt] + history_prompt + [user_input_part]
        logger.debug(f"Input Prompt: {input_prompt}")
        output = llm.create_chat_completion(
            input_prompt,
            temperature=temperature,
            top_p=top_p,
            top_k=40,
            repeat_penalty=1.1,
            max_tokens=max_tokens,
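            # Stop strings covering several chat-template conventions.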
            stop=[
                "<|prompter|>",
                "<|endoftext|>",
                "<|endoftext|> \n",
                "ASSISTANT:",
                "USER:",
                "SYSTEM:",
                "<|start_header_id|>",
                "<|eot_id|>",
                "<im_end>",
                "<|im_end|>",
            ],
            stream=True,
        )
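        # Relay the stream to Gradio: accumulate deltas and yield the running text.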
        for out in output:
            stream = copy.deepcopy(out)
            logger.debug(f"Stream: {stream}")
            delta = stream['choices'][0].get('delta', {})
            if 'content' in delta:
                temp += delta['content']
                yield temp


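# Multimodal chat UI: free-text messages plus optional image uploads, with
# sampling controls exposed as additional inputs.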
demo = gr.ChatInterface(
    generate_text,
    multimodal=True,
    title="Florence-DarkIdol",
    cache_examples=False,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
    additional_inputs=[
        gr.Textbox(value="you are Nagano Natsuki,name:Nagano Natsuki Gender: Female Age: 25 years old Occupation: Adult Video (AV) Actress & Model Personality: Cheerful, optimistic, sometimes naughty; skilled at interacting with audiences.Interests: Drinking, traveling, photography, singing, dancing Expertise: Performing in sexual scenes; well-versed in Japanese language and culture; familiar with various sex techniques. Special Identity Attributes: Renowned AV actress in Japan; nicknamed 'Talent Magician' and 'Princess of Lust'; has a large number of devoted fans. Skills: Acting in pornographic scenes, singing, dancing, photography, swimming; skilled at interacting with audiences.Equipment: Various provocative clothing and shoes; high-quality photography equipment", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.5, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)


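# Entry point: launch the Gradio app (pass share=True to demo.launch() for a public link).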
if __name__ == "__main__":
    demo.launch()