"""Gradio demo that generates images with Stable Diffusion 3 Medium.

At import time the script downloads the model snapshot, loads the VAE and
(when CUDA is available) the diffusion pipeline, and builds the Gradio UI.
Prompts are translated to English before being passed to the pipeline.
"""

import os

import gradio as gr
import torch
from diffusers import StableDiffusion3Pipeline, AutoencoderKL, KDPM2AncestralDiscreteScheduler
from huggingface_hub import snapshot_download
import spaces
from PIL import Image
import requests
from translatepy import Translator

# NOTE(review): huggingface_hub appears to read this flag when it is first
# imported, so setting it after the imports above may have no effect --
# confirm, and move it above the imports if fast transfer is really wanted
# (that requires the `hf_transfer` package to be installed).
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

translator = Translator()
HF_TOKEN = os.environ.get("HF_TOKEN", None)

# Constants
model = "stabilityai/stable-diffusion-3-medium"
vae_model = "madebyollin/sdxl-vae-fp16-fix"

# Download the model repository into ./model, skipping files that are not
# needed to run the pipeline.
model_path = snapshot_download(
    repo_id=model,
    revision="refs/pr/26",
    repo_type="model",
    # Was "*..gitattributes" (doubled dot), which never matched the file.
    ignore_patterns=["*.md", "*.gitattributes"],
    local_dir="model",
    token=HF_TOKEN,
)

CSS = """
.gradio-container {
  max-width: 690px !important;
}
footer {
  visibility: hidden;
}
"""

# Force the dark theme by redirecting once to the same URL with
# `__theme=dark`. The URL API is used so an existing query string or hash
# fragment is preserved instead of being corrupted by string concatenation.
JS = """function () {
  const url = new URL(window.location.href);
  if (url.searchParams.get('__theme') !== 'dark') {
    url.searchParams.set('__theme', 'dark');
    window.location.replace(url.href);
  }
}"""

# Load VAE component
# NOTE(review): this is an SDXL VAE checkpoint while the pipeline below is
# Stable Diffusion 3 -- confirm the two are actually compatible.
vae = AutoencoderKL.from_pretrained(
    vae_model,
    torch_dtype=torch.float16,
)

# The pipeline is loaded once at import time, and only when CUDA is available.
# `pipe` stays None otherwise so generate_image can fail with a clear message
# instead of a NameError.
pipe = None
if torch.cuda.is_available():
    pipe = StableDiffusion3Pipeline.from_pretrained(
        model_path,
        vae=vae,
        torch_dtype=torch.float16,
    ).to("cuda")


# Function
@spaces.GPU()
def generate_image(
    prompt,
    negative="low quality",
    width=1024,
    height=1024,
    scale=1.5,
    steps=30,
    clip=3,
):
    """Translate *prompt* to English and generate one image from it.

    Args:
        prompt: Text prompt in any language; it is translated to English.
        negative: Negative prompt forwarded to the pipeline.
        width: Output width in pixels.
        height: Output height in pixels.
        scale: Guidance scale forwarded to the pipeline.
        steps: Number of inference steps.
        clip: Value forwarded as the pipeline's ``clip_skip`` argument.

    Returns:
        The first image of the pipeline output.

    Raises:
        gr.Error: If the pipeline was not loaded because CUDA is unavailable.
    """
    if pipe is None:
        raise gr.Error("The model is not loaded: no CUDA device is available.")

    prompt = str(translator.translate(prompt, 'English'))

    print(f'prompt:{prompt}')

    image = pipe(
        prompt,
        negative_prompt=negative,
        # Sliders may deliver numbers as floats; these arguments must be ints.
        width=int(width),
        height=int(height),
        guidance_scale=scale,
        num_inference_steps=int(steps),
        clip_skip=int(clip),
    )
    return image.images[0]


examples = [
    "a cat eating a piece of cheese",
    "a ROBOT riding a BLUE horse on Mars, photorealistic",
    "Ironman VS Hulk, ultrarealistic",
    "a CUTE robot artist painting on an easel",
    "Astronaut in a jungle, cold color palette, oil pastel, detailed, 8k",
    "An alien holding sign board contain word 'Flash', futuristic, neonpunk",
    "Kids going to school, Anime style",
]


# Gradio Interface
# NOTE(review): the original indentation and the HTML markup around the two
# header texts were lost; the container nesting and the tags below are a best
# guess -- confirm against the deployed app.
with gr.Blocks(css=CSS, js=JS, theme="soft") as demo:
    gr.HTML("<h1><center>SD3M🦄</center></h1>")
    gr.HTML(
        "<center>"
        "<p>mobius text-to-image generation</p>"
        "<p>Multi-Languages. Adding default prompts to enhance.</p>"
        "</center>"
    )
    with gr.Group():
        with gr.Row():
            prompt = gr.Textbox(
                label='Enter Your Prompt',
                value="best quality, HD, aesthetic",
                scale=6,
            )
            submit = gr.Button(scale=1, variant='primary')
    img = gr.Image(label='SD3M Generated Image')
    with gr.Accordion("Advanced Options", open=False):
        with gr.Row():
            negative = gr.Textbox(label="Negative prompt", value="low quality")
        with gr.Row():
            width = gr.Slider(
                label="Width",
                minimum=512,
                maximum=1280,
                step=8,
                value=1024,
            )
            height = gr.Slider(
                label="Height",
                minimum=512,
                maximum=1280,
                step=8,
                value=1024,
            )
        with gr.Row():
            scale = gr.Slider(
                label="Guidance",
                minimum=3.5,
                maximum=7,
                step=0.1,
                value=7,
            )
            steps = gr.Slider(
                label="Steps",
                minimum=1,
                maximum=50,
                step=1,
                value=50,
            )
            clip = gr.Slider(
                label="Clip Skip",
                minimum=1,
                maximum=10,
                step=1,
                value=3,
            )
    # Examples only supply the prompt; the remaining arguments fall back to
    # generate_image's own defaults.
    gr.Examples(
        examples=examples,
        inputs=prompt,
        outputs=img,
        fn=generate_image,
        cache_examples="lazy",
    )

    # Pressing Enter in the prompt box and clicking the button run the same
    # generation with the same inputs.
    generation_inputs = [prompt, negative, width, height, scale, steps, clip]
    prompt.submit(
        fn=generate_image,
        inputs=generation_inputs,
        outputs=img,
    )
    submit.click(
        fn=generate_image,
        inputs=generation_inputs,
        outputs=img,
    )


if __name__ == "__main__":
    demo.queue().launch()