ariG23498 committed on
Commit a6a9a80
1 Parent(s): 1ad3782

Upload folder using huggingface_hub

.github/README.md ADDED
@@ -0,0 +1,66 @@
+ # makeanime
+
+ <a href="https://colab.research.google.com/gist/ariG23498/645f0f276612a60fb32ad2b387e0d301/scratchpad.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>
+
+ ![image](output.png)
+
+ `makeanime` is a CLI tool that generates anime-style images from a face image and a text prompt, using Stable Diffusion XL with IP-Adapters that blend anime style references into the result.
+
+ ## Features
+
+ - Generates anime-style images by blending a face image with anime style references.
+ - Leverages Stable Diffusion XL for high-quality text-to-image generation.
+ - Uses two IP-Adapters to combine face and anime-style attributes.
+ - Supports custom prompt input for greater flexibility.
+ - Allows control over the influence of face and style using weights.
+
+ ## Installation
+
+ ```shell
+ $ pip install -Uq git+https://github.com/ariG23498/makeanime
+ ```
+
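+ Because the CLI is built with Python's `Fire`, you should be able to view the auto-generated help after installing:
+
+ ```shell
+ # Fire derives this help text from the main function's signature
+ $ makeanime --help
+ ```
+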
+ ## Usage
+
+ You can use the `makeanime` CLI to generate images. The tool accepts the following arguments:
+
+ - `image`: URL (or local path) of the face image to be stylized.
+ - `prompt`: Text prompt to guide the image generation.
+ - `style_weight`: (Optional) A float that controls how much the anime style influences the image. Default is `0.5`.
+ - `face_weight`: (Optional) A float that controls how much the face image influences the result. Default is `0.5`.
+
+ ### Example Command
+
+ ```bash
+ $ makeanime \
+     --image "https://example.com/your-face-image.jpg" \
+     --prompt "a man" \
+     --style_weight 0.7 \
+     --face_weight 0.3
+ ```
+
+ This command generates an anime-style image from the given face image URL and prompt, and saves a side-by-side grid of the input face and the result as `output.png` in the working directory.
+
+ ## File Structure
+
+ - `src/makeanime/cli.py`: Contains the main generation logic and the `Fire`-based CLI entry point.
+ - `src/makeanime/__main__.py`: Lets the package run as a module (see the example below).
+ - `app.py`: A Gradio demo that wraps the same generation function.
+
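+ Thanks to `src/makeanime/__main__.py`, the same CLI should also work when invoked as a module, for example:
+
+ ```shell
+ $ python -m makeanime --image "https://example.com/your-face-image.jpg" --prompt "a man"
+ ```
+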
+ ## How it Works
+
+ - **CLIPVisionModelWithProjection** encodes the input face image.
+ - **Stable Diffusion XL** generates the image from the text prompt and the encoded references.
+ - Two **IP-Adapters** are loaded to modulate the anime style and face weights.
+ - Images are generated at 1024x1024 resolution, and the CLI output is a grid of the original face image and the generated anime image (see the condensed sketch below).
+
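+ The snippet below is a condensed sketch of this pipeline; the full implementation lives in `src/makeanime/cli.py` (the URL passed to `load_image` is a placeholder):
+
+ ```python
+ import torch
+ from diffusers import AutoPipelineForText2Image, DDIMScheduler
+ from diffusers.utils import load_image
+ from transformers import CLIPVisionModelWithProjection
+
+ # CLIP ViT-H image encoder used by the "plus" IP-Adapter checkpoints.
+ image_encoder = CLIPVisionModelWithProjection.from_pretrained(
+     "h94/IP-Adapter", subfolder="models/image_encoder", torch_dtype=torch.float16
+ )
+ pipeline = AutoPipelineForText2Image.from_pretrained(
+     "stabilityai/stable-diffusion-xl-base-1.0",
+     torch_dtype=torch.float16,
+     image_encoder=image_encoder,
+ )
+ pipeline.scheduler = DDIMScheduler.from_config(pipeline.scheduler.config)
+ # Two adapters: the anime style references first, the face second.
+ pipeline.load_ip_adapter(
+     "h94/IP-Adapter",
+     subfolder="sdxl_models",
+     weight_name=[
+         "ip-adapter-plus_sdxl_vit-h.safetensors",
+         "ip-adapter-plus-face_sdxl_vit-h.safetensors",
+     ],
+ )
+ pipeline.set_ip_adapter_scale([0.5, 0.5])  # [style_weight, face_weight]
+ pipeline.enable_model_cpu_offload()
+
+ face_image = load_image("https://example.com/your-face-image.jpg")  # placeholder URL
+ style_folder = "https://huggingface.co/datasets/ariG23498/images/resolve/main/anime-style"
+ style_images = [load_image(f"{style_folder}/image00{i}.png") for i in range(10)]
+
+ image = pipeline(
+     prompt="a man",
+     height=1024,
+     width=1024,
+     ip_adapter_image=[style_images, face_image],
+     num_inference_steps=50,
+ ).images[0]
+ image.save("output.png")
+ ```
+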
+ ## Requirements
+
+ - Python 3.10+
+ - PyTorch
+ - Diffusers
+ - Transformers
+ - Fire
+
+ ## References
+
+ The code is adapted from the [Hugging Face IP-Adapter guide](https://huggingface.co/docs/diffusers/main/en/using-diffusers/ip_adapter).
.github/output.png ADDED
.gitignore ADDED
@@ -0,0 +1,3 @@
+ __pycache__
+ .venv
+ .ruff_cache
.python-version ADDED
@@ -0,0 +1 @@
+ 3.10
README.md CHANGED
@@ -1,12 +1,6 @@
  ---
- title: Makeanime
- emoji: 🐨
- colorFrom: blue
- colorTo: green
+ title: makeanime
+ app_file: app.py
  sdk: gradio
  sdk_version: 4.42.0
- app_file: app.py
- pinned: false
  ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,32 @@
+ import gradio as gr
+ from functools import partial
+ from makeanime.cli import main
+
+ generate_image = partial(main, is_gradio=True)
+
+ with gr.Blocks() as demo:
+     gr.Markdown("# makeanime: Turn your image into an anime")
+
+     with gr.Row():
+         with gr.Column():
+             image_input = gr.Image(label="Upload your image")
+             prompt_input = gr.Text(label="Prompt")
+             style_weight_slider = gr.Slider(
+                 label="Style Weight", minimum=0.0, maximum=1.0, value=0.5, step=0.1
+             )
+             face_weight_slider = gr.Slider(
+                 label="Face Weight", minimum=0.0, maximum=1.0, value=0.5, step=0.1
+             )
+             generate_button = gr.Button("Generate Anime Image")
+
+         with gr.Column():
+             result_output = gr.Image()
+
+     generate_button.click(
+         fn=generate_image,
+         inputs=[image_input, prompt_input, style_weight_slider, face_weight_slider],
+         outputs=result_output,
+     )
+
+ if __name__ == "__main__":
+     demo.launch()
pyproject.toml ADDED
@@ -0,0 +1,26 @@
+ [project]
+ name = "makeanime"
+ version = "0.1.0"
+ description = "A CLI to make anime themed face images"
+ readme = ".github/README.md"
+ requires-python = ">=3.10"
+ dependencies = [
+     "accelerate>=0.34.0",
+     "diffusers>=0.30.2",
+     "fire>=0.6.0",
+     "gradio>=4.42.0",
+     "makeanime",
+     "ruff>=0.6.3",
+     "torch>=2.4.0",
+     "transformers>=4.44.2",
+ ]
+
+ [build-system]
+ requires = ["hatchling"]
+ build-backend = "hatchling.build"
+
+ [tool.uv.sources]
+ makeanime = { workspace = true }
+
+ [project.scripts]
+ makeanime = "makeanime.cli:app"
src/makeanime/__init__.py ADDED
File without changes
src/makeanime/__main__.py ADDED
@@ -0,0 +1,4 @@
+ from makeanime.cli import app
+
+ if __name__ == "__main__":
+     app()
src/makeanime/cli.py ADDED
@@ -0,0 +1,74 @@
+ from fire import Fire
+ import torch
+ from diffusers import AutoPipelineForText2Image, DDIMScheduler
+ from transformers import CLIPVisionModelWithProjection
+ from diffusers.utils import load_image, make_image_grid
+
+
+ def main(
+     image,
+     prompt: str,
+     style_weight: float = 0.5,
+     face_weight: float = 0.5,
+     is_gradio: bool = False,
+ ):
+     # CLIP ViT-H image encoder that the "plus" IP-Adapter checkpoints expect.
+     image_encoder = CLIPVisionModelWithProjection.from_pretrained(
+         "h94/IP-Adapter",
+         subfolder="models/image_encoder",
+         torch_dtype=torch.float16,
+     )
+     pipeline = AutoPipelineForText2Image.from_pretrained(
+         "stabilityai/stable-diffusion-xl-base-1.0",
+         torch_dtype=torch.float16,
+         image_encoder=image_encoder,
+     )
+     pipeline.scheduler = DDIMScheduler.from_config(pipeline.scheduler.config)
+     # Load two IP-Adapters: the first handles the anime style references,
+     # the second handles the input face.
+     pipeline.load_ip_adapter(
+         "h94/IP-Adapter",
+         subfolder="sdxl_models",
+         weight_name=[
+             "ip-adapter-plus_sdxl_vit-h.safetensors",
+             "ip-adapter-plus-face_sdxl_vit-h.safetensors",
+         ],
+     )
+
+     # The scales control how strongly each adapter steers the generation.
+     pipeline.set_ip_adapter_scale([style_weight, face_weight])
+     pipeline.enable_model_cpu_offload()
+
+     # Gradio passes the image in directly; the CLI receives a URL or path.
+     face_image = image if is_gradio else load_image(image)
+
+     style_folder = (
+         "https://huggingface.co/datasets/ariG23498/images/resolve/main/anime-style"
+     )
+     style_images = [load_image(f"{style_folder}/image00{i}.png") for i in range(10)]
+
+     generator = torch.Generator(device="cpu").manual_seed(0)
+
+     image = pipeline(
+         prompt=prompt,
+         height=1024,
+         width=1024,
+         ip_adapter_image=[style_images, face_image],
+         negative_prompt="monochrome, lowres, bad anatomy, worst quality, low quality",
+         num_inference_steps=50,
+         num_images_per_prompt=1,
+         generator=generator,
+     ).images[0]
+
+     if is_gradio:
+         return image
+
+     # For CLI use, save a side-by-side grid of the input face and the result.
+     image = make_image_grid(
+         [
+             face_image.resize((512, 512)),
+             image.resize((512, 512)),
+         ],
+         rows=1,
+         cols=2,
+     )
+
+     image.save("output.png")
+
+
+ def app():
+     Fire(main)
uv.lock ADDED
The diff for this file is too large to render.