xinsir committed
Commit edefb24
1 Parent(s): 4411599

Update README.md

Files changed (1): README.md +124 -2
README.md CHANGED
@@ -13,7 +13,7 @@ The model was trained with a large amount of high-quality data (over 10,000,000 images)
 during training, with careful data augmentation, multiple losses, and multiple resolutions. Note that this model can achieve higher aesthetic performance than our Controlnet-Canny-Sdxl-1.0 model.
 The model supports any type and any width of lines; the sketch can be very simple, and so can the prompt. This model is more general and good at generating visually appealing images.
 Its control ability is also strong: for example, if you are unsatisfied with some local region of the generated image, drawing a more precise sketch and giving a detailed prompt will help a lot.
-**Note that the model also supports lineart or canny lines!**
+**Note that the model also supports lineart or canny lines; try it and you will get a surprise!**
 
 
 ## Model Details
@@ -44,4 +44,126 @@
 ![image7](./000256_scribble_concat.webp)
 ![image8](./000271_scribble_concat.webp)
 ![image9](./000285_scribble_concat.webp)
-![image10](./000290_scribble_concat.webp)
+![image10](./000290_scribble_concat.webp)
+
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+```python
+from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
+from diffusers import DDIMScheduler, EulerAncestralDiscreteScheduler
+from PIL import Image
+import torch
+import numpy as np
+import cv2
+import random  # needed for the random method selection below
+
+def HWC3(x):
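+    # Promote a uint8 image to 3-channel RGB: grayscale is replicated,
+    # RGBA is alpha-composited over a white background.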
+    assert x.dtype == np.uint8
+    if x.ndim == 2:
+        x = x[:, :, None]
+    assert x.ndim == 3
+    H, W, C = x.shape
+    assert C == 1 or C == 3 or C == 4
+    if C == 3:
+        return x
+    if C == 1:
+        return np.concatenate([x, x, x], axis=2)
+    if C == 4:
+        color = x[:, :, 0:3].astype(np.float32)
+        alpha = x[:, :, 3:4].astype(np.float32) / 255.0
+        y = color * alpha + 255.0 * (1.0 - alpha)
+        y = y.clip(0, 255).astype(np.uint8)
+        return y
+
+def nms(x, t, s):
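+    # Thin a soft edge map: blur it, keep only pixels that are local maxima
+    # along one of four line directions (the 3x3 kernels below), then binarize at t.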
+    x = cv2.GaussianBlur(x.astype(np.float32), (0, 0), s)
+
+    f1 = np.array([[0, 0, 0], [1, 1, 1], [0, 0, 0]], dtype=np.uint8)
+    f2 = np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0]], dtype=np.uint8)
+    f3 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.uint8)
+    f4 = np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0]], dtype=np.uint8)
+
+    y = np.zeros_like(x)
+
+    for f in [f1, f2, f3, f4]:
+        np.putmask(y, cv2.dilate(x, kernel=f) == x, x)
+
+    z = np.zeros_like(y, dtype=np.uint8)
+    z[y > t] = 255
+    return z
+
+
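+# a conditioning scale of 1.0 applies the ControlNet signal at full strength;
+# lower values follow the sketch more loosely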
+controlnet_conditioning_scale = 1.0
+prompt = "your prompt; the longer the better, describing the image in as much detail as possible"
+negative_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
+
+
+eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler")
+
+
+controlnet = ControlNetModel.from_pretrained(
+    "xinsir/controlnet-scribble-sdxl-1.0",
+    torch_dtype=torch.float16
+)
+
+# when testing with another base model, you need to change the VAE as well
+vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
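+# (this fp16-fix VAE avoids the NaN/overflow issues the stock SDXL VAE can hit in float16)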
+
+pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
+    "stabilityai/stable-diffusion-xl-base-1.0",
+    controlnet=controlnet,
+    vae=vae,
+    safety_checker=None,
+    torch_dtype=torch.float16,
+    scheduler=eulera_scheduler,
+)
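+# move the pipeline to the GPU (assuming a CUDA device); fp16 weights are impractical on CPU
+pipe.to("cuda")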
+
+# You can either use HED to generate a fake scribble from an existing image,
+# or use a sketch image drawn entirely by yourself.
+
+if random.random() > 0.5:  # randomly pick one of the two methods for this demo
+    # Method 1
+    # If you use HED, provide an image (real or anime), extract its HED lines,
+    # and use them as the scribble. For details on HED detection, refer to
+    # https://github.com/lllyasviel/ControlNet/blob/main/gradio_fake_scribble2image.py
+    # Pseudo-code (apply_hed is the HED annotator from the ControlNet repo above):
+    # img = cv2.imread(img_path)
+    # hed_img = apply_hed(img)
+    # cv2.imwrite("a hed detect path for an image", hed_img)
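+    # (As an untested alternative sketch: the controlnet_aux package ships a HED
+    # annotator, e.g. HEDdetector.from_pretrained("lllyasviel/Annotators"), which
+    # yields the same kind of soft edge map directly from a PIL image.)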
+
+    controlnet_img = Image.open("a hed detect path for an image")
+    controlnet_img = np.array(controlnet_img)
+    controlnet_img = nms(controlnet_img, 127, 3)
+    controlnet_img = cv2.GaussianBlur(controlnet_img, (0, 0), 3)
+
+    # binarize at a randomly chosen low threshold so that lines of different
+    # strengths survive
+    random_val = int(round(random.uniform(0.01, 0.10), 2) * 255)
+    controlnet_img[controlnet_img > random_val] = 255
+    controlnet_img[controlnet_img < 255] = 0
+    controlnet_img = Image.fromarray(controlnet_img)
+
+else:
+    # Method 2
+    # If you use a sketch image drawn entirely by yourself:
+    control_path = "the path where you saved the sketch image you drew with some tool, like a drawing board"
+    controlnet_img = Image.open(control_path)  # the image must be black and white (0 or 255), like the examples listed above
+
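+# The model also accepts canny or lineart input (see the note at the top of this
+# README). An illustrative, untested sketch with OpenCV, where the 100/200
+# thresholds are assumptions rather than values from this repo:
+#   canny = cv2.Canny(cv2.imread(img_path), 100, 200)
+#   controlnet_img = Image.fromarray(HWC3(canny))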
+# resize so the pixel area is about 1024*1024 (the training resolution bucket);
+# this is required to get the best performance
+width, height = controlnet_img.size
+ratio = np.sqrt(1024. * 1024. / (width * height))
+new_width, new_height = int(width * ratio), int(height * ratio)
+controlnet_img = controlnet_img.resize((new_width, new_height))
+
+images = pipe(
+    prompt,
+    negative_prompt=negative_prompt,
+    image=controlnet_img,
+    controlnet_conditioning_scale=controlnet_conditioning_scale,
+    width=new_width,
+    height=new_height,
+    num_inference_steps=30,
+).images
+
+images[0].save("your_image_save_path.png")  # PNG usually keeps more quality than JPG or WEBP, at a larger file size
+```