kirisame committed
Commit 2dff9da
1 Parent(s): d135a7f

Add WD 1.3 float32 weights

.gitattributes CHANGED
@@ -29,3 +29,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+safety_checker/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
+text_encoder/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
+unet/diffusion_pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
+vae/diffusion_pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -17,24 +17,15 @@ waifu-diffusion is a latent text-to-image diffusion model that has been conditio
 
 [Original Weights](https://huggingface.co/hakurei/waifu-diffusion-v1-3)
 
-# Gradio
+# Gradio & Colab
 
-We also support a [Gradio](https://github.com/gradio-app/gradio) web ui with diffusers to run Waifu Diffusion:
+We also support a [Gradio](https://github.com/gradio-app/gradio) Web UI and Colab with Diffusers to run Waifu Diffusion:
 [![Open In Spaces](https://camo.githubusercontent.com/00380c35e60d6b04be65d3d94a58332be5cc93779f630bcdfc18ab9a3a7d3388/68747470733a2f2f696d672e736869656c64732e696f2f62616467652f25463025394625413425393725323048756767696e67253230466163652d5370616365732d626c7565)](https://huggingface.co/spaces/hakurei/waifu-diffusion-demo)
-
 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1_8wPN7dJO746QXsFnB09Uq2VGgSRFuYE#scrollTo=1HaCauSq546O)
 
 ## Model Description
 
-The model originally used for fine-tuning is [Stable Diffusion V1-4](https://huggingface.co/CompVis/stable-diffusion-v1-4), which is a latent image diffusion model trained on [LAION2B-en](https://huggingface.co/datasets/laion/laion2B-en).
-
-The current model has been fine-tuned with a learning rate of 5.0e-6 for 4 epochs on 56k text-image pairs obtained through Danbooru which all have an aesthetic rating greater than `6.0`.
-
-**Note:** This project has **no affiliation with Danbooru.**
-
-## Training Data & Annotative Prompting
-
-The data used for fine-tuning has come from a random sample of 56k Danbooru images, which were filtered based on [CLIP Aesthetic Scoring](https://github.com/christophschuhmann/improved-aesthetic-predictor) where only images with an aesthetic score greater than `6.0` were used.
+[See here for a full model overview.](https://gist.github.com/harubaru/f727cedacae336d1f7877c4bbe2196e1)
 
 ## License
 
@@ -55,31 +46,18 @@ This model can be used for entertainment purposes and as a generative art assist
 ```python
 import torch
 from torch import autocast
-from diffusers import StableDiffusionPipeline, DDIMScheduler
-
-model_id = "hakurei/waifu-diffusion"
-device = "cuda"
-
+from diffusers import StableDiffusionPipeline
 
 pipe = StableDiffusionPipeline.from_pretrained(
-    model_id,
-    torch_dtype=torch.float16,
-    revision="fp16",
-    scheduler=DDIMScheduler(
-        beta_start=0.00085,
-        beta_end=0.012,
-        beta_schedule="scaled_linear",
-        clip_sample=False,
-        set_alpha_to_one=False,
-    ),
-)
-pipe = pipe.to(device)
-
-prompt = "touhou hakurei_reimu 1girl solo portrait"
+    'waifu-diffusion',
+    torch_dtype=torch.float32
+).to('cuda')
+
+prompt = "1girl, aqua eyes, baseball cap, blonde hair, closed mouth, earrings, green background, hat, hoop earrings, jewelry, looking at viewer, shirt, short hair, simple background, solo, upper body, yellow shirt"
 with autocast("cuda"):
-    image = pipe(prompt, guidance_scale=7.5)["sample"][0]
+    image = pipe(prompt, guidance_scale=6)["sample"][0]
 
-image.save("reimu_hakurei.png")
+image.save("test.png")
 ```
 
 ## Team Members and Acknowledgements
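
A note on the updated usage snippet: as committed, it points `from_pretrained` at a local `waifu-diffusion` directory and indexes the pipeline output with `["sample"]`, which matches diffusers releases of that era. Below is a minimal sketch of the equivalent call against the Hub, assuming the repo id `hakurei/waifu-diffusion` and a diffusers version whose pipeline output exposes `.images`:

```python
import torch
from diffusers import StableDiffusionPipeline

# Assumption: the Hub repo id is "hakurei/waifu-diffusion".
# Pass torch_dtype=torch.float16 instead to roughly halve VRAM use.
pipe = StableDiffusionPipeline.from_pretrained(
    "hakurei/waifu-diffusion",
    torch_dtype=torch.float32,
).to("cuda")

prompt = "1girl, aqua eyes, baseball cap, blonde hair, closed mouth, earrings, green background, hat, hoop earrings, jewelry, looking at viewer, shirt, short hair, simple background, solo, upper body, yellow shirt"

# Newer diffusers releases return a StableDiffusionPipelineOutput,
# so take .images[0] rather than ["sample"][0].
image = pipe(prompt, guidance_scale=6).images[0]
image.save("test.png")
```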
model_index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "_class_name": "StableDiffusionPipeline",
-  "_diffusers_version": "0.2.4",
+  "_diffusers_version": "0.4.1",
   "feature_extractor": [
     "transformers",
     "CLIPFeatureExtractor"
@@ -9,14 +9,14 @@
     "stable_diffusion",
     "StableDiffusionSafetyChecker"
   ],
+  "scheduler": [
+    "diffusers",
+    "LMSDiscreteScheduler"
+  ],
   "text_encoder": [
     "transformers",
     "CLIPTextModel"
   ],
-  "scheduler": [
-    "diffusers",
-    "DDIMScheduler"
-  ],
   "tokenizer": [
     "transformers",
     "CLIPTokenizer"
safety_checker/config.json CHANGED
@@ -1,5 +1,6 @@
 {
-  "_name_or_path": "./safety_module",
+  "_commit_hash": null,
+  "_name_or_path": "CompVis/stable-diffusion-safety-checker",
   "architectures": [
     "StableDiffusionSafetyChecker"
   ],
@@ -68,6 +69,7 @@
     "sep_token_id": null,
     "task_specific_params": null,
     "temperature": 1.0,
+    "tf_legacy_loss": false,
     "tie_encoder_decoder": false,
     "tie_word_embeddings": true,
     "tokenizer_class": null,
@@ -75,7 +77,7 @@
     "top_p": 1.0,
     "torch_dtype": null,
     "torchscript": false,
-    "transformers_version": "4.21.0.dev0",
+    "transformers_version": "4.22.2",
     "typical_p": 1.0,
     "use_bfloat16": false,
     "vocab_size": 49408
@@ -133,6 +135,7 @@
     "num_attention_heads": 16,
     "num_beam_groups": 1,
     "num_beams": 1,
+    "num_channels": 3,
     "num_hidden_layers": 24,
     "num_return_sequences": 1,
     "output_attentions": false,
@@ -150,6 +153,7 @@
     "sep_token_id": null,
     "task_specific_params": null,
     "temperature": 1.0,
+    "tf_legacy_loss": false,
    "tie_encoder_decoder": false,
     "tie_word_embeddings": true,
     "tokenizer_class": null,
@@ -157,7 +161,7 @@
     "top_p": 1.0,
     "torch_dtype": null,
     "torchscript": false,
-    "transformers_version": "4.21.0.dev0",
+    "transformers_version": "4.22.2",
     "typical_p": 1.0,
     "use_bfloat16": false
   },
scheduler/scheduler_config.json CHANGED
@@ -1,13 +1,9 @@
 {
-  "_class_name": "DDIMScheduler",
-  "_diffusers_version": "0.2.4",
+  "_class_name": "LMSDiscreteScheduler",
+  "_diffusers_version": "0.4.1",
   "beta_end": 0.012,
   "beta_schedule": "scaled_linear",
   "beta_start": 0.00085,
-  "clip_sample": false,
   "num_train_timesteps": 1000,
-  "set_alpha_to_one": false,
-  "timestep_values": null,
-  "trained_betas": null,
-  "steps_offset": 1
+  "trained_betas": null
 }
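
The noise schedule itself is unchanged (scaled-linear betas from 0.00085 to 0.012 over 1000 training timesteps); only the scheduler class moves from DDIM to LMS. A minimal sketch constructing an equivalent scheduler directly from these config values:

```python
from diffusers import LMSDiscreteScheduler

# Arguments mirror scheduler/scheduler_config.json above.
scheduler = LMSDiscreteScheduler(
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    num_train_timesteps=1000,
)

# A loaded pipeline can be pointed at it explicitly, e.g.:
# pipe.scheduler = scheduler
```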
text_encoder/config.json CHANGED
@@ -18,7 +18,8 @@
   "num_attention_heads": 12,
   "num_hidden_layers": 12,
   "pad_token_id": 1,
+  "projection_dim": 768,
   "torch_dtype": "float32",
-  "transformers_version": "4.21.3",
+  "transformers_version": "4.22.2",
   "vocab_size": 49408
 }
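
The text encoder config gains a `projection_dim` of 768 (the hidden size of the CLIP ViT-L/14 text encoder) alongside the newer transformers version. A hedged sketch of loading just this component, assuming the Hub repo id `hakurei/waifu-diffusion` and a transformers release that accepts the `subfolder` argument:

```python
from transformers import CLIPTextModel, CLIPTokenizer

# Assumptions: repo id "hakurei/waifu-diffusion"; tokenizer and text encoder
# live in the "tokenizer" and "text_encoder" subfolders of this repo.
tokenizer = CLIPTokenizer.from_pretrained("hakurei/waifu-diffusion", subfolder="tokenizer")
text_encoder = CLIPTextModel.from_pretrained("hakurei/waifu-diffusion", subfolder="text_encoder")

tokens = tokenizer("1girl, solo, upper body", return_tensors="pt")
hidden = text_encoder(**tokens).last_hidden_state  # shape: (1, seq_len, 768)
```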
unet/config.json CHANGED
@@ -1,6 +1,6 @@
 {
   "_class_name": "UNet2DConditionModel",
-  "_diffusers_version": "0.2.4",
+  "_diffusers_version": "0.4.1",
   "act_fn": "silu",
   "attention_head_dim": 8,
   "block_out_channels": [
unet/diffusion_pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9879a41e1f8b02bbe3937110c4f4b0171e3c04f9c6f02817cde986a3c4d09afe
+oid sha256:f47e5665f0e85155a5f6f58683b04940c6b132023d584396226bf54419a78831
 size 3438354725
vae/config.json CHANGED
@@ -1,6 +1,6 @@
 {
   "_class_name": "AutoencoderKL",
-  "_diffusers_version": "0.2.4",
+  "_diffusers_version": "0.4.1",
   "act_fn": "silu",
   "block_out_channels": [
     128,
@@ -17,8 +17,9 @@
   "in_channels": 3,
   "latent_channels": 4,
   "layers_per_block": 2,
+  "norm_num_groups": 32,
   "out_channels": 3,
-  "sample_size": 512,
+  "sample_size": 256,
   "up_block_types": [
     "UpDecoderBlock2D",
     "UpDecoderBlock2D",