update model
Browse files
scheduler/scheduler_config.json
CHANGED
@@ -4,14 +4,15 @@
|
|
4 |
"beta_end": 0.012,
|
5 |
"beta_schedule": "scaled_linear",
|
6 |
"beta_start": 0.00085,
|
7 |
-
"clip_sample":
|
8 |
"clip_sample_range": 1.0,
|
9 |
"dynamic_thresholding_ratio": 0.995,
|
10 |
"num_train_timesteps": 1000,
|
|
|
11 |
"prediction_type": "epsilon",
|
12 |
"rescale_betas_zero_snr": false,
|
13 |
"sample_max_value": 1.0,
|
14 |
-
"set_alpha_to_one":
|
15 |
"steps_offset": 0,
|
16 |
"thresholding": false,
|
17 |
"timestep_spacing": "leading",
|
|
|
4 |
"beta_end": 0.012,
|
5 |
"beta_schedule": "scaled_linear",
|
6 |
"beta_start": 0.00085,
|
7 |
+
"clip_sample": false,
|
8 |
"clip_sample_range": 1.0,
|
9 |
"dynamic_thresholding_ratio": 0.995,
|
10 |
"num_train_timesteps": 1000,
|
11 |
+
"original_inference_steps": 50,
|
12 |
"prediction_type": "epsilon",
|
13 |
"rescale_betas_zero_snr": false,
|
14 |
"sample_max_value": 1.0,
|
15 |
+
"set_alpha_to_one": false,
|
16 |
"steps_offset": 0,
|
17 |
"thresholding": false,
|
18 |
"timestep_spacing": "leading",
|
text_encoder/config.json
CHANGED
@@ -1,22 +1,20 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "/tmp/test/tiny-stable-diffusion-torch/text_encoder",
|
3 |
"architectures": [
|
4 |
"CLIPTextModel"
|
5 |
],
|
6 |
"attention_dropout": 0.0,
|
7 |
"bos_token_id": 0,
|
8 |
-
"dropout": 0.0,
|
9 |
"eos_token_id": 2,
|
10 |
"hidden_act": "quick_gelu",
|
11 |
"hidden_size": 32,
|
12 |
"initializer_factor": 1.0,
|
13 |
"initializer_range": 0.02,
|
14 |
-
"intermediate_size":
|
15 |
"layer_norm_eps": 1e-05,
|
16 |
"max_position_embeddings": 77,
|
17 |
"model_type": "clip_text_model",
|
18 |
-
"num_attention_heads":
|
19 |
-
"num_hidden_layers":
|
20 |
"pad_token_id": 1,
|
21 |
"projection_dim": 512,
|
22 |
"torch_dtype": "float32",
|
|
|
1 |
{
|
|
|
2 |
"architectures": [
|
3 |
"CLIPTextModel"
|
4 |
],
|
5 |
"attention_dropout": 0.0,
|
6 |
"bos_token_id": 0,
|
|
|
7 |
"eos_token_id": 2,
|
8 |
"hidden_act": "quick_gelu",
|
9 |
"hidden_size": 32,
|
10 |
"initializer_factor": 1.0,
|
11 |
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 64,
|
13 |
"layer_norm_eps": 1e-05,
|
14 |
"max_position_embeddings": 77,
|
15 |
"model_type": "clip_text_model",
|
16 |
+
"num_attention_heads": 8,
|
17 |
+
"num_hidden_layers": 3,
|
18 |
"pad_token_id": 1,
|
19 |
"projection_dim": 512,
|
20 |
"torch_dtype": "float32",
|
text_encoder/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5edffee1c2ae276b4ecf6e6fc290c2da8224cda3a5a57d1d86a468e8d7e70b62
|
3 |
+
size 246416
|
tokenizer/tokenizer_config.json
CHANGED
@@ -7,15 +7,15 @@
|
|
7 |
"normalized": true,
|
8 |
"rstrip": false,
|
9 |
"single_word": false,
|
10 |
-
"special":
|
11 |
},
|
12 |
"1": {
|
13 |
"content": "<|endoftext|>",
|
14 |
"lstrip": false,
|
15 |
-
"normalized":
|
16 |
"rstrip": false,
|
17 |
"single_word": false,
|
18 |
-
"special":
|
19 |
}
|
20 |
},
|
21 |
"additional_special_tokens": [],
|
|
|
7 |
"normalized": true,
|
8 |
"rstrip": false,
|
9 |
"single_word": false,
|
10 |
+
"special": false
|
11 |
},
|
12 |
"1": {
|
13 |
"content": "<|endoftext|>",
|
14 |
"lstrip": false,
|
15 |
+
"normalized": false,
|
16 |
"rstrip": false,
|
17 |
"single_word": false,
|
18 |
+
"special": false
|
19 |
}
|
20 |
},
|
21 |
"additional_special_tokens": [],
|
unet/config.json
CHANGED
@@ -1,7 +1,6 @@
|
|
1 |
{
|
2 |
"_class_name": "UNet2DConditionModel",
|
3 |
"_diffusers_version": "0.22.0.dev0",
|
4 |
-
"_name_or_path": "/tmp/test/tiny-stable-diffusion-torch/unet",
|
5 |
"act_fn": "silu",
|
6 |
"addition_embed_type": null,
|
7 |
"addition_embed_type_num_heads": 64,
|
@@ -9,8 +8,8 @@
|
|
9 |
"attention_head_dim": 8,
|
10 |
"attention_type": "default",
|
11 |
"block_out_channels": [
|
12 |
-
|
13 |
-
|
14 |
],
|
15 |
"center_input_sample": false,
|
16 |
"class_embed_type": null,
|
@@ -31,12 +30,12 @@
|
|
31 |
"flip_sin_to_cos": true,
|
32 |
"freq_shift": 0,
|
33 |
"in_channels": 4,
|
34 |
-
"layers_per_block":
|
35 |
"mid_block_only_cross_attention": null,
|
36 |
"mid_block_scale_factor": 1,
|
37 |
"mid_block_type": "UNetMidBlock2DCrossAttn",
|
38 |
"norm_eps": 1e-05,
|
39 |
-
"norm_num_groups":
|
40 |
"num_attention_heads": null,
|
41 |
"num_class_embeds": null,
|
42 |
"only_cross_attention": false,
|
@@ -45,8 +44,9 @@
|
|
45 |
"resnet_out_scale_factor": 1.0,
|
46 |
"resnet_skip_time_act": false,
|
47 |
"resnet_time_scale_shift": "default",
|
48 |
-
"
|
49 |
-
"
|
|
|
50 |
"time_embedding_act_fn": null,
|
51 |
"time_embedding_dim": null,
|
52 |
"time_embedding_type": "positional",
|
|
|
1 |
{
|
2 |
"_class_name": "UNet2DConditionModel",
|
3 |
"_diffusers_version": "0.22.0.dev0",
|
|
|
4 |
"act_fn": "silu",
|
5 |
"addition_embed_type": null,
|
6 |
"addition_embed_type_num_heads": 64,
|
|
|
8 |
"attention_head_dim": 8,
|
9 |
"attention_type": "default",
|
10 |
"block_out_channels": [
|
11 |
+
4,
|
12 |
+
8
|
13 |
],
|
14 |
"center_input_sample": false,
|
15 |
"class_embed_type": null,
|
|
|
30 |
"flip_sin_to_cos": true,
|
31 |
"freq_shift": 0,
|
32 |
"in_channels": 4,
|
33 |
+
"layers_per_block": 1,
|
34 |
"mid_block_only_cross_attention": null,
|
35 |
"mid_block_scale_factor": 1,
|
36 |
"mid_block_type": "UNetMidBlock2DCrossAttn",
|
37 |
"norm_eps": 1e-05,
|
38 |
+
"norm_num_groups": 2,
|
39 |
"num_attention_heads": null,
|
40 |
"num_class_embeds": null,
|
41 |
"only_cross_attention": false,
|
|
|
44 |
"resnet_out_scale_factor": 1.0,
|
45 |
"resnet_skip_time_act": false,
|
46 |
"resnet_time_scale_shift": "default",
|
47 |
+
"reverse_transformer_layers_per_block": null,
|
48 |
+
"sample_size": 32,
|
49 |
+
"time_cond_proj_dim": 32,
|
50 |
"time_embedding_act_fn": null,
|
51 |
"time_embedding_dim": null,
|
52 |
"time_embedding_type": "positional",
|
unet/diffusion_pytorch_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:673e65bc368c3e781f88c2464f0f34ea3da79a3ccc1640a9d71c569997cf5926
|
3 |
+
size 96608
|
vae/config.json
CHANGED
@@ -1,11 +1,10 @@
|
|
1 |
{
|
2 |
"_class_name": "AutoencoderKL",
|
3 |
"_diffusers_version": "0.22.0.dev0",
|
4 |
-
"_name_or_path": "/tmp/test/tiny-stable-diffusion-torch/vae",
|
5 |
"act_fn": "silu",
|
6 |
"block_out_channels": [
|
7 |
-
|
8 |
-
|
9 |
],
|
10 |
"down_block_types": [
|
11 |
"DownEncoderBlock2D",
|
@@ -15,9 +14,9 @@
|
|
15 |
"in_channels": 3,
|
16 |
"latent_channels": 4,
|
17 |
"layers_per_block": 1,
|
18 |
-
"norm_num_groups":
|
19 |
"out_channels": 3,
|
20 |
-
"sample_size":
|
21 |
"scaling_factor": 0.18215,
|
22 |
"up_block_types": [
|
23 |
"UpDecoderBlock2D",
|
|
|
1 |
{
|
2 |
"_class_name": "AutoencoderKL",
|
3 |
"_diffusers_version": "0.22.0.dev0",
|
|
|
4 |
"act_fn": "silu",
|
5 |
"block_out_channels": [
|
6 |
+
4,
|
7 |
+
8
|
8 |
],
|
9 |
"down_block_types": [
|
10 |
"DownEncoderBlock2D",
|
|
|
14 |
"in_channels": 3,
|
15 |
"latent_channels": 4,
|
16 |
"layers_per_block": 1,
|
17 |
+
"norm_num_groups": 2,
|
18 |
"out_channels": 3,
|
19 |
+
"sample_size": 32,
|
20 |
"scaling_factor": 0.18215,
|
21 |
"up_block_types": [
|
22 |
"UpDecoderBlock2D",
|
vae/diffusion_pytorch_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b023ca2edc02c318f3c46b7c8c05d21c188724d40b51855065fec0f91bf950ef
|
3 |
+
size 59772
|