multimodalart committed
Commit 8d49336
1 Parent(s): e0e9e9a

v2 support

Files changed (4):
  1. app.py +157 -85
  2. requirements-local.txt +18 -0
  3. requirements.txt +1 -1
  4. train_dreambooth.py +6 -3
app.py CHANGED
@@ -28,32 +28,40 @@ css = '''
'''
maximum_concepts = 3

- #Pre download the files even if we don't use it here
- model_to_load = snapshot_download(repo_id="multimodalart/sd-fine-tunable")
+ #Pre download the files
+ model_v1 = snapshot_download(repo_id="multimodalart/sd-fine-tunable")
+ #model_v2 = snapshot_download(repo_id="stabilityai/stable-diffusion-2")
+ model_v2_512 = snapshot_download(repo_id="stabilityai/stable-diffusion-2-base")
safety_checker = snapshot_download(repo_id="multimodalart/sd-sc")

- def zipdir(path, ziph):
- # ziph is zipfile handle
- for root, dirs, files in os.walk(path):
- for file in files:
- ziph.write(os.path.join(root, file),
- os.path.relpath(os.path.join(root, file),
- os.path.join(path, '..')))
+ model_to_load = model_v1
+
+ #with zipfile.ZipFile("mix.zip", 'r') as zip_ref:
+ #    zip_ref.extractall(".")

def swap_text(option):
mandatory_liability = "You must have the right to do so and you are liable for the images you use, example:"
if(option == "object"):
instance_prompt_example = "cttoy"
freeze_for = 50
- return [f"You are going to train `object`(s), upload 5-10 images of each object you are planning on training on from different angles/perspectives. {mandatory_liability}:", '''<img src="file/cat-toy.png" />''', f"You should name your concept with a unique made up word that has low chance of the model already knowing it (e.g.: `{instance_prompt_example}` here). Images will be automatically cropped to 512x512.", freeze_for]
+ return [f"You are going to train `object`(s), upload 5-10 images of each object you are planning on training on from different angles/perspectives. {mandatory_liability}:", '''<img src="file/cat-toy.png" />''', f"You should name your concept with a unique made up word that has low chance of the model already knowing it (e.g.: `{instance_prompt_example}` here). Images will be automatically cropped to 512x512.", freeze_for, gr.update(visible=False)]
elif(option == "person"):
instance_prompt_example = "julcto"
- freeze_for = 100
- return [f"You are going to train a `person`(s), upload 10-20 images of each person you are planning on training on from different angles/perspectives. {mandatory_liability}:", '''<img src="file/person.png" />''', f"You should name the files with a unique word that represent your concept (e.g.: `{instance_prompt_example}` here). Images will be automatically cropped to 512x512.", freeze_for]
+ freeze_for = 65
+ return [f"You are going to train a `person`(s), upload 10-20 images of each person you are planning on training on from different angles/perspectives. {mandatory_liability}:", '''<img src="file/person.png" />''', f"You should name the files with a unique word that represent your concept (e.g.: `{instance_prompt_example}` here). Images will be automatically cropped to 512x512.", freeze_for, gr.update(visible=False)]
elif(option == "style"):
instance_prompt_example = "trsldamrl"
freeze_for = 10
- return [f"You are going to train a `style`, upload 10-20 images of the style you are planning on training on. Name the files with the words you would like {mandatory_liability}:", '''<img src="file/trsl_style.png" />''', f"You should name your files with a unique word that represent your concept (e.g.: `{instance_prompt_example}` here). Images will be automatically cropped to 512x512.", freeze_for]
+ return [f"You are going to train a `style`, upload 10-20 images of the style you are planning on training on. Name the files with the words you would like {mandatory_liability}:", '''<img src="file/trsl_style.png" />''', f"You should name your files with a unique word that represent your concept (e.g.: `{instance_prompt_example}` here). Images will be automatically cropped to 512x512.", freeze_for, gr.update(visible=False)]
+
+ def swap_base_model(selected_model):
+ global model_to_load
+ if(selected_model == "v1-5"):
+ model_to_load = model_v1
+ elif(selected_model == "v2-768"):
+ model_to_load = model_v2
+ else:
+ model_to_load = model_v2_512

def count_files(*inputs):
file_counter = 0
@@ -69,10 +77,7 @@ def count_files(*inputs):
if(uses_custom):
Training_Steps = int(inputs[-3])
else:
- if(type_of_thing == "person"):
- Training_Steps = file_counter*200*2
- else:
- Training_Steps = file_counter*200
+ Training_Steps = file_counter*200
if(is_spaces):
summary_sentence = f'''You are going to train {concept_counter} {type_of_thing}(s), with {file_counter} images for {Training_Steps} steps. The training should take around {round(Training_Steps/1.1, 2)} seconds, or {round((Training_Steps/1.1)/60, 2)} minutes.
The setup, compression and uploading the model can take up to 20 minutes.<br>As the T4-Small GPU costs US$0.60 for 1h, <span style="font-size: 120%"><b>the estimated cost for this training is US${round((((Training_Steps/1.1)/3600)+0.3+0.1)*0.60, 2)}.</b></span><br><br>
@@ -82,6 +87,13 @@ def count_files(*inputs):

return([gr.update(visible=True), gr.update(visible=True, value=summary_sentence)])

+ def update_steps(*files_list):
+ file_counter = 0
+ for i, files in enumerate(files_list):
+ if(files):
+ file_counter+=len(files)
+ return(gr.update(value=file_counter*200))
+
def pad_image(image):
w, h = image.size
if w == h:
@@ -101,7 +113,9 @@ def train(*inputs):

torch.cuda.empty_cache()
if 'pipe' in globals():
+ global pipe, pipe_is_set
del pipe
+ pipe_is_set = False
gc.collect()

if os.path.exists("output_model"): shutil.rmtree('output_model')
@@ -130,9 +144,9 @@ def train(*inputs):
os.makedirs('output_model',exist_ok=True)
uses_custom = inputs[-1]
type_of_thing = inputs[-4]
-
remove_attribution_after = inputs[-6]
-
+ experimental_face_improvement = inputs[-9]
+ which_model = inputs[-10]
if(uses_custom):
Training_Steps = int(inputs[-3])
Train_text_encoder_for = int(inputs[-2])
@@ -140,51 +154,100 @@ def train(*inputs):
Training_Steps = file_counter*200
if(type_of_thing == "object"):
Train_text_encoder_for=30
- elif(type_of_thing == "person"):
- Train_text_encoder_for=60
elif(type_of_thing == "style"):
Train_text_encoder_for=15
+ elif(type_of_thing == "person"):
+ Train_text_encoder_for=65

- class_data_dir = None
stptxt = int((Training_Steps*Train_text_encoder_for)/100)
- args_general = argparse.Namespace(
- image_captions_filename = True,
- train_text_encoder = True,
- stop_text_encoder_training = stptxt,
- save_n_steps = 0,
- pretrained_model_name_or_path = model_to_load,
- instance_data_dir="instance_images",
- class_data_dir=class_data_dir,
- output_dir="output_model",
- instance_prompt="",
- seed=42,
- resolution=512,
- mixed_precision="fp16",
- train_batch_size=1,
- gradient_accumulation_steps=1,
- use_8bit_adam=True,
- learning_rate=2e-6,
- lr_scheduler="polynomial",
- lr_warmup_steps = 0,
- max_train_steps=Training_Steps,
- )
- print("Starting training...")
- lock_file = open("intraining.lock", "w")
- lock_file.close()
- run_training(args_general)
+ if (type_of_thing == "object" or type_of_thing == "style" or (type_of_thing == "person" and not experimental_face_improvement)):
+ args_general = argparse.Namespace(
+ image_captions_filename = True,
+ train_text_encoder = True if stptxt > 0 else False,
+ stop_text_encoder_training = stptxt,
+ save_n_steps = 0,
+ pretrained_model_name_or_path = model_to_load,
+ instance_data_dir="instance_images",
+ class_data_dir=None,
+ output_dir="output_model",
+ instance_prompt="",
+ seed=42,
+ resolution=512,
+ mixed_precision="fp16",
+ train_batch_size=1,
+ gradient_accumulation_steps=1,
+ use_8bit_adam=True,
+ learning_rate=2e-6,
+ lr_scheduler="polynomial",
+ lr_warmup_steps = 0,
+ max_train_steps=Training_Steps,
+ )
+ print("Starting single training...")
+ lock_file = open("intraining.lock", "w")
+ lock_file.close()
+ run_training(args_general)
+ else:
+ args_txt_encoder = argparse.Namespace(
+ image_captions_filename=True,
+ train_text_encoder=True,
+ dump_only_text_encoder=True,
+ pretrained_model_name_or_path=model_to_load,
+ save_n_steps=0,
+ instance_data_dir="instance_images",
+ class_data_dir="Mix",
+ output_dir="output_model",
+ with_prior_preservation=True,
+ prior_loss_weight=1.0,
+ instance_prompt="",
+ seed=42,
+ resolution=512,
+ mixed_precision="fp16",
+ train_batch_size=1,
+ gradient_accumulation_steps=1,
+ gradient_checkpointing=True,
+ use_8bit_adam=True,
+ learning_rate=2e-6,
+ lr_scheduler="polynomial",
+ lr_warmup_steps = 0,
+ max_train_steps=stptxt,
+ num_class_images=200
+ )
+ args_unet = argparse.Namespace(
+ image_captions_filename=True,
+ train_only_unet=True,
+ save_n_steps=0,
+ pretrained_model_name_or_path=model_to_load,
+ instance_data_dir="instance_images",
+ output_dir="output_model",
+ instance_prompt="",
+ seed=42,
+ resolution=512,
+ mixed_precision="fp16",
+ train_batch_size=1,
+ gradient_accumulation_steps=1,
+ use_8bit_adam=True,
+ learning_rate=2e-6,
+ lr_scheduler="polynomial",
+ lr_warmup_steps = 0,
+ max_train_steps=Training_Steps,
+ )
+ print("Starting multi-training...")
+ lock_file = open("intraining.lock", "w")
+ lock_file.close()
+ run_training(args_txt_encoder)
+ run_training(args_unet)
gc.collect()
torch.cuda.empty_cache()
- print("Adding Safety Checker to the model...")
- shutil.copytree(f"{safety_checker}/feature_extractor", "output_model/feature_extractor")
- shutil.copytree(f"{safety_checker}/safety_checker", "output_model/safety_checker")
- shutil.copy(f"model_index.json", "output_model/model_index.json")
+ if(which_model == "v1-5"):
+ print("Adding Safety Checker to the model...")
+ shutil.copytree(f"{safety_checker}/feature_extractor", "output_model/feature_extractor")
+ shutil.copytree(f"{safety_checker}/safety_checker", "output_model/safety_checker")
+ shutil.copy(f"model_index.json", "output_model/model_index.json")

- #with zipfile.ZipFile('diffusers_model.zip', 'w', zipfile.ZIP_DEFLATED) as zipf:
- # zipdir('output_model/', zipf)
if(not remove_attribution_after):
print("Archiving model file...")
with tarfile.open("diffusers_model.tar", "w") as tar:
- tar.add("diffusers_model", arcname=os.path.basename("diffusers_model"))
+ tar.add("output_model", arcname=os.path.basename("output_model"))
if os.path.exists("intraining.lock"): os.remove("intraining.lock")
trained_file = open("hastrained.success", "w")
trained_file.close()
@@ -201,22 +264,27 @@ def train(*inputs):
hf_token = inputs[-5]
model_name = inputs[-7]
where_to_upload = inputs[-8]
- push(model_name, where_to_upload, hf_token, True)
+ push(model_name, where_to_upload, hf_token, which_model, True)
hardware_url = f"https://huggingface.co/spaces/{os.environ['SPACE_ID']}/hardware"
headers = { "authorization" : f"Bearer {hf_token}"}
body = {'flavor': 'cpu-basic'}
requests.post(hardware_url, json = body, headers=headers)

- def generate(prompt):
+ pipe_is_set = False
+ def generate(prompt, steps):
torch.cuda.empty_cache()
from diffusers import StableDiffusionPipeline
- global pipe
- pipe = StableDiffusionPipeline.from_pretrained("./output_model", torch_dtype=torch.float16)
- pipe = pipe.to("cuda")
- image = pipe(prompt).images[0]
+ global pipe_is_set
+ if(not pipe_is_set):
+ global pipe
+ pipe = StableDiffusionPipeline.from_pretrained("./output_model", torch_dtype=torch.float16)
+ pipe = pipe.to("cuda")
+ pipe_is_set = True
+
+ image = pipe(prompt, num_inference_steps=steps).images[0]
return(image)

- def push(model_name, where_to_upload, hf_token, comes_from_automated=False):
+ def push(model_name, where_to_upload, hf_token, which_model, comes_from_automated=False):
if(not os.path.exists("model.ckpt")):
convert("output_model", "model.ckpt")
from huggingface_hub import HfApi, HfFolder, CommitOperationAdd
@@ -250,7 +318,7 @@ license: creativeml-openrail-m
tags:
- text-to-image
---
- ### {model_name} Dreambooth model trained by {api.whoami(token=hf_token)["name"]} with [Hugging Face Dreambooth Training Space](https://huggingface.co/spaces/multimodalart/dreambooth-training)
+ ### {model_name} Dreambooth model trained by {api.whoami(token=hf_token)["name"]} with [Hugging Face Dreambooth Training Space](https://huggingface.co/spaces/multimodalart/dreambooth-training) with the {which_model} base model

You run your new concept via `diffusers` [Colab Notebook for Inference](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/sd_dreambooth_inference.ipynb). Don't forget to use the concept prompts!

@@ -371,21 +439,24 @@ with gr.Blocks(css=css) as demo:
top_description = gr.HTML(f'''
<div class="gr-prose" style="max-width: 80%">
<h2>You have successfully cloned the Dreambooth Training Space locally 🎉</h2>
- <p>If you are having problems with the requirements, try installing xformers with `%pip install git+https://github.com/facebookresearch/xformers@1d31a3a#egg=xformers`</p>
+ <p>Do a <code>pip install requirements-local.txt</code></p>
</div>
''')
- gr.Markdown("# Dreambooth Training UI")
- gr.Markdown("Customize Stable Diffusion by training it on a few examples of concepts, up to 3 concepts on the same model. This Space is based on TheLastBen's [fast-DreamBooth Colab](https://colab.research.google.com/github/TheLastBen/fast-stable-diffusion/blob/main/fast-DreamBooth.ipynb) with [🧨 diffusers](https://github.com/huggingface/diffusers)")
+ gr.Markdown("# Dreambooth Training UI 💭")
+ gr.Markdown("Customize Stable Diffusion v1 or v2 (new!) by training it on a few examples of concepts, up to 3 concepts on the same model. This Space is based on TheLastBen's [fast-DreamBooth Colab](https://colab.research.google.com/github/TheLastBen/fast-stable-diffusion/blob/main/fast-DreamBooth.ipynb) with [🧨 diffusers](https://github.com/huggingface/diffusers)")

with gr.Row() as what_are_you_training:
type_of_thing = gr.Dropdown(label="What would you like to train?", choices=["object", "person", "style"], value="object", interactive=True)
-
+ base_model_to_use = gr.Dropdown(label="Which base model would you like to use?", choices=["v1-5", "v2-512"], value="v1-5", interactive=True)
+
#Very hacky approach to emulate dynamically created Gradio components
with gr.Row() as upload_your_concept:
with gr.Column():
- thing_description = gr.Markdown("You are going to train an `object`, please upload 5-10 images of the object you are planning on training on from different angles/perspectives. You must have the right to do so and you are liable for the images you use, example:")
+ thing_description = gr.Markdown("You are going to train an `object`, please upload 5-10 images of the object you are planning on training on from different angles/perspectives. You must have the right to do so and you are liable for the images you use, example")
+ thing_experimental = gr.Checkbox(label="Improve faces (experimental) - takes 1.5x times training, can improve if you are training people's faces", visible=False, value=False)
thing_image_example = gr.HTML('''<img src="file/cat-toy.png" />''')
things_naming = gr.Markdown("You should name your concept with a unique made up word that has low chance of the model already knowing it (e.g.: `cttoy` here). Images will be automatically cropped to 512x512.")
+
with gr.Column():
file_collection = []
concept_collection = []
@@ -431,24 +502,19 @@ with gr.Blocks(css=css) as demo:

with gr.Accordion("Custom Settings", open=False):
swap_auto_calculated = gr.Checkbox(label="Use custom settings")
- gr.Markdown("If not checked, the number of steps and % of frozen encoder will be tuned automatically according to the amount of images you upload and whether you are training an `object`, `person` or `style` as follows: The number of steps is calculated by number of images uploaded multiplied by 20. The text-encoder is frozen after 10% of the steps for a style, 30% of the steps for an object and is fully trained for persons.")
+ gr.Markdown("If not checked, the number of steps and % of frozen encoder will be tuned automatically according to the amount of images you upload and whether you are training an `object`, `person` or `style` as follows: The number of steps is calculated by number of images uploaded multiplied by 200. The text-encoder is frozen after 10% of the steps for a style, 30% of the steps for an object and 65% trained for persons.")
steps = gr.Number(label="How many steps", value=800)
perc_txt_encoder = gr.Number(label="Percentage of the training steps the text-encoder should be trained as well", value=30)
-
+
with gr.Box(visible=False) as training_summary:
training_summary_text = gr.HTML("", visible=False, label="Training Summary")
- if(is_spaces):
- training_summary_checkbox = gr.Checkbox(label="Automatically remove paid GPU attribution and upload model to the Hugging Face Hub after training", value=False)
- training_summary_model_name = gr.Textbox(label="Name of your model", visible=False)
- training_summary_where_to_upload = gr.Dropdown(["My personal profile", "Public Library"], label="Upload to", visible=False)
- training_summary_token_message = gr.Markdown("[A Hugging Face write access token](https://huggingface.co/settings/tokens), go to \"New token\" -> Role : Write. A regular read token won't work here.", visible=False)
- training_summary_token = gr.Textbox(label="Hugging Face Write Token", type="password", visible=False)
- else:
- training_summary_checkbox = False
- training_summary_model_name = ''
- training_summary_where_to_upload = "My person profile"
- training_summary_token_message = ""
- training_summary_token = ""
+ is_advanced_visible = True if is_spaces else False
+ training_summary_checkbox = gr.Checkbox(label="Automatically remove paid GPU attribution and upload model to the Hugging Face Hub after training", value=False, visible=is_advanced_visible)
+ training_summary_model_name = gr.Textbox(label="Name of your model", visible=False)
+ training_summary_where_to_upload = gr.Dropdown(["My personal profile", "Public Library"], label="Upload to", visible=False)
+ training_summary_token_message = gr.Markdown("[A Hugging Face write access token](https://huggingface.co/settings/tokens), go to \"New token\" -> Role : Write. A regular read token won't work here.", visible=False)
+ training_summary_token = gr.Textbox(label="Hugging Face Write Token", type="password", visible=False)
+
train_btn = gr.Button("Start Training")

training_ongoing = gr.Markdown("## Training is ongoing ⌛... You can close this tab if you like or just wait. If you did not check the `Remove GPU After training`, you can come back here to try your model and upload it after training. Don't forget to remove the GPU attribution after you are done. ", visible=False)
@@ -462,6 +528,7 @@ with gr.Blocks(css=css) as demo:
gr.Markdown("## Try your model")
prompt = gr.Textbox(label="Type your prompt")
result_image = gr.Image()
+ inference_steps = gr.Slider(minimum=1, maximum=150, value=50, step=1)
generate_button = gr.Button("Generate Image")

with gr.Box(visible=False) as push_to_hub:
@@ -478,11 +545,16 @@ with gr.Blocks(css=css) as demo:
convert_button = gr.Button("Convert to CKPT", visible=False)

#Swap the examples and the % of text encoder trained depending if it is an object, person or style
- type_of_thing.change(fn=swap_text, inputs=[type_of_thing], outputs=[thing_description, thing_image_example, things_naming, perc_txt_encoder], queue=False, show_progress=False)
+ type_of_thing.change(fn=swap_text, inputs=[type_of_thing], outputs=[thing_description, thing_image_example, things_naming, perc_txt_encoder, thing_experimental], queue=False, show_progress=False)

+ #Swap the base model
+ base_model_to_use.change(fn=swap_base_model, inputs=base_model_to_use, outputs=[])
+
#Update the summary box below the UI according to how many images are uploaded and whether users are using custom settings or not
for file in file_collection:
+ file.change(fn=update_steps,inputs=file_collection, outputs=steps)
file.change(fn=count_files, inputs=file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary, training_summary_text], queue=False)
+
steps.change(fn=count_files, inputs=file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary, training_summary_text], queue=False)
perc_txt_encoder.change(fn=count_files, inputs=file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary, training_summary_text], queue=False)

@@ -493,12 +565,12 @@ with gr.Blocks(css=css) as demo:
train_btn.click(lambda:gr.update(visible=True), inputs=None, outputs=training_ongoing)

#The main train function
- train_btn.click(fn=train, inputs=is_visible+concept_collection+file_collection+[training_summary_where_to_upload]+[training_summary_model_name]+[training_summary_checkbox]+[training_summary_token]+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[result, try_your_model, push_to_hub, convert_button, training_ongoing, completed_training], queue=False)
+ train_btn.click(fn=train, inputs=is_visible+concept_collection+file_collection+[base_model_to_use]+[thing_experimental]+[training_summary_where_to_upload]+[training_summary_model_name]+[training_summary_checkbox]+[training_summary_token]+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[result, try_your_model, push_to_hub, convert_button, training_ongoing, completed_training], queue=False)

#Button to generate an image from your trained model after training
- generate_button.click(fn=generate, inputs=prompt, outputs=result_image, queue=False)
+ generate_button.click(fn=generate, inputs=[prompt, inference_steps], outputs=result_image, queue=False)
#Button to push the model to the Hugging Face Hub
- push_button.click(fn=push, inputs=[model_name, where_to_upload, hf_token], outputs=[success_message_upload, result], queue=False)
+ push_button.click(fn=push, inputs=[model_name, where_to_upload, hf_token, base_model_to_use], outputs=[success_message_upload, result], queue=False)
#Button to convert the model to ckpt format
convert_button.click(fn=convert_to_ckpt, inputs=[], outputs=result, queue=False)
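For context on the new generate() above: the commit now caches the diffusers pipeline behind a module-level pipe / pipe_is_set pair, so the trained model in ./output_model is loaded onto the GPU only once and then reused for every prompt and inference-steps value. A minimal standalone sketch of that lazy-loading pattern (the example prompt is illustrative, not part of the commit):

import torch
from diffusers import StableDiffusionPipeline

pipe = None  # loaded lazily, mirroring the app's global `pipe` / `pipe_is_set` flag

def generate(prompt, steps=50):
    # Load the trained pipeline once, then reuse it for every call.
    global pipe
    if pipe is None:
        # fp16 halves VRAM use; the Space runs inference on a T4
        pipe = StableDiffusionPipeline.from_pretrained(
            "./output_model", torch_dtype=torch.float16
        ).to("cuda")
    return pipe(prompt, num_inference_steps=steps).images[0]

# Example: image = generate("a photo of cttoy on the beach", steps=50)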
requirements-local.txt ADDED
@@ -0,0 +1,18 @@
+ --extra-index-url https://download.pytorch.org/whl/cu113
+ torch==1.12.1+cu113
+ torchvision==0.13.1+cu113
+ diffusers==0.9.0
+ accelerate==0.12.0
+ OmegaConf
+ wget
+ pytorch_lightning
+ huggingface_hub
+ ftfy
+ transformers
+ pyfiglet
+ triton==2.0.0.dev20220701
+ bitsandbytes
+ python-slugify
+ requests
+ tensorboard
+ pip install git+https://github.com/facebookresearch/xformers@7e4c02c#egg=xformers
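requirements-local.txt pins the stack for running the Space outside of Spaces (CUDA 11.3 wheels for torch 1.12.1, diffusers 0.9.0); note that the final xformers line is written as a pip install command rather than a plain requirement specifier, so it has to be run as a separate step. A hedged sketch, not part of the commit, for checking that the pinned environment resolved as intended:

# Sanity-check the pinned local environment after installing the requirements.
import importlib.metadata as md
import torch

print("torch:", torch.__version__)              # expected 1.12.1+cu113
print("CUDA available:", torch.cuda.is_available())
print("diffusers:", md.version("diffusers"))    # expected 0.9.0

try:
    print("xformers:", md.version("xformers"))  # installed separately from the pinned GitHub commit
except md.PackageNotFoundError:
    print("xformers not installed - memory-efficient attention unavailable")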
requirements.txt CHANGED
@@ -1,7 +1,7 @@
--extra-index-url https://download.pytorch.org/whl/cu113
torch==1.12.1+cu113
torchvision==0.13.1+cu113
- git+https://github.com/TheLastBen/diffusers
+ diffusers==0.9.0
accelerate==0.12.0
OmegaConf
wget
train_dreambooth.py CHANGED
@@ -6,7 +6,7 @@ from pathlib import Path
from typing import Optional
import subprocess
import sys
- import gc
+ import gc

import torch
import torch.nn.functional as F
@@ -54,7 +54,7 @@ def parse_args():
"--class_data_dir",
type=str,
default=None,
- required=False,
+ #required=False,
help="A folder containing the training data of class images.",
)
parser.add_argument(
@@ -334,6 +334,7 @@ class DreamBoothDataset(Dataset):
pt=pt.replace("_"," ")
pt=pt.replace("(","")
pt=pt.replace(")","")
+ pt=pt.replace("-","")
instance_prompt = pt
sys.stdout.write(" " +instance_prompt+" ")
sys.stdout.flush()
@@ -746,7 +747,7 @@ def run_training(args_imported):
pipeline.text_encoder.save_pretrained(frz_dir)

if args.save_n_steps >= 200:
- if global_step < args.max_train_steps-100 and global_step+1==i:
+ if global_step < args.max_train_steps and global_step+1==i:
ckpt_name = "_step_" + str(global_step+1)
save_dir = Path(args.output_dir+ckpt_name)
save_dir=str(save_dir)
@@ -770,6 +771,7 @@ def run_training(args_imported):
subprocess.call('cp -f '+frz_dir +'/*.* '+ save_dir+'/text_encoder', shell=True)
chkpth=args.Session_dir+"/"+inst+".ckpt"
subprocess.call('python /content/diffusers/scripts/convert_diffusers_to_original_stable_diffusion.py --model_path ' + save_dir + ' --checkpoint_path ' + chkpth + ' --half', shell=True)
+ subprocess.call('rm -r '+ save_dir, shell=True)
i=i+args.save_n_steps

accelerator.wait_for_everyone()
@@ -819,3 +821,4 @@ def run_training(args_imported):
if __name__ == "__main__":
pass
#main()
+
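The DreamBoothDataset hunk above extends the filename-derived captioning used when image_captions_filename is set: the new pt=pt.replace("-","") also strips dashes before the file stem becomes the instance prompt. A small illustrative sketch mirroring only the replace calls visible in this diff (the surrounding dataset code does more than this):

from pathlib import Path

def filename_to_instance_prompt(image_path):
    # Mirror the caption cleanup visible in the hunk: underscores become spaces,
    # parentheses are dropped, and (new in this commit) dashes are dropped too.
    pt = Path(image_path).stem
    pt = pt.replace("_", " ")
    pt = pt.replace("(", "")
    pt = pt.replace(")", "")
    pt = pt.replace("-", "")
    return pt

print(filename_to_instance_prompt("cttoy_(3).jpg"))  # -> "cttoy 3"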