wyysf committed on
Commit
c594797
•
1 Parent(s): 4aab1ba
.gitignore CHANGED
@@ -1 +1,2 @@
1
- gradio_cached_dir
 
 
1
+ gradio_cached_dir
2
+ jiangxin
ckpts/image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6/config.yaml ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: michelangelo-image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6-170k
2
+ description: ''
3
+ tag: michelangelo-aligned-autoencoder+n4096+noise0.0+pfeat3+zeroemb0.0+normembFalse+lr5e-05+qkvbiasFalse+nfreq8+ln_postTrue
4
+ seed: 0
5
+ use_timestamp: true
6
+ timestamp: ''
7
+ exp_root_dir: outputs
8
+ exp_dir: outputs/michelangelo-image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6-170k
9
+ trial_name: michelangelo-aligned-autoencoder+n4096+noise0.0+pfeat3+zeroemb0.0+normembFalse+lr5e-05+qkvbiasFalse+nfreq8+ln_postTrue
10
+ trial_dir: outputs/michelangelo-image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6-170k/michelangelo-aligned-autoencoder+n4096+noise0.0+pfeat3+zeroemb0.0+normembFalse+lr5e-05+qkvbiasFalse+nfreq8+ln_postTrue
11
+ n_gpus: 8
12
+ resume: ./ckpts/3DNativeGeneration/michelangelo-image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6-170k.ckpt
13
+ data_type: objaverse-datamodule
14
+ data:
15
+ root_dir: data/objaverse_clean/cap3d_high_quality_170k_images
16
+ data_type: occupancy
17
+ n_samples: 4096
18
+ noise_sigma: 0.0
19
+ load_supervision: false
20
+ supervision_type: occupancy
21
+ n_supervision: 10000
22
+ load_image: true
23
+ image_data_path: data/objaverse_clean/raw_data/images/cap3d_high_quality_170k
24
+ image_type: mvrgb
25
+ idx:
26
+ - 0
27
+ - 4
28
+ - 8
29
+ - 12
30
+ - 16
31
+ n_views: 4
32
+ load_caption: false
33
+ rotate_points: false
34
+ batch_size: 32
35
+ num_workers: 16
36
+ system_type: shape-diffusion-system
37
+ system:
38
+ val_samples_json: val_data/mv_images/val_samples_rgb_mvimage.json
39
+ z_scale_factor: 1.0
40
+ guidance_scale: 7.5
41
+ num_inference_steps: 50
42
+ eta: 0.0
43
+ shape_model_type: michelangelo-aligned-autoencoder
44
+ shape_model:
45
+ num_latents: 256
46
+ embed_dim: 64
47
+ point_feats: 3
48
+ out_dim: 1
49
+ num_freqs: 8
50
+ include_pi: false
51
+ heads: 12
52
+ width: 768
53
+ num_encoder_layers: 8
54
+ num_decoder_layers: 16
55
+ use_ln_post: true
56
+ init_scale: 0.25
57
+ qkv_bias: false
58
+ use_flash: true
59
+ use_checkpoint: true
60
+ condition_model_type: clip-embedder
61
+ condition_model:
62
+ pretrained_model_name_or_path: openai/clip-vit-large-patch14
63
+ encode_camera: true
64
+ camera_embeds_dim: 32
65
+ n_views: 4
66
+ empty_embeds_ratio: 0.1
67
+ normalize_embeds: false
68
+ zero_uncond_embeds: true
69
+ denoiser_model_type: simple-denoiser
70
+ denoiser_model:
71
+ input_channels: 64
72
+ output_channels: 64
73
+ n_ctx: 256
74
+ width: 768
75
+ layers: 6
76
+ heads: 12
77
+ context_dim: 1024
78
+ init_scale: 1.0
79
+ skip_ln: true
80
+ use_checkpoint: true
81
+ noise_scheduler_type: diffusers.schedulers.DDPMScheduler
82
+ noise_scheduler:
83
+ num_train_timesteps: 1000
84
+ beta_start: 0.00085
85
+ beta_end: 0.012
86
+ beta_schedule: scaled_linear
87
+ variance_type: fixed_small
88
+ clip_sample: false
89
+ denoise_scheduler_type: diffusers.schedulers.DDIMScheduler
90
+ denoise_scheduler:
91
+ num_train_timesteps: 1000
92
+ beta_start: 0.00085
93
+ beta_end: 0.012
94
+ beta_schedule: scaled_linear
95
+ clip_sample: false
96
+ set_alpha_to_one: false
97
+ steps_offset: 1
98
+ loggers:
99
+ wandb:
100
+ enable: false
101
+ project: JiangXin
102
+ name: text-to-shape-diffusion+michelangelo-image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6-170k+michelangelo-aligned-autoencoder+n4096+noise0.0+pfeat3+zeroemb0.0+normembFalse+lr5e-05+qkvbiasFalse+nfreq8+ln_postTrue
103
+ loss:
104
+ loss_type: mse
105
+ lambda_diffusion: 1.0
106
+ optimizer:
107
+ name: AdamW
108
+ args:
109
+ lr: 5.0e-05
110
+ betas:
111
+ - 0.9
112
+ - 0.99
113
+ eps: 1.0e-06
114
+ scheduler:
115
+ name: SequentialLR
116
+ interval: step
117
+ schedulers:
118
+ - name: LinearLR
119
+ interval: step
120
+ args:
121
+ start_factor: 1.0e-06
122
+ end_factor: 1.0
123
+ total_iters: 5000
124
+ - name: CosineAnnealingLR
125
+ interval: step
126
+ args:
127
+ T_max: 5000
128
+ eta_min: 0.0
129
+ milestones:
130
+ - 5000
131
+ trainer:
132
+ num_nodes: 2
133
+ max_epochs: 100000
134
+ log_every_n_steps: 5
135
+ num_sanity_val_steps: 1
136
+ check_val_every_n_epoch: 3
137
+ enable_progress_bar: true
138
+ precision: 16-mixed
139
+ strategy: ddp_find_unused_parameters_true
140
+ checkpoint:
141
+ save_last: true
142
+ save_top_k: -1
143
+ every_n_train_steps: 5000
ckpts/image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6/model.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41248dba953cad356c491e7584b4171920f2ad95af10b0f78225eda867dbb7c4
3
+ size 3722911570
craftsman/models/autoencoders/__pycache__/michelangelo_autoencoder.cpython-38.pyc CHANGED
Binary files a/craftsman/models/autoencoders/__pycache__/michelangelo_autoencoder.cpython-38.pyc and b/craftsman/models/autoencoders/__pycache__/michelangelo_autoencoder.cpython-38.pyc differ
 
craftsman/models/autoencoders/michelangelo_autoencoder.py CHANGED
@@ -324,3 +324,81 @@ class MichelangeloAutoencoder(AutoEncoder):
324
  logits = self.decoder(queries, latents).squeeze(-1)
325
 
326
  return logits
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
  logits = self.decoder(queries, latents).squeeze(-1)
325
 
326
  return logits
327
+
328
+
329
+
330
+
331
+ @craftsman.register("michelangelo-aligned-autoencoder")
332
+ class MichelangeloAlignedAutoencoder(MichelangeloAutoencoder):
333
+ r"""
334
+ A VAE model for encoding shapes into latents and decoding latent representations into shapes.
335
+ """
336
+ @dataclass
337
+ class Config(MichelangeloAutoencoder.Config):
338
+ clip_model_version: Optional[str] = None
339
+
340
+ cfg: Config
341
+
342
+ def configure(self) -> None:
343
+ if self.cfg.clip_model_version is not None:
344
+ self.clip_model: CLIPModel = CLIPModel.from_pretrained(self.cfg.clip_model_version)
345
+ self.projection = nn.Parameter(torch.empty(self.cfg.width, self.clip_model.projection_dim))
346
+ self.logit_scale = torch.exp(self.clip_model.logit_scale.data)
347
+ nn.init.normal_(self.projection, std=self.clip_model.projection_dim ** -0.5)
348
+ else:
349
+ self.projection = nn.Parameter(torch.empty(self.cfg.width, 768))
350
+ nn.init.normal_(self.projection, std=768 ** -0.5)
351
+
352
+ self.cfg.num_latents = self.cfg.num_latents + 1
353
+
354
+ super().configure()
355
+
356
+ def encode(self,
357
+ surface: torch.FloatTensor,
358
+ sample_posterior: bool = True):
359
+ """
360
+ Args:
361
+ surface (torch.FloatTensor): [B, N, 3+C]
362
+ sample_posterior (bool):
363
+
364
+ Returns:
365
+ latents (torch.FloatTensor)
366
+ posterior (DiagonalGaussianDistribution or None):
367
+ """
368
+ assert surface.shape[-1] == 3 + self.cfg.point_feats, f"\
369
+ Expected {3 + self.cfg.point_feats} channels, got {surface.shape[-1]}"
370
+
371
+ pc, feats = surface[..., :3], surface[..., 3:] # B, n_samples, 3
372
+ shape_latents = self.encoder(pc, feats) # B, num_latents, width
373
+ shape_embeds = shape_latents[:, 0] # B, width
374
+ shape_latents = shape_latents[:, 1:] # B, num_latents-1, width
375
+ kl_embed, posterior = self.encode_kl_embed(shape_latents, sample_posterior) # B, num_latents, embed_dim
376
+
377
+ shape_embeds = shape_embeds @ self.projection
378
+ return shape_embeds, kl_embed, posterior
379
+
380
+ def forward(self,
381
+ surface: torch.FloatTensor,
382
+ queries: torch.FloatTensor,
383
+ sample_posterior: bool = True):
384
+ """
385
+ Args:
386
+ surface (torch.FloatTensor): [B, N, 3+C]
387
+ queries (torch.FloatTensor): [B, P, 3]
388
+ sample_posterior (bool):
389
+
390
+ Returns:
391
+ shape_embeds (torch.FloatTensor): [B, width]
392
+ latents (torch.FloatTensor): [B, num_latents, embed_dim]
393
+ posterior (DiagonalGaussianDistribution or None).
394
+ logits (torch.FloatTensor): [B, P]
395
+ """
396
+
397
+ shape_embeds, kl_embed, posterior = self.encode(surface, sample_posterior=sample_posterior)
398
+
399
+ latents = self.decode(kl_embed) # [B, num_latents - 1, width]
400
+
401
+ logits = self.query(queries, latents) # [B,]
402
+
403
+ return shape_embeds, latents, posterior, logits
404
+
gradio_app.py CHANGED
@@ -170,8 +170,10 @@ if __name__=="__main__":
170
  # mvimg_model_config_list = ["CRM", "ImageDream", "Wonder3D"]
171
 
172
  # for 3D latent set diffusion
173
- ckpt_path = hf_hub_download(repo_id="wyysf/CraftsMan", filename="image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6/model.ckpt", repo_type="model")
174
- config_path = hf_hub_download(repo_id="wyysf/CraftsMan", filename="image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6/config.yaml", repo_type="model")
 
 
175
  scheluder_dict = OrderedDict({
176
  "DDIMScheduler": 'diffusers.schedulers.DDIMScheduler',
177
  # "DPMSolverMultistepScheduler": 'diffusers.schedulers.DPMSolverMultistepScheduler', # not support yet
 
170
  # mvimg_model_config_list = ["CRM", "ImageDream", "Wonder3D"]
171
 
172
  # for 3D latent set diffusion
173
+ ckpt_path = "./ckpts/image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6/model.ckpt"
174
+ config_path = "./ckpts/image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6/config.yaml"
175
+ # ckpt_path = hf_hub_download(repo_id="wyysf/CraftsMan", filename="image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6/model.ckpt", repo_type="model")
176
+ # config_path = hf_hub_download(repo_id="wyysf/CraftsMan", filename="image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6/config.yaml", repo_type="model")
177
  scheluder_dict = OrderedDict({
178
  "DDIMScheduler": 'diffusers.schedulers.DDIMScheduler',
179
  # "DPMSolverMultistepScheduler": 'diffusers.schedulers.DPMSolverMultistepScheduler', # not support yet