diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/bert/hf-internal-testing/tiny-random-BertModel/7963cd8aff5e83e172e9.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/bert/hf-internal-testing/tiny-random-BertModel/7963cd8aff5e83e172e9.json
deleted file mode 100644
index 78244465219ac6f0b9cbbcd10db202b09dbdf665..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/bert/hf-internal-testing/tiny-random-BertModel/7963cd8aff5e83e172e9.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["BertModel"], "attention_probs_dropout_prob": 0.1, "classifier_dropout": null, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 32, "initializer_range": 0.02, "intermediate_size": 37, "layer_norm_eps": 1e-12, "max_position_embeddings": 512, "model_type": "bert", "neuron": {"auto_cast": "matmul", "auto_cast_type": "bf16", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "dynamic_batch_size": false, "inline_weights_to_neff": false, "optlevel": "2", "output_attentions": false, "output_hidden_states": false, "static_batch_size": 1, "static_sequence_length": 64}, "num_attention_heads": 4, "num_hidden_layers": 5, "position_embedding_type": "absolute", "task": "text-classification", "type_vocab_size": 16, "use_cache": true, "vocab_size": 1124}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/4b3ca94bb4445bc28bc8.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/4b3ca94bb4445bc28bc8.json
deleted file mode 100644
index 66c56b2cfcaf36f773bbd1f20605e3b20b20191c..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/4b3ca94bb4445bc28bc8.json
+++ /dev/null
@@ -1 +0,0 @@
-{"apply_residual_connection_post_layernorm": false, "architectures": ["BloomForCausalLM"], "attention_dropout": 0.1, "bos_token_id": 1, "dtype": "float32", "eos_token_id": 2, "gradient_checkpointing": false, "hidden_dropout": 0.1, "hidden_size": 32, "id2label": {"0": "LABEL_0", "1": "LABEL_1", "2": "LABEL_2"}, "initializer_range": 0.02, "is_decoder": true, "label2id": {"LABEL_0": 0, "LABEL_1": 1, "LABEL_2": 2}, "layer_norm_epsilon": 1e-05, "model_type": "bloom", "n_head": 4, "n_layer": 5, "n_positions": 512, "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "hf-internal-testing/tiny-random-BloomForCausalLM", "checkpoint_revision": "92b07e9b7b4f986fa7c54e2ac3b9201b4ba5212e", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 100, "task": "text-generation"}, "pad_token_id": 3, "pretraining_tp": 1, "seq_length": 7, "slow_but_exact": true, "torch_dtype": "float32", "type_vocab_size": 16, "use_cache": true, "vocab_size": 1024}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/4eccd2e66477ff5ef5b3.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/4eccd2e66477ff5ef5b3.json
deleted file mode 100644
index 8084d8fd84a3266614d40c01a9ece1039099f4e4..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/4eccd2e66477ff5ef5b3.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 1024, "hidden_size": 32, "n_layer": 5, "n_head": 4, "layer_norm_epsilon": 1e-05, "initializer_range": 0.02, "use_cache": true, "pretraining_tp": 1, "apply_residual_connection_post_layernorm": false, "hidden_dropout": 0.1, "attention_dropout": 0.1, "bos_token_id": 1, "eos_token_id": 2, "slow_but_exact": true, "torch_dtype": "float32", "is_decoder": true, "architectures": ["BloomForCausalLM"], "id2label": {"0": "LABEL_0", "1": "LABEL_1", "2": "LABEL_2"}, "label2id": {"LABEL_0": 0, "LABEL_1": 1, "LABEL_2": 2}, "pad_token_id": 3, "dtype": "float32", "gradient_checkpointing": false, "model_type": "bloom", "n_positions": 512, "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "hf-internal-testing/tiny-random-BloomForCausalLM", "checkpoint_revision": "0f4f06f162cd67d34d03ee156484e4001d468500", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 100, "task": "text-generation"}, "seq_length": 7, "type_vocab_size": 16}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/d635309efcd921a3a3f6.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/d635309efcd921a3a3f6.json
deleted file mode 100644
index 8ca9cbe92d213fbceb03b98e6437d5d890dfd179..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/bloom/hf-internal-testing/tiny-random-BloomForCausalLM/d635309efcd921a3a3f6.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 1024, "hidden_size": 32, "n_layer": 5, "n_head": 4, "layer_norm_epsilon": 1e-05, "initializer_range": 0.02, "use_cache": true, "pretraining_tp": 1, "apply_residual_connection_post_layernorm": false, "hidden_dropout": 0.1, "attention_dropout": 0.1, "bos_token_id": 1, "eos_token_id": 2, "slow_but_exact": true, "torch_dtype": "float32", "is_decoder": true, "architectures": ["BloomForCausalLM"], "id2label": {"0": "LABEL_0", "1": "LABEL_1", "2": "LABEL_2"}, "label2id": {"LABEL_0": 0, "LABEL_1": 1, "LABEL_2": 2}, "pad_token_id": 3, "dtype": "float32", "gradient_checkpointing": false, "model_type": "bloom", "n_positions": 512, "seq_length": 7, "type_vocab_size": 16, "neuron": {"task": "text-generation", "batch_size": 2, "num_cores": 2, "auto_cast_type": "fp32", "sequence_length": 100, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "hf-internal-testing/tiny-random-BloomForCausalLM", "checkpoint_revision": "0f4f06f162cd67d34d03ee156484e4001d468500"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/clip_text_model/echarlaix/tiny-random-stable-diffusion-xl/5f8fe230c0a8fbf429e8.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/clip_text_model/echarlaix/tiny-random-stable-diffusion-xl/5f8fe230c0a8fbf429e8.json
deleted file mode 100644
index f67669eb609aa67e11f259785ffbbf6cef0305ea..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/clip_text_model/echarlaix/tiny-random-stable-diffusion-xl/5f8fe230c0a8fbf429e8.json
+++ /dev/null
@@ -1 +0,0 @@
-{"model_type": "clip_text_model", "text_encoder": {"architectures": ["CLIPTextModel"], "attention_dropout": 0.0, "hidden_act": "gelu", "hidden_size": 32, "initializer_factor": 1.0, "initializer_range": 0.02, "intermediate_size": 37, "layer_norm_eps": 1e-05, "max_position_embeddings": 77, "model_type": "clip_text_model", "neuron": {"auto_cast": "all", "auto_cast_type": "bf16", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "dynamic_batch_size": false, "inline_weights_to_neff": false, "optlevel": "2", "output_attentions": false, "output_hidden_states": false, "static_batch_size": 1, "static_sequence_length": 77}, "num_attention_heads": 4, "num_hidden_layers": 5, "output_hidden_states": true, "task": "feature-extraction", "vocab_size": 1000}, "text_encoder_2": {"architectures": ["CLIPTextModelWithProjection"], "attention_dropout": 0.0, "hidden_act": "gelu", "hidden_size": 32, "initializer_factor": 1.0, "initializer_range": 0.02, "intermediate_size": 37, "layer_norm_eps": 1e-05, "max_position_embeddings": 77, "model_type": "clip_text_model", "neuron": {"auto_cast": "all", "auto_cast_type": "bf16", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "dynamic_batch_size": false, "inline_weights_to_neff": false, "optlevel": "2", "output_attentions": false, "output_hidden_states": false, "static_batch_size": 1, "static_sequence_length": 77}, "num_attention_heads": 4, "num_hidden_layers": 5, "output_hidden_states": true, "task": "feature-extraction", "vocab_size": 1000}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/clip_text_model/echarlaix/tiny-random-stable-diffusion-xl/7bd42087aa1c770a35ef.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/clip_text_model/echarlaix/tiny-random-stable-diffusion-xl/7bd42087aa1c770a35ef.json
deleted file mode 100644
index c8b34fe67554af9155faf3ae97b9ef3d94fe611e..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/clip_text_model/echarlaix/tiny-random-stable-diffusion-xl/7bd42087aa1c770a35ef.json
+++ /dev/null
@@ -1 +0,0 @@
-{"model_type": "clip_text_model", "text_encoder": {"architectures": ["CLIPTextModel"], "attention_dropout": 0.0, "hidden_act": "gelu", "hidden_size": 32, "initializer_factor": 1.0, "initializer_range": 0.02, "intermediate_size": 37, "layer_norm_eps": 1e-05, "max_position_embeddings": 77, "model_type": "clip_text_model", "neuron": {"auto_cast": "matmul", "auto_cast_type": "bf16", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "dynamic_batch_size": false, "inline_weights_to_neff": false, "optlevel": "2", "output_attentions": false, "output_hidden_states": false, "static_batch_size": 1, "static_sequence_length": 77}, "num_attention_heads": 4, "num_hidden_layers": 5, "output_hidden_states": true, "task": "feature-extraction", "vocab_size": 1000}, "text_encoder_2": {"architectures": ["CLIPTextModelWithProjection"], "attention_dropout": 0.0, "hidden_act": "gelu", "hidden_size": 32, "initializer_factor": 1.0, "initializer_range": 0.02, "intermediate_size": 37, "layer_norm_eps": 1e-05, "max_position_embeddings": 77, "model_type": "clip_text_model", "neuron": {"auto_cast": "matmul", "auto_cast_type": "bf16", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "dynamic_batch_size": false, "inline_weights_to_neff": false, "optlevel": "2", "output_attentions": false, "output_hidden_states": false, "static_batch_size": 1, "static_sequence_length": 77}, "num_attention_heads": 4, "num_hidden_layers": 5, "output_hidden_states": true, "task": "feature-extraction", "vocab_size": 1000}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/gpt2/2c3a47e6fca2fd6ac3cf.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/gpt2/2c3a47e6fca2fd6ac3cf.json
deleted file mode 100644
index d00f28594ee93c6094e47cd7dde67a3ca1e7e76c..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/gpt2/2c3a47e6fca2fd6ac3cf.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 50257, "n_positions": 1024, "n_embd": 768, "n_layer": 12, "n_head": 12, "n_inner": null, "activation_function": "gelu_new", "resid_pdrop": 0.1, "embd_pdrop": 0.1, "attn_pdrop": 0.1, "layer_norm_epsilon": 1e-05, "initializer_range": 0.02, "summary_type": "cls_index", "summary_use_proj": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "scale_attn_weights": true, "use_cache": true, "scale_attn_by_inverse_layer_idx": false, "reorder_and_upcast_attn": false, "bos_token_id": 50256, "eos_token_id": 50256, "architectures": ["GPT2LMHeadModel"], "task_specific_params": {"text-generation": {"do_sample": true, "max_length": 50}}, "model_type": "gpt2", "n_ctx": 1024, "neuron": {"task": "text-generation", "batch_size": 4, "num_cores": 2, "auto_cast_type": "fp32", "sequence_length": 1024, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "gpt2", "checkpoint_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/gpt2/36e1657c052cd92f031f.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/gpt2/36e1657c052cd92f031f.json
deleted file mode 100644
index fcc1b479f025517a85ceaf2023e235b6e73ccd32..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/gpt2/36e1657c052cd92f031f.json
+++ /dev/null
@@ -1 +0,0 @@
-{"activation_function": "gelu_new", "architectures": ["GPT2LMHeadModel"], "attn_pdrop": 0.1, "bos_token_id": 50256, "embd_pdrop": 0.1, "eos_token_id": 50256, "initializer_range": 0.02, "layer_norm_epsilon": 1e-05, "model_type": "gpt2", "n_ctx": 1024, "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "n_positions": 1024, "neuron": {"auto_cast_type": "fp32", "batch_size": 4, "checkpoint_id": "gpt2", "checkpoint_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 1024, "task": "text-generation"}, "reorder_and_upcast_attn": false, "resid_pdrop": 0.1, "scale_attn_by_inverse_layer_idx": false, "scale_attn_weights": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "summary_type": "cls_index", "summary_use_proj": true, "task_specific_params": {"text-generation": {"do_sample": true, "max_length": 50}}, "use_cache": true, "vocab_size": 50257}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/gpt2/8ae9e463111b0fa38673.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/gpt2/8ae9e463111b0fa38673.json
deleted file mode 100644
index aeb86022d512f4d581757070095eedf687e630ae..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/gpt2/8ae9e463111b0fa38673.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 50257, "n_positions": 1024, "n_embd": 768, "n_layer": 12, "n_head": 12, "n_inner": null, "activation_function": "gelu_new", "resid_pdrop": 0.1, "embd_pdrop": 0.1, "attn_pdrop": 0.1, "layer_norm_epsilon": 1e-05, "initializer_range": 0.02, "summary_type": "cls_index", "summary_use_proj": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "scale_attn_weights": true, "use_cache": true, "scale_attn_by_inverse_layer_idx": false, "reorder_and_upcast_attn": false, "bos_token_id": 50256, "eos_token_id": 50256, "architectures": ["GPT2LMHeadModel"], "task_specific_params": {"text-generation": {"do_sample": true, "max_length": 50}}, "model_type": "gpt2", "n_ctx": 1024, "neuron": {"auto_cast_type": "fp32", "batch_size": 4, "checkpoint_id": "gpt2", "checkpoint_revision": "607a30d783dfa663caf39e06633721c8d4cfcd7e", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 1024, "task": "text-generation"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/165740fba2c72c56c43d.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/165740fba2c72c56c43d.json
deleted file mode 100644
index 5e198e5ad9dd636c282f26327971c0da96467d6c..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/165740fba2c72c56c43d.json
+++ /dev/null
@@ -1 +0,0 @@
-{"activation_function": "gelu_new", "attention_probs_dropout_prob": 0.1, "attn_pdrop": 0.1, "bos_token_id": 98, "embd_pdrop": 0.1, "eos_token_id": 98, "gradient_checkpointing": false, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "initializer_range": 0.02, "intermediate_size": 37, "layer_norm_epsilon": 1e-05, "model_type": "gpt2", "n_ctx": 512, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "n_positions": 512, "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "hf-internal-testing/tiny-random-gpt2", "checkpoint_revision": "91c0fe31d692dd8448d9bc06e8d1877345009e3b", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 100, "task": "text-generation"}, "pad_token_id": 98, "reorder_and_upcast_attn": false, "resid_pdrop": 0.1, "scale_attn_by_inverse_layer_idx": false, "scale_attn_weights": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "summary_type": "cls_index", "summary_use_proj": true, "type_vocab_size": 16, "use_cache": true, "vocab_size": 1000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/2037d1be0146ceb8d639.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/2037d1be0146ceb8d639.json
deleted file mode 100644
index 96736ee9d5a0a60d4c3e6fd10c8e174a962276cd..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/2037d1be0146ceb8d639.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 1000, "n_positions": 512, "n_embd": 32, "n_layer": 5, "n_head": 4, "n_inner": null, "activation_function": "gelu_new", "resid_pdrop": 0.1, "embd_pdrop": 0.1, "attn_pdrop": 0.1, "layer_norm_epsilon": 1e-05, "initializer_range": 0.02, "summary_type": "cls_index", "summary_use_proj": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "scale_attn_weights": true, "use_cache": true, "scale_attn_by_inverse_layer_idx": false, "reorder_and_upcast_attn": false, "bos_token_id": 98, "eos_token_id": 98, "pad_token_id": 98, "attention_probs_dropout_prob": 0.1, "gradient_checkpointing": false, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "intermediate_size": 37, "model_type": "gpt2", "n_ctx": 512, "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "hf-internal-testing/tiny-random-gpt2", "checkpoint_revision": "91c0fe31d692dd8448d9bc06e8d1877345009e3b", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 100, "task": "text-generation"}, "type_vocab_size": 16}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/3edc0caf8d3805a471bd.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/3edc0caf8d3805a471bd.json
deleted file mode 100644
index 0a07d5d31fea8c90b4bffa306f569be8c76e9815..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/3edc0caf8d3805a471bd.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 1000, "n_positions": 512, "n_embd": 32, "n_layer": 5, "n_head": 4, "n_inner": null, "activation_function": "gelu_new", "resid_pdrop": 0.1, "embd_pdrop": 0.1, "attn_pdrop": 0.1, "layer_norm_epsilon": 1e-05, "initializer_range": 0.02, "summary_type": "cls_index", "summary_use_proj": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "scale_attn_weights": true, "use_cache": true, "scale_attn_by_inverse_layer_idx": false, "reorder_and_upcast_attn": false, "bos_token_id": 98, "eos_token_id": 98, "pad_token_id": 98, "attention_probs_dropout_prob": 0.1, "gradient_checkpointing": false, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "intermediate_size": 37, "model_type": "gpt2", "n_ctx": 512, "type_vocab_size": 16, "neuron": {"task": "text-generation", "batch_size": 2, "num_cores": 2, "auto_cast_type": "fp32", "sequence_length": 100, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "hf-internal-testing/tiny-random-gpt2", "checkpoint_revision": "91c0fe31d692dd8448d9bc06e8d1877345009e3b"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/b85bd4ab62e3a18a8a0e.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/b85bd4ab62e3a18a8a0e.json
deleted file mode 100644
index 7f40d3ae2d63eda7cbc1cb4310a8e5272758107d..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/b85bd4ab62e3a18a8a0e.json
+++ /dev/null
@@ -1 +0,0 @@
-{"activation_function": "gelu_new", "attention_probs_dropout_prob": 0.1, "attn_pdrop": 0.1, "bos_token_id": 98, "embd_pdrop": 0.1, "eos_token_id": 98, "gradient_checkpointing": false, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "initializer_range": 0.02, "intermediate_size": 37, "layer_norm_epsilon": 1e-05, "model_type": "gpt2", "n_ctx": 512, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "n_positions": 512, "neuron": {"auto_cast_type": "bf16", "batch_size": 1, "checkpoint_id": "hf-internal-testing/tiny-random-gpt2", "checkpoint_revision": "91c0fe31d692dd8448d9bc06e8d1877345009e3b", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 1, "sequence_length": 64, "task": "text-generation"}, "pad_token_id": 98, "reorder_and_upcast_attn": false, "resid_pdrop": 0.1, "scale_attn_by_inverse_layer_idx": false, "scale_attn_weights": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "summary_type": "cls_index", "summary_use_proj": true, "type_vocab_size": 16, "use_cache": true, "vocab_size": 1000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/c3f3599091c9f0f4036f.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/c3f3599091c9f0f4036f.json
deleted file mode 100644
index b2fef82f4ad4c4d0dffe21067a21623dae8a80a4..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/gpt2/hf-internal-testing/tiny-random-gpt2/c3f3599091c9f0f4036f.json
+++ /dev/null
@@ -1 +0,0 @@
-{"activation_function": "gelu_new", "attention_probs_dropout_prob": 0.1, "attn_pdrop": 0.1, "bos_token_id": 98, "embd_pdrop": 0.1, "eos_token_id": 98, "gradient_checkpointing": false, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "initializer_range": 0.02, "intermediate_size": 37, "layer_norm_epsilon": 1e-05, "model_type": "gpt2", "n_ctx": 512, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "n_positions": 512, "neuron": {"auto_cast_type": "bf16", "batch_size": 1, "checkpoint_id": "hf-internal-testing/tiny-random-gpt2", "checkpoint_revision": "91c0fe31d692dd8448d9bc06e8d1877345009e3b", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 1, "sequence_length": 512, "task": "text-generation"}, "pad_token_id": 98, "reorder_and_upcast_attn": false, "resid_pdrop": 0.1, "scale_attn_by_inverse_layer_idx": false, "scale_attn_weights": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "summary_type": "cls_index", "summary_use_proj": true, "type_vocab_size": 16, "use_cache": true, "vocab_size": 1000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/NousResearch/Llama-2-7b-chat-hf/6c7cfec071642e5fecee.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/NousResearch/Llama-2-7b-chat-hf/6c7cfec071642e5fecee.json
deleted file mode 100644
index f70d6448e4174a87e8fe06e5651326e5945ba0a9..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/NousResearch/Llama-2-7b-chat-hf/6c7cfec071642e5fecee.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 4096, "hidden_size": 4096, "intermediate_size": 11008, "num_hidden_layers": 32, "num_attention_heads": 32, "num_key_value_heads": 32, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "pretraining_tp": 1, "use_cache": true, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "attention_dropout": 0.0, "torch_dtype": "float16", "tie_word_embeddings": false, "architectures": ["LlamaForCausalLM"], "bos_token_id": 1, "pad_token_id": 0, "eos_token_id": 2, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 4, "checkpoint_id": "NousResearch/Llama-2-7b-chat-hf", "checkpoint_revision": "37892f30c23786c0d5367d80481fa0d9fba93cf8", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/NousResearch/Llama-2-7b-chat-hf/b9904e0c46d338b623d2.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/NousResearch/Llama-2-7b-chat-hf/b9904e0c46d338b623d2.json
deleted file mode 100644
index d3fc80df684544689facdf4b43a119a8ad22dbd8..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/NousResearch/Llama-2-7b-chat-hf/b9904e0c46d338b623d2.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 4096, "hidden_size": 4096, "intermediate_size": 11008, "num_hidden_layers": 32, "num_attention_heads": 32, "num_key_value_heads": 32, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "pretraining_tp": 1, "use_cache": true, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "attention_dropout": 0.0, "torch_dtype": "float16", "tie_word_embeddings": false, "architectures": ["LlamaForCausalLM"], "bos_token_id": 1, "pad_token_id": 0, "eos_token_id": 2, "model_type": "llama", "neuron": {"task": "text-generation", "batch_size": 4, "num_cores": 24, "auto_cast_type": "fp16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "NousResearch/Llama-2-7b-chat-hf", "checkpoint_revision": "37892f30c23786c0d5367d80481fa0d9fba93cf8"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/dacorvo/tiny-random-llama/62a76db84304b34ae305.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/dacorvo/tiny-random-llama/62a76db84304b34ae305.json
deleted file mode 100644
index c0fc1f377c0510cea111ef0d6abd2ea98a208db0..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/dacorvo/tiny-random-llama/62a76db84304b34ae305.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 128, "initializer_range": 0.02, "intermediate_size": 256, "max_position_embeddings": 512, "model_type": "llama", "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "dacorvo/tiny-random-llama", "checkpoint_revision": "7fdafd2fe6a2d31c6abb72ae60db606d8bb23196", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 100, "task": "text-generation"}, "num_attention_heads": 1, "num_hidden_layers": 1, "num_key_value_heads": 1, "pretraining_tp": 1, "rms_norm_eps": 1e-06, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float32", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/dacorvo/tiny-random-llama/7c595f241fa311d7b227.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/dacorvo/tiny-random-llama/7c595f241fa311d7b227.json
deleted file mode 100644
index 001490e86d95d953f24ecda29caf70f794c31afd..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/dacorvo/tiny-random-llama/7c595f241fa311d7b227.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 512, "hidden_size": 128, "intermediate_size": 256, "num_hidden_layers": 1, "num_attention_heads": 1, "num_key_value_heads": 1, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-06, "pretraining_tp": 1, "use_cache": true, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "attention_dropout": 0.0, "torch_dtype": "float32", "tie_word_embeddings": false, "architectures": ["LlamaForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "llama", "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "dacorvo/tiny-random-llama", "checkpoint_revision": "7fdafd2fe6a2d31c6abb72ae60db606d8bb23196", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 100, "task": "text-generation"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/dacorvo/tiny-random-llama/b650b9d41c4386d36a89.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/dacorvo/tiny-random-llama/b650b9d41c4386d36a89.json
deleted file mode 100644
index 218e70b904c546a08c119ec0df60d23a1abaef70..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/dacorvo/tiny-random-llama/b650b9d41c4386d36a89.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 512, "hidden_size": 128, "intermediate_size": 256, "num_hidden_layers": 1, "num_attention_heads": 1, "num_key_value_heads": 1, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-06, "pretraining_tp": 1, "use_cache": true, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "attention_dropout": 0.0, "torch_dtype": "float32", "tie_word_embeddings": false, "architectures": ["LlamaForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "llama", "neuron": {"task": "text-generation", "batch_size": 2, "num_cores": 2, "auto_cast_type": "fp32", "sequence_length": 100, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "dacorvo/tiny-random-llama", "checkpoint_revision": "7fdafd2fe6a2d31c6abb72ae60db606d8bb23196"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/0b9e96edfb00f0f41781.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/0b9e96edfb00f0f41781.json
deleted file mode 100644
index 7d39b7ed5948bf3e27fb0490ad58d51a2407d926..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/0b9e96edfb00f0f41781.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 5120, "initializer_range": 0.02, "intermediate_size": 13824, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 4, "checkpoint_id": "meta-llama/Llama-2-13b-chat-hf", "checkpoint_revision": "29655417e51232f4f2b9b5d3e1418e5a9b04e80e", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 40, "num_hidden_layers": 40, "num_key_value_heads": 40, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/2025960152bb1331bcc8.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/2025960152bb1331bcc8.json
deleted file mode 100644
index a9a605f8305ce09318369b52bb71cf6d15b168c7..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/2025960152bb1331bcc8.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 5120, "initializer_range": 0.02, "intermediate_size": 13824, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 8, "checkpoint_id": "meta-llama/Llama-2-13b-chat-hf", "checkpoint_revision": "29655417e51232f4f2b9b5d3e1418e5a9b04e80e", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 12, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 40, "num_hidden_layers": 40, "num_key_value_heads": 40, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/2311cc196a0f70044562.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/2311cc196a0f70044562.json
deleted file mode 100644
index 9187e788be3ad766281ad36b5856b82de57898d9..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/2311cc196a0f70044562.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 5120, "initializer_range": 0.02, "intermediate_size": 13824, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 8, "checkpoint_id": "meta-llama/Llama-2-13b-chat-hf", "checkpoint_revision": "29655417e51232f4f2b9b5d3e1418e5a9b04e80e", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 40, "num_hidden_layers": 40, "num_key_value_heads": 40, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/28dc6684e8ffd1957fcc.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/28dc6684e8ffd1957fcc.json
deleted file mode 100644
index d5f71133f2ec8fa00bbf067bebe32b99507bafac..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/28dc6684e8ffd1957fcc.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 5120, "initializer_range": 0.02, "intermediate_size": 13824, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 16, "checkpoint_id": "meta-llama/Llama-2-13b-chat-hf", "checkpoint_revision": "29655417e51232f4f2b9b5d3e1418e5a9b04e80e", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 12, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 40, "num_hidden_layers": 40, "num_key_value_heads": 40, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/2e33fb05af9eda2154f6.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/2e33fb05af9eda2154f6.json
deleted file mode 100644
index 0b44b05740d0d4a9a08d4586d796507c20d06cc0..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/2e33fb05af9eda2154f6.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 5120, "initializer_range": 0.02, "intermediate_size": 13824, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 8, "checkpoint_id": "meta-llama/Llama-2-13b-chat-hf", "checkpoint_revision": "29655417e51232f4f2b9b5d3e1418e5a9b04e80e", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 40, "num_hidden_layers": 40, "num_key_value_heads": 40, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/4e99ef3d07dc56a0f446.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/4e99ef3d07dc56a0f446.json
deleted file mode 100644
index e74a6aa49f6e51c9b1d81f4ed902d43038b68aea..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/4e99ef3d07dc56a0f446.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 5120, "initializer_range": 0.02, "intermediate_size": 13824, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 32, "checkpoint_id": "meta-llama/Llama-2-13b-chat-hf", "checkpoint_revision": "29655417e51232f4f2b9b5d3e1418e5a9b04e80e", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 40, "num_hidden_layers": 40, "num_key_value_heads": 40, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/66c4d16781d41c4da7f7.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/66c4d16781d41c4da7f7.json
deleted file mode 100644
index 47a814727a2a8145339174365bbfcd960b6c28b2..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/66c4d16781d41c4da7f7.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 5120, "initializer_range": 0.02, "intermediate_size": 13824, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 4, "checkpoint_id": "meta-llama/Llama-2-13b-chat-hf", "checkpoint_revision": "29655417e51232f4f2b9b5d3e1418e5a9b04e80e", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 40, "num_hidden_layers": 40, "num_key_value_heads": 40, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/8bf98c2e52fcfe14c5d7.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/8bf98c2e52fcfe14c5d7.json
deleted file mode 100644
index e03d067b83f16865256be90dc1bc1291817bd476..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/8bf98c2e52fcfe14c5d7.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 5120, "initializer_range": 0.02, "intermediate_size": 13824, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 1, "checkpoint_id": "meta-llama/Llama-2-13b-chat-hf", "checkpoint_revision": "29655417e51232f4f2b9b5d3e1418e5a9b04e80e", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 12, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 40, "num_hidden_layers": 40, "num_key_value_heads": 40, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/9a378c29a33a95a92f36.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/9a378c29a33a95a92f36.json
deleted file mode 100644
index a5bdf7d02768edd1c1ec4a3610b7a1367e22c053..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/9a378c29a33a95a92f36.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 5120, "initializer_range": 0.02, "intermediate_size": 13824, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 4, "checkpoint_id": "meta-llama/Llama-2-13b-chat-hf", "checkpoint_revision": "29655417e51232f4f2b9b5d3e1418e5a9b04e80e", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 12, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 40, "num_hidden_layers": 40, "num_key_value_heads": 40, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/baaf1893070cf034975e.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/baaf1893070cf034975e.json
deleted file mode 100644
index b01033203083db5514e9d17207a89e6d5071e277..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/baaf1893070cf034975e.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 5120, "initializer_range": 0.02, "intermediate_size": 13824, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 1, "checkpoint_id": "meta-llama/Llama-2-13b-chat-hf", "checkpoint_revision": "29655417e51232f4f2b9b5d3e1418e5a9b04e80e", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 40, "num_hidden_layers": 40, "num_key_value_heads": 40, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/da44f10585a29a3d996c.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/da44f10585a29a3d996c.json
deleted file mode 100644
index 89b8169e423e092af12ba50a2dd11cb4947041f1..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/da44f10585a29a3d996c.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 5120, "initializer_range": 0.02, "intermediate_size": 13824, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 1, "checkpoint_id": "meta-llama/Llama-2-13b-chat-hf", "checkpoint_revision": "29655417e51232f4f2b9b5d3e1418e5a9b04e80e", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 40, "num_hidden_layers": 40, "num_key_value_heads": 40, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/e9e9e0fa42e555b34152.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/e9e9e0fa42e555b34152.json
deleted file mode 100644
index 4a54dd2142abf9c6cea5db582d15daa7043d7e4b..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-13b-chat-hf/e9e9e0fa42e555b34152.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 5120, "initializer_range": 0.02, "intermediate_size": 13824, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 16, "checkpoint_id": "meta-llama/Llama-2-13b-chat-hf", "checkpoint_revision": "29655417e51232f4f2b9b5d3e1418e5a9b04e80e", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 40, "num_hidden_layers": 40, "num_key_value_heads": 40, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/04a286f25641fb37c0da.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/04a286f25641fb37c0da.json
deleted file mode 100644
index a4ed3a44f9e6c5dd4d80a3c383a64527a67da866..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/04a286f25641fb37c0da.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 4, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 12, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/09bd40b23d9644abeb4e.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/09bd40b23d9644abeb4e.json
deleted file mode 100644
index f4a3e636ebb485e5416982cbd766281f44a12239..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/09bd40b23d9644abeb4e.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 1, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 12, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/13cbb9ec2234e7ca3c26.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/13cbb9ec2234e7ca3c26.json
deleted file mode 100644
index cfa3942d30ac8e2a85d90d95d5932040b10f4039..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/13cbb9ec2234e7ca3c26.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 8, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/22d316bd8b3cdfb5514b.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/22d316bd8b3cdfb5514b.json
deleted file mode 100644
index 0c514701ab0939842c076dcab78db0f705f793ae..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/22d316bd8b3cdfb5514b.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 16, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/2e95f36ce181921d7a7b.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/2e95f36ce181921d7a7b.json
deleted file mode 100644
index 2f30a090a70bffaafb6c6e181c27057e15d40713..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/2e95f36ce181921d7a7b.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 4, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/39695bb575ebdb0bdfc1.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/39695bb575ebdb0bdfc1.json
deleted file mode 100644
index c2ecf306ba79ebfc9336b4c9ab5e7f6ba2859028..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/39695bb575ebdb0bdfc1.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 32, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 12, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/3a328b55c6445b520b8e.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/3a328b55c6445b520b8e.json
deleted file mode 100644
index 3a08bed7d6cd89dd5326bdb6083188a853b02f92..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/3a328b55c6445b520b8e.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 4096, "hidden_size": 4096, "intermediate_size": 11008, "num_hidden_layers": 32, "num_attention_heads": 32, "num_key_value_heads": 32, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "pretraining_tp": 1, "use_cache": true, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "attention_dropout": 0.0, "torch_dtype": "float16", "tie_word_embeddings": false, "architectures": ["LlamaForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "llama", "neuron": {"task": "text-generation", "batch_size": 16, "num_cores": 24, "auto_cast_type": "fp16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/3cad45de92cf81a0a16f.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/3cad45de92cf81a0a16f.json
deleted file mode 100644
index 3557a954d8cc2b20228f4bfb10d6e73dbe86958b..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/3cad45de92cf81a0a16f.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 8, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/3fa0092e4157c2ddb47b.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/3fa0092e4157c2ddb47b.json
deleted file mode 100644
index e53c6c8ec5cb0f5d23fc11da0559a8e8aa10f2e1..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/3fa0092e4157c2ddb47b.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 4, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/44c81784d74cd4713969.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/44c81784d74cd4713969.json
deleted file mode 100644
index 71e5e7b171bdb6aeffb73728941269f232025d3c..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/44c81784d74cd4713969.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 4096, "hidden_size": 4096, "intermediate_size": 11008, "num_hidden_layers": 32, "num_attention_heads": 32, "num_key_value_heads": 32, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "pretraining_tp": 1, "use_cache": true, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "attention_dropout": 0.0, "torch_dtype": "float16", "tie_word_embeddings": false, "architectures": ["LlamaForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "llama", "neuron": {"task": "text-generation", "batch_size": 8, "num_cores": 24, "auto_cast_type": "fp16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/4d28ec49befb4e87ec23.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/4d28ec49befb4e87ec23.json
deleted file mode 100644
index 7fd0f77083436b761e2017a2804a8db5c8e3eece..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/4d28ec49befb4e87ec23.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 32, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/5424b76f5346f48c87dc.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/5424b76f5346f48c87dc.json
deleted file mode 100644
index 586da8ab48a3ec23ce2229874e81d333ebd0345d..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/5424b76f5346f48c87dc.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 1, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/58eec5854a9f564f8a27.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/58eec5854a9f564f8a27.json
deleted file mode 100644
index e17d1b828cd92e19d778a0a6c2baf32e56a2404f..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/58eec5854a9f564f8a27.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 4096, "hidden_size": 4096, "intermediate_size": 11008, "num_hidden_layers": 32, "num_attention_heads": 32, "num_key_value_heads": 32, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "pretraining_tp": 1, "use_cache": true, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "attention_dropout": 0.0, "torch_dtype": "float16", "tie_word_embeddings": false, "architectures": ["LlamaForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 16, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/6a314b3d69ca329d2711.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/6a314b3d69ca329d2711.json
deleted file mode 100644
index 571644cf77882c1373478943036f95cf6f3f7703..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/6a314b3d69ca329d2711.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 4096, "hidden_size": 4096, "intermediate_size": 11008, "num_hidden_layers": 32, "num_attention_heads": 32, "num_key_value_heads": 32, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "pretraining_tp": 1, "use_cache": true, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "attention_dropout": 0.0, "torch_dtype": "float16", "tie_word_embeddings": false, "architectures": ["LlamaForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "llama", "neuron": {"task": "text-generation", "batch_size": 1, "num_cores": 24, "auto_cast_type": "fp16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/7a0a7caf91982c3c67b0.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/7a0a7caf91982c3c67b0.json
deleted file mode 100644
index b6c61842bfd7ebe476f18609a83e8f3c1e155e51..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/7a0a7caf91982c3c67b0.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 4096, "hidden_size": 4096, "intermediate_size": 11008, "num_hidden_layers": 32, "num_attention_heads": 32, "num_key_value_heads": 32, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "pretraining_tp": 1, "use_cache": true, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "attention_dropout": 0.0, "torch_dtype": "float16", "tie_word_embeddings": false, "architectures": ["LlamaForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 4, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/7a9166d6f50e7c014961.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/7a9166d6f50e7c014961.json
deleted file mode 100644
index d9b942a9250234ebf7d1d3dc8eecaafa9c9096e8..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/7a9166d6f50e7c014961.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 32, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/7e2416095368bdd0a5cd.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/7e2416095368bdd0a5cd.json
deleted file mode 100644
index 45033df9d6480802370abba113d4732d1bfac4bc..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/7e2416095368bdd0a5cd.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 16, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 12, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/ad1f7e0ca3ab366f91d4.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/ad1f7e0ca3ab366f91d4.json
deleted file mode 100644
index 126ab469041bd4993b166a55583245e30f26607f..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/ad1f7e0ca3ab366f91d4.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 4096, "hidden_size": 4096, "intermediate_size": 11008, "num_hidden_layers": 32, "num_attention_heads": 32, "num_key_value_heads": 32, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "pretraining_tp": 1, "use_cache": true, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "attention_dropout": 0.0, "torch_dtype": "float16", "tie_word_embeddings": false, "architectures": ["LlamaForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 8, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/d3acdf9ff1795080fd1c.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/d3acdf9ff1795080fd1c.json
deleted file mode 100644
index 137de61fc0a048bd16b7f5490141116caea96f5c..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/d3acdf9ff1795080fd1c.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 1, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/d49c97f13a4593173a49.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/d49c97f13a4593173a49.json
deleted file mode 100644
index 8e57a50375831667c86b3ae4590da021d4eacd98..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/d49c97f13a4593173a49.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 8, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 12, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/dd0ae8e9d982a2ed293b.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/dd0ae8e9d982a2ed293b.json
deleted file mode 100644
index 4f622db34f02fd43d560b8a82ded67831a9605a5..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/dd0ae8e9d982a2ed293b.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 4096, "hidden_size": 4096, "intermediate_size": 11008, "num_hidden_layers": 32, "num_attention_heads": 32, "num_key_value_heads": 32, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "pretraining_tp": 1, "use_cache": true, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "attention_dropout": 0.0, "torch_dtype": "float16", "tie_word_embeddings": false, "architectures": ["LlamaForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "llama", "neuron": {"task": "text-generation", "batch_size": 4, "num_cores": 24, "auto_cast_type": "fp16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/efa4119dabd9c1f8277f.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/efa4119dabd9c1f8277f.json
deleted file mode 100644
index f2664645ffbe6894a48eb6b9f3ef377c06ae6d14..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/efa4119dabd9c1f8277f.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 4096, "hidden_size": 4096, "intermediate_size": 11008, "num_hidden_layers": 32, "num_attention_heads": 32, "num_key_value_heads": 32, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "pretraining_tp": 1, "use_cache": true, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "attention_dropout": 0.0, "torch_dtype": "float16", "tie_word_embeddings": false, "architectures": ["LlamaForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 1, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/ff6b3f58fac88c2f96c4.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/ff6b3f58fac88c2f96c4.json
deleted file mode 100644
index f88abb5045d1730cb5418d567a9f36b3ea71d2eb..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/meta-llama/Llama-2-7b-chat-hf/ff6b3f58fac88c2f96c4.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp16", "batch_size": 16, "checkpoint_id": "meta-llama/Llama-2-7b-chat-hf", "checkpoint_revision": "92011f62d7604e261f748ec0cfe6329f31193e33", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/059827c299e8d9043f57.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/059827c299e8d9043f57.json
deleted file mode 100644
index 362d8a88eedcb88c31757b3416e39ebd8a80d0b9..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/059827c299e8d9043f57.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 4096, "hidden_size": 2048, "intermediate_size": 5504, "num_hidden_layers": 24, "num_attention_heads": 16, "num_key_value_heads": 16, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "pretraining_tp": 1, "use_cache": true, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "attention_dropout": 0.0, "torch_dtype": "float32", "tie_word_embeddings": false, "architectures": ["LlamaForCausalLM"], "bos_token_id": 1, "pad_token_id": 0, "eos_token_id": 2, "model_type": "llama", "neuron": {"auto_cast_type": "fp32", "batch_size": 4, "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 1024, "task": "text-generation"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/ece87a51a12bdc2169c6.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/ece87a51a12bdc2169c6.json
deleted file mode 100644
index 3ec01cc447a550c55b9ff0fd53391fdf6d47b7d2..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/ece87a51a12bdc2169c6.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 4096, "hidden_size": 2048, "intermediate_size": 5504, "num_hidden_layers": 24, "num_attention_heads": 16, "num_key_value_heads": 16, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "pretraining_tp": 1, "use_cache": true, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "attention_dropout": 0.0, "torch_dtype": "float32", "tie_word_embeddings": false, "architectures": ["LlamaForCausalLM"], "bos_token_id": 1, "pad_token_id": 0, "eos_token_id": 2, "model_type": "llama", "neuron": {"task": "text-generation", "batch_size": 4, "num_cores": 2, "auto_cast_type": "fp32", "sequence_length": 1024, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/f1bd1033a11a1478af9e.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/f1bd1033a11a1478af9e.json
deleted file mode 100644
index 5f52b96291d3dc5f5e17bba794dfc2a380988c8b..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/llama/princeton-nlp/Sheared-LLaMA-1.3B/f1bd1033a11a1478af9e.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["LlamaForCausalLM"], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 5504, "max_position_embeddings": 4096, "model_type": "llama", "neuron": {"auto_cast_type": "fp32", "batch_size": 4, "checkpoint_id": "princeton-nlp/Sheared-LLaMA-1.3B", "checkpoint_revision": "a4b76938edbf571ea7d7d9904861cbdca08809b4", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 1024, "task": "text-generation"}, "num_attention_heads": 16, "num_hidden_layers": 24, "num_key_value_heads": 16, "pad_token_id": 0, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float32", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/1cfca4036d7b607639ea.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/1cfca4036d7b607639ea.json
deleted file mode 100644
index 31af98819e3fe66d4a30ddc20d3b5550febb6388..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/1cfca4036d7b607639ea.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "attention_probs_dropout_prob": 0.1, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 32, "initializer_range": 0.02, "intermediate_size": 37, "is_decoder": true, "max_position_embeddings": 512, "model_type": "mistral", "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "dacorvo/tiny-random-MistralForCausalLM", "checkpoint_revision": "81d453e3c8985649e9ee3d4c9378461029d1c73a", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 100, "task": "text-generation"}, "num_attention_heads": 4, "num_hidden_layers": 2, "num_key_value_heads": 2, "pad_token_id": 0, "rms_norm_eps": 1e-06, "rope_theta": 10000.0, "sliding_window": 4096, "tie_word_embeddings": false, "torch_dtype": "float32", "type_vocab_size": 16, "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/43c8baaa3d89767fe7f2.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/43c8baaa3d89767fe7f2.json
deleted file mode 100644
index c2228d7f2012b5f7d87e3ae0e83baace811771bc..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/43c8baaa3d89767fe7f2.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 512, "hidden_size": 32, "intermediate_size": 37, "num_hidden_layers": 2, "num_attention_heads": 4, "sliding_window": 4096, "num_key_value_heads": 2, "hidden_act": "gelu", "initializer_range": 0.02, "rms_norm_eps": 1e-06, "use_cache": true, "rope_theta": 10000.0, "attention_dropout": 0.0, "torch_dtype": "float32", "tie_word_embeddings": false, "is_decoder": true, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "pad_token_id": 0, "eos_token_id": 2, "attention_probs_dropout_prob": 0.1, "hidden_dropout_prob": 0.1, "model_type": "mistral", "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "dacorvo/tiny-random-MistralForCausalLM", "checkpoint_revision": "81d453e3c8985649e9ee3d4c9378461029d1c73a", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 100, "task": "text-generation"}, "type_vocab_size": 16}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/f9b607c62a5393e0b42f.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/f9b607c62a5393e0b42f.json
deleted file mode 100644
index e10ee435cc28dbf6076c9f13ed2d75402909dc08..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/dacorvo/tiny-random-MistralForCausalLM/f9b607c62a5393e0b42f.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 512, "hidden_size": 32, "intermediate_size": 37, "num_hidden_layers": 2, "num_attention_heads": 4, "sliding_window": 4096, "num_key_value_heads": 2, "hidden_act": "gelu", "initializer_range": 0.02, "rms_norm_eps": 1e-06, "use_cache": true, "rope_theta": 10000.0, "attention_dropout": 0.0, "torch_dtype": "float32", "tie_word_embeddings": false, "is_decoder": true, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "pad_token_id": 0, "eos_token_id": 2, "attention_probs_dropout_prob": 0.1, "hidden_dropout_prob": 0.1, "model_type": "mistral", "type_vocab_size": 16, "neuron": {"task": "text-generation", "batch_size": 2, "num_cores": 2, "auto_cast_type": "fp32", "sequence_length": 100, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "dacorvo/tiny-random-MistralForCausalLM", "checkpoint_revision": "81d453e3c8985649e9ee3d4c9378461029d1c73a"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.1/7a99245071db1d4e48e6.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.1/7a99245071db1d4e48e6.json
deleted file mode 100644
index e6549f2f7ecc2fc757e69148ddbe1c11b12160ca..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.1/7a99245071db1d4e48e6.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": 4096, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 10000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"auto_cast_type": "fp16", "batch_size": 1, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.1", "checkpoint_revision": "73068f3702d050a2fd5aa2ca1e612e5036429398", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.1/b0a5a2e1e8dd02dbe923.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.1/b0a5a2e1e8dd02dbe923.json
deleted file mode 100644
index f5d3e1177bc37a296eef3b01431dd4c272b98594..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.1/b0a5a2e1e8dd02dbe923.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": 4096, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 10000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"task": "text-generation", "batch_size": 1, "num_cores": 24, "auto_cast_type": "fp16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.1", "checkpoint_revision": "73068f3702d050a2fd5aa2ca1e612e5036429398"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/1245bb72b32d75db76f9.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/1245bb72b32d75db76f9.json
deleted file mode 100644
index bfa483779ffeb478c67a46b021b26beb31d6eab3..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/1245bb72b32d75db76f9.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 4, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_theta": 1000000.0, "sliding_window": null, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/1c4540cc29dcb3b99ff8.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/1c4540cc29dcb3b99ff8.json
deleted file mode 100644
index 861374c248966264a833725e14dccf2ff365aefc..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/1c4540cc29dcb3b99ff8.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": null, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 1000000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 1, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/2d6c1d9513debbcc5f7f.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/2d6c1d9513debbcc5f7f.json
deleted file mode 100644
index 8717e5ea05f45343e51d9d9127f9146fd889c85b..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/2d6c1d9513debbcc5f7f.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": null, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 1000000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"task": "text-generation", "batch_size": 1, "num_cores": 8, "auto_cast_type": "bf16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/41284a607429208347c0.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/41284a607429208347c0.json
deleted file mode 100644
index 9ef47daf5eb58f0f367916934f5ba30107401987..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/41284a607429208347c0.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": null, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 1000000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"task": "text-generation", "batch_size": 16, "num_cores": 8, "auto_cast_type": "bf16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/4620b76c4f39e71a8459.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/4620b76c4f39e71a8459.json
deleted file mode 100644
index 6fcf360e41b9981b9e39cbadd1b8f2610ad98d18..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/4620b76c4f39e71a8459.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": null, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 1000000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"task": "text-generation", "batch_size": 1, "num_cores": 24, "auto_cast_type": "fp16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/4fcf5530fe99f6f4f3e6.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/4fcf5530fe99f6f4f3e6.json
deleted file mode 100644
index 47d833102e13b8f6dec92c72f8ac075e2163bc03..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/4fcf5530fe99f6f4f3e6.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": null, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 1000000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 8, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/5c9bfea69bb260a59b0c.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/5c9bfea69bb260a59b0c.json
deleted file mode 100644
index e13275b621f32a6ae29f76bff50250ed818abe09..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/5c9bfea69bb260a59b0c.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": null, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 1000000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 16, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/64e0d5cd868faca4549f.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/64e0d5cd868faca4549f.json
deleted file mode 100644
index 8ed2a607b2ab3a2d8649610facd0b316dfdcfbc2..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/64e0d5cd868faca4549f.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 1, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_theta": 1000000.0, "sliding_window": null, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/698b844f07e07829c78e.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/698b844f07e07829c78e.json
deleted file mode 100644
index fd3d4e16d7eb1d8a5d460741c701dc0f9683e65f..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/698b844f07e07829c78e.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": null, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 1000000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"auto_cast_type": "fp16", "batch_size": 1, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 24, "sequence_length": 4096, "task": "text-generation"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/733bc79383c0cc56537e.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/733bc79383c0cc56537e.json
deleted file mode 100644
index bdb28c314300cfdaff23a06b828f00e63c112bcb..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/733bc79383c0cc56537e.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 16, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_theta": 1000000.0, "sliding_window": null, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/a2a8d3c93367a33b250c.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/a2a8d3c93367a33b250c.json
deleted file mode 100644
index c4bdd556d8750e46c9457aab7a849c13625e5804..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/a2a8d3c93367a33b250c.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": null, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 1000000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"task": "text-generation", "batch_size": 8, "num_cores": 8, "auto_cast_type": "bf16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/a61d8247a227bdcc16f2.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/a61d8247a227bdcc16f2.json
deleted file mode 100644
index f0d74fb21c4ddf85f4b619ec851da0b449e9934d..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/a61d8247a227bdcc16f2.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": null, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 1000000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"task": "text-generation", "batch_size": 32, "num_cores": 8, "auto_cast_type": "bf16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/ca240a79f952204527fc.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/ca240a79f952204527fc.json
deleted file mode 100644
index 8a9159b8625e0e272ebe7a732d2f41569efcddd0..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/ca240a79f952204527fc.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 8, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_theta": 1000000.0, "sliding_window": null, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/ca9ec6b16e1a62dbd649.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/ca9ec6b16e1a62dbd649.json
deleted file mode 100644
index 6a76e68941ce60f19fa2f6854254c6e52d02f640..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/ca9ec6b16e1a62dbd649.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": null, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 1000000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"task": "text-generation", "batch_size": 4, "num_cores": 8, "auto_cast_type": "bf16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/d75fbb50baa6a3b15792.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/d75fbb50baa6a3b15792.json
deleted file mode 100644
index 72862602592826178c33364062d75c57d4e63cf4..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/d75fbb50baa6a3b15792.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": null, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 1000000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 4, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/de7f8e3eb7e911ee8559.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/de7f8e3eb7e911ee8559.json
deleted file mode 100644
index c8f638043142eea0ce1c4d46bec597647f81d487..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/de7f8e3eb7e911ee8559.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": null, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 1000000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 32, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/eec3579b9122a80133a1.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/eec3579b9122a80133a1.json
deleted file mode 100644
index 1e62d97b754c28d5d38a5b14d60203cc413bab77..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-Instruct-v0.2/eec3579b9122a80133a1.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 32, "checkpoint_id": "mistralai/Mistral-7B-Instruct-v0.2", "checkpoint_revision": "41b61a33a2483885c981aa79e0df6b32407ed873", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_theta": 1000000.0, "sliding_window": null, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/06d7927d0a3a008f60b6.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/06d7927d0a3a008f60b6.json
deleted file mode 100644
index 8d9a9efc37410a41298d0cce65bcfe0c41947604..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/06d7927d0a3a008f60b6.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 4, "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_theta": 10000.0, "sliding_window": 4096, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/12212376258a9fed88b2.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/12212376258a9fed88b2.json
deleted file mode 100644
index a8ffb1006893726a4b83ed0978c9033c19b3c397..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/12212376258a9fed88b2.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": 4096, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 10000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 32, "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/15f566c81d1f67f6fd2e.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/15f566c81d1f67f6fd2e.json
deleted file mode 100644
index 444175f1611896b536a80f7bb5bcef7d39d0875c..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/15f566c81d1f67f6fd2e.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": 4096, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 10000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 8, "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/192bd7f0468f78103585.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/192bd7f0468f78103585.json
deleted file mode 100644
index 5b074670d9bd7212b7d649f0db550918ccc9e1b4..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/192bd7f0468f78103585.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": 4096, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 10000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"task": "text-generation", "batch_size": 8, "num_cores": 8, "auto_cast_type": "bf16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/1a58aae2f6a4ddf4015c.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/1a58aae2f6a4ddf4015c.json
deleted file mode 100644
index 2c461588d489ba62146cd9d980546324d54e9585..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/1a58aae2f6a4ddf4015c.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 1, "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_theta": 10000.0, "sliding_window": 4096, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/1dd1302a33c9e69730d2.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/1dd1302a33c9e69730d2.json
deleted file mode 100644
index af539ae303508a9efe3986c30f8988afc8c96bfa..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/1dd1302a33c9e69730d2.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": 4096, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 10000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 16, "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/21bbf61928931f72378f.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/21bbf61928931f72378f.json
deleted file mode 100644
index 4c82221ce76d8c25feeaf493782ed0bccb722e27..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/21bbf61928931f72378f.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": 4096, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 10000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"task": "text-generation", "batch_size": 1, "num_cores": 8, "auto_cast_type": "bf16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/236943bb2fa2da236c92.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/236943bb2fa2da236c92.json
deleted file mode 100644
index fb4e0a917d82a4718bfd64935dfc90a1728131d1..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/236943bb2fa2da236c92.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 16, "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_theta": 10000.0, "sliding_window": 4096, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/258d4c94617de7988dfd.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/258d4c94617de7988dfd.json
deleted file mode 100644
index c38b4656d31d8f54c5f1c03bd3316ac57a78a2e1..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/258d4c94617de7988dfd.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 32, "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_theta": 10000.0, "sliding_window": 4096, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/5141bf1fc4c434ce1ea7.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/5141bf1fc4c434ce1ea7.json
deleted file mode 100644
index dc3b3f34e83fe233c5c1dd80af5b07a822eb81e2..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/5141bf1fc4c434ce1ea7.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": 4096, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 10000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 4, "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/5384fd5e39aeb90be6d5.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/5384fd5e39aeb90be6d5.json
deleted file mode 100644
index 1ac313f7c77e95ba82f3d50c71ab6da08cf63d0a..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/5384fd5e39aeb90be6d5.json
+++ /dev/null
@@ -1 +0,0 @@
-{"architectures": ["MistralForCausalLM"], "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 14336, "max_position_embeddings": 32768, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 8, "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}, "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_theta": 10000.0, "sliding_window": 4096, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "use_cache": true, "vocab_size": 32000}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/67dd8bb21d625e22cd5b.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/67dd8bb21d625e22cd5b.json
deleted file mode 100644
index 1737c97088e03d46e16b1290cb0f7a7ecfa0a99f..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/67dd8bb21d625e22cd5b.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": 4096, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 10000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"task": "text-generation", "batch_size": 32, "num_cores": 8, "auto_cast_type": "bf16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/81be8abab2484836ea97.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/81be8abab2484836ea97.json
deleted file mode 100644
index ebb90c65929cb44220ef9b63127a1debed153012..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/81be8abab2484836ea97.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": 4096, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 10000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"task": "text-generation", "batch_size": 4, "num_cores": 8, "auto_cast_type": "bf16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/9e73f279a5220e46dcf1.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/9e73f279a5220e46dcf1.json
deleted file mode 100644
index a6735f184b2da971105c7c846194a388d2cfffea..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/9e73f279a5220e46dcf1.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": 4096, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 10000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"auto_cast_type": "bf16", "batch_size": 1, "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 8, "sequence_length": 4096, "task": "text-generation"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/9eb34c5c51dae5739571.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/9eb34c5c51dae5739571.json
deleted file mode 100644
index 41ab542aca365efd0911c9ba1cc82abd1e95adbc..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/mistral/mistralai/Mistral-7B-v0.1/9eb34c5c51dae5739571.json
+++ /dev/null
@@ -1 +0,0 @@
-{"vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "intermediate_size": 14336, "num_hidden_layers": 32, "num_attention_heads": 32, "sliding_window": 4096, "num_key_value_heads": 8, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "rope_theta": 10000.0, "attention_dropout": 0.0, "torch_dtype": "bfloat16", "tie_word_embeddings": false, "architectures": ["MistralForCausalLM"], "bos_token_id": 1, "eos_token_id": 2, "model_type": "mistral", "neuron": {"task": "text-generation", "batch_size": 16, "num_cores": 8, "auto_cast_type": "bf16", "sequence_length": 4096, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "mistralai/Mistral-7B-v0.1", "checkpoint_revision": "26bca36bde8333b5d7f72e9ed20ccda6a618af24"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/opt/hf-internal-testing/tiny-random-OPTForCausalLM/0eb6d808c895dd98f1d9.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/opt/hf-internal-testing/tiny-random-OPTForCausalLM/0eb6d808c895dd98f1d9.json
deleted file mode 100644
index dc4e71f6b73c2e8e64611200bace5fbd55063e3d..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/opt/hf-internal-testing/tiny-random-OPTForCausalLM/0eb6d808c895dd98f1d9.json
+++ /dev/null
@@ -1 +0,0 @@
-{"torch_dtype": "float32", "is_decoder": true, "architectures": ["OPTForCausalLM"], "bos_token_id": 2, "pad_token_id": 1, "eos_token_id": 2, "embed_dim": 16, "model_type": "opt", "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "hf-internal-testing/tiny-random-OPTForCausalLM", "checkpoint_revision": "190d1f4fc0011d2eaeaa05282e0fbd2445e4b11f", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 100, "task": "text-generation"}, "vocab_size": 50265, "max_position_embeddings": 100, "num_attention_heads": 4, "word_embed_proj_dim": 16, "ffn_dim": 4, "hidden_size": 16, "num_hidden_layers": 5, "dropout": 0.1, "attention_dropout": 0.1, "activation_function": "relu", "init_std": 0.02, "layerdrop": 0.0, "use_cache": true, "do_layer_norm_before": true, "enable_bias": true, "layer_norm_elementwise_affine": true, "_remove_final_layer_norm": false}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/opt/hf-internal-testing/tiny-random-OPTForCausalLM/244d78257ade535a7f03.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/opt/hf-internal-testing/tiny-random-OPTForCausalLM/244d78257ade535a7f03.json
deleted file mode 100644
index 43b4233503304a5de09277fa3d642923826a1475..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/opt/hf-internal-testing/tiny-random-OPTForCausalLM/244d78257ade535a7f03.json
+++ /dev/null
@@ -1 +0,0 @@
-{"torch_dtype": "float32", "is_decoder": true, "architectures": ["OPTForCausalLM"], "bos_token_id": 2, "pad_token_id": 1, "eos_token_id": 2, "embed_dim": 16, "model_type": "opt", "vocab_size": 50265, "max_position_embeddings": 100, "num_attention_heads": 4, "word_embed_proj_dim": 16, "ffn_dim": 4, "hidden_size": 16, "num_hidden_layers": 5, "dropout": 0.1, "attention_dropout": 0.1, "activation_function": "relu", "init_std": 0.02, "layerdrop": 0.0, "use_cache": true, "do_layer_norm_before": true, "enable_bias": true, "layer_norm_elementwise_affine": true, "_remove_final_layer_norm": false, "neuron": {"task": "text-generation", "batch_size": 2, "num_cores": 2, "auto_cast_type": "fp32", "sequence_length": 100, "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "checkpoint_id": "hf-internal-testing/tiny-random-OPTForCausalLM", "checkpoint_revision": "190d1f4fc0011d2eaeaa05282e0fbd2445e4b11f"}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/opt/hf-internal-testing/tiny-random-OPTForCausalLM/f3adfe5a9c79b5a36fd7.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/opt/hf-internal-testing/tiny-random-OPTForCausalLM/f3adfe5a9c79b5a36fd7.json
deleted file mode 100644
index 66b101aaeb2d861a356a107afb2aef0b7115a83d..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/opt/hf-internal-testing/tiny-random-OPTForCausalLM/f3adfe5a9c79b5a36fd7.json
+++ /dev/null
@@ -1 +0,0 @@
-{"_remove_final_layer_norm": false, "activation_function": "relu", "architectures": ["OPTForCausalLM"], "attention_dropout": 0.1, "bos_token_id": 2, "do_layer_norm_before": true, "dropout": 0.1, "embed_dim": 16, "enable_bias": true, "eos_token_id": 2, "ffn_dim": 4, "hidden_size": 16, "init_std": 0.02, "is_decoder": true, "layer_norm_elementwise_affine": true, "layerdrop": 0.0, "max_position_embeddings": 100, "model_type": "opt", "neuron": {"auto_cast_type": "fp32", "batch_size": 2, "checkpoint_id": "hf-internal-testing/tiny-random-OPTForCausalLM", "checkpoint_revision": "190d1f4fc0011d2eaeaa05282e0fbd2445e4b11f", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "num_cores": 2, "sequence_length": 100, "task": "text-generation"}, "num_attention_heads": 4, "num_hidden_layers": 5, "pad_token_id": 1, "torch_dtype": "float32", "use_cache": true, "vocab_size": 50265, "word_embed_proj_dim": 16}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/stable-diffusion/echarlaix/tiny-random-stable-diffusion-xl/277c773f1b2fa836ccaf.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/stable-diffusion/echarlaix/tiny-random-stable-diffusion-xl/277c773f1b2fa836ccaf.json
deleted file mode 100644
index e16698cbcb900d9f33dc3d35e43453f8547b4410..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/stable-diffusion/echarlaix/tiny-random-stable-diffusion-xl/277c773f1b2fa836ccaf.json
+++ /dev/null
@@ -1 +0,0 @@
-{"model_type": "stable-diffusion", "text_encoder": {"architectures": ["CLIPTextModel"], "attention_dropout": 0.0, "hidden_act": "gelu", "hidden_size": 32, "initializer_factor": 1.0, "initializer_range": 0.02, "intermediate_size": 37, "layer_norm_eps": 1e-05, "max_position_embeddings": 77, "model_type": "clip_text_model", "neuron": {"auto_cast": "all", "auto_cast_type": "bf16", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "dynamic_batch_size": true, "inline_weights_to_neff": false, "optlevel": "2", "output_attentions": false, "output_hidden_states": false, "static_batch_size": 1, "static_sequence_length": 77}, "num_attention_heads": 4, "num_hidden_layers": 5, "output_hidden_states": true, "task": "feature-extraction", "vocab_size": 1000}, "text_encoder_2": {"architectures": ["CLIPTextModelWithProjection"], "attention_dropout": 0.0, "hidden_act": "gelu", "hidden_size": 32, "initializer_factor": 1.0, "initializer_range": 0.02, "intermediate_size": 37, "layer_norm_eps": 1e-05, "max_position_embeddings": 77, "model_type": "clip_text_model", "neuron": {"auto_cast": "all", "auto_cast_type": "bf16", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "dynamic_batch_size": true, "inline_weights_to_neff": false, "optlevel": "2", "output_attentions": false, "output_hidden_states": false, "static_batch_size": 1, "static_sequence_length": 77}, "num_attention_heads": 4, "num_hidden_layers": 5, "output_hidden_states": true, "task": "feature-extraction", "vocab_size": 1000}, "unet": {"_class_name": "UNet2DConditionModel", "act_fn": "silu", "addition_embed_type": "text_time", "addition_embed_type_num_heads": 64, "addition_time_embed_dim": 8, "attention_head_dim": [2, 4], "attention_type": "default", "block_out_channels": [32, 64], "center_input_sample": false, "class_embed_type": null, "class_embeddings_concat": false, "conv_in_kernel": 3, "conv_out_kernel": 3, "cross_attention_dim": 64, "cross_attention_norm": null, "down_block_types": ["DownBlock2D", "CrossAttnDownBlock2D"], "downsample_padding": 1, "dropout": 0.0, "dual_cross_attention": false, "encoder_hid_dim": null, "encoder_hid_dim_type": null, "flip_sin_to_cos": true, "freq_shift": 0, "in_channels": 4, "layers_per_block": 2, "mid_block_only_cross_attention": null, "mid_block_scale_factor": 1, "mid_block_type": "UNetMidBlock2DCrossAttn", "neuron": {"auto_cast": "all", "auto_cast_type": "bf16", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "dynamic_batch_size": true, "inline_weights_to_neff": false, "optlevel": "2", "output_attentions": false, "output_hidden_states": false, "static_batch_size": 1, "static_height": 16, "static_num_channels": 4, "static_sequence_length": 77, "static_width": 16}, "norm_eps": 1e-05, "norm_num_groups": 32, "num_attention_heads": null, "num_class_embeds": null, "only_cross_attention": false, "out_channels": 4, "projection_class_embeddings_input_dim": 80, "resnet_out_scale_factor": 1.0, "resnet_skip_time_act": false, "resnet_time_scale_shift": "default", "reverse_transformer_layers_per_block": null, "task": "semantic-segmentation", "time_cond_proj_dim": null, "time_embedding_act_fn": null, "time_embedding_dim": null, "time_embedding_type": "positional", "timestep_post_act": null, "transformer_layers_per_block": [1, 2], "up_block_types": ["CrossAttnUpBlock2D", "UpBlock2D"], "upcast_attention": false, "use_linear_projection": true}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/stable-diffusion/hf-internal-testing/tiny-stable-diffusion-torch/3bbb1989a46dcfcab30c.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/stable-diffusion/hf-internal-testing/tiny-stable-diffusion-torch/3bbb1989a46dcfcab30c.json
deleted file mode 100644
index 331edc30670a9d90cb7f9160e345984540bd89db..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/stable-diffusion/hf-internal-testing/tiny-stable-diffusion-torch/3bbb1989a46dcfcab30c.json
+++ /dev/null
@@ -1 +0,0 @@
-{"model_type": "stable-diffusion", "text_encoder": {"architectures": ["CLIPTextModel"], "attention_dropout": 0.0, "dropout": 0.0, "hidden_act": "quick_gelu", "hidden_size": 32, "initializer_factor": 1.0, "initializer_range": 0.02, "intermediate_size": 37, "layer_norm_eps": 1e-05, "max_position_embeddings": 77, "model_type": "clip_text_model", "neuron": {"auto_cast": "matmul", "auto_cast_type": "bf16", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "dynamic_batch_size": false, "inline_weights_to_neff": false, "optlevel": "2", "output_attentions": false, "output_hidden_states": false, "static_batch_size": 1, "static_sequence_length": 77}, "num_attention_heads": 4, "num_hidden_layers": 5, "task": "feature-extraction", "vocab_size": 1000}, "unet": {"_class_name": "UNet2DConditionModel", "act_fn": "silu", "addition_embed_type": null, "addition_embed_type_num_heads": 64, "addition_time_embed_dim": null, "attention_head_dim": 8, "attention_type": "default", "block_out_channels": [32, 64], "center_input_sample": false, "class_embed_type": null, "class_embeddings_concat": false, "conv_in_kernel": 3, "conv_out_kernel": 3, "cross_attention_dim": 32, "cross_attention_norm": null, "down_block_types": ["DownBlock2D", "CrossAttnDownBlock2D"], "downsample_padding": 1, "dropout": 0.0, "dual_cross_attention": false, "encoder_hid_dim": null, "encoder_hid_dim_type": null, "flip_sin_to_cos": true, "freq_shift": 0, "in_channels": 4, "layers_per_block": 2, "mid_block_only_cross_attention": null, "mid_block_scale_factor": 1, "mid_block_type": "UNetMidBlock2DCrossAttn", "neuron": {"auto_cast": "matmul", "auto_cast_type": "bf16", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "dynamic_batch_size": false, "inline_weights_to_neff": false, "optlevel": "2", "output_attentions": false, "output_hidden_states": false, "static_batch_size": 1, "static_height": 32, "static_num_channels": 4, "static_sequence_length": 77, "static_width": 32}, "norm_eps": 1e-05, "norm_num_groups": 32, "num_attention_heads": null, "num_class_embeds": null, "only_cross_attention": false, "out_channels": 4, "projection_class_embeddings_input_dim": null, "resnet_out_scale_factor": 1.0, "resnet_skip_time_act": false, "resnet_time_scale_shift": "default", "reverse_transformer_layers_per_block": null, "task": "semantic-segmentation", "time_cond_proj_dim": null, "time_embedding_act_fn": null, "time_embedding_dim": null, "time_embedding_type": "positional", "timestep_post_act": null, "transformer_layers_per_block": 1, "up_block_types": ["CrossAttnUpBlock2D", "UpBlock2D"], "upcast_attention": false, "use_linear_projection": false}}
\ No newline at end of file
diff --git a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/stable-diffusion/hf-internal-testing/tiny-stable-diffusion-torch/3e7aba58821274ecca60.json b/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/stable-diffusion/hf-internal-testing/tiny-stable-diffusion-torch/3e7aba58821274ecca60.json
deleted file mode 100644
index 4f150d2d85b77c6b398bdc876ec2f76447bfe0e1..0000000000000000000000000000000000000000
--- a/neuronxcc-2.13.66.0+6dfecc895/0_REGISTRY/0.0.21.dev0/inference/stable-diffusion/hf-internal-testing/tiny-stable-diffusion-torch/3e7aba58821274ecca60.json
+++ /dev/null
@@ -1 +0,0 @@
-{"model_type": "stable-diffusion", "text_encoder": {"architectures": ["CLIPTextModel"], "attention_dropout": 0.0, "dropout": 0.0, "hidden_act": "quick_gelu", "hidden_size": 32, "initializer_factor": 1.0, "initializer_range": 0.02, "intermediate_size": 37, "layer_norm_eps": 1e-05, "max_position_embeddings": 77, "model_type": "clip_text_model", "neuron": {"auto_cast": "matmul", "auto_cast_type": "bf16", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "dynamic_batch_size": false, "inline_weights_to_neff": false, "optlevel": "2", "output_attentions": false, "output_hidden_states": false, "static_batch_size": 1, "static_sequence_length": 77}, "num_attention_heads": 4, "num_hidden_layers": 5, "task": "feature-extraction", "vocab_size": 1000}, "unet": {"_class_name": "UNet2DConditionModel", "act_fn": "silu", "addition_embed_type": null, "addition_embed_type_num_heads": 64, "addition_time_embed_dim": null, "attention_head_dim": 8, "attention_type": "default", "block_out_channels": [32, 64], "center_input_sample": false, "class_embed_type": null, "class_embeddings_concat": false, "conv_in_kernel": 3, "conv_out_kernel": 3, "cross_attention_dim": 32, "cross_attention_norm": null, "down_block_types": ["DownBlock2D", "CrossAttnDownBlock2D"], "downsample_padding": 1, "dropout": 0.0, "dual_cross_attention": false, "encoder_hid_dim": null, "encoder_hid_dim_type": null, "flip_sin_to_cos": true, "freq_shift": 0, "in_channels": 4, "layers_per_block": 2, "mid_block_only_cross_attention": null, "mid_block_scale_factor": 1, "mid_block_type": "UNetMidBlock2DCrossAttn", "neuron": {"auto_cast": "matmul", "auto_cast_type": "bf16", "compiler_type": "neuronx-cc", "compiler_version": "2.13.66.0+6dfecc895", "dynamic_batch_size": false, "inline_weights_to_neff": false, "optlevel": "2", "output_attentions": false, "output_hidden_states": false, "static_batch_size": 64, "static_height": 32, "static_num_channels": 4, "static_sequence_length": 77, "static_width": 32}, "norm_eps": 1e-05, "norm_num_groups": 32, "num_attention_heads": null, "num_class_embeds": null, "only_cross_attention": false, "out_channels": 4, "projection_class_embeddings_input_dim": null, "resnet_out_scale_factor": 1.0, "resnet_skip_time_act": false, "resnet_time_scale_shift": "default", "reverse_transformer_layers_per_block": null, "task": "semantic-segmentation", "time_cond_proj_dim": null, "time_embedding_act_fn": null, "time_embedding_dim": null, "time_embedding_type": "positional", "timestep_post_act": null, "transformer_layers_per_block": 1, "up_block_types": ["CrossAttnUpBlock2D", "UpBlock2D"], "upcast_attention": false, "use_linear_projection": false}}
\ No newline at end of file