################################ language examples ##############################
# Shared imports for all examples below.
# NOTE: MistralStarConfig, MistralQuietConfig, and MistralTokenizerFast are
# assumed to be provided by this repo's custom modeling code; the remaining
# classes ship with Hugging Face `transformers`.
import torch
from transformers import (
    MistralConfig,
    MistralModel,
    MistralForCausalLM,
    MistralForSequenceClassification,
    MistralForTokenClassification,
    MistralForQuestionAnswering,
    MixtralConfig,
    MixtralModel,
    MixtralForCausalLM,
)
## MistralTokenizerFast
tokenizer = MistralTokenizerFast.from_pretrained("mistralai/Mistral-7B-v0.1")
tokenizer.encode("Hello this is a test")
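# A quick round-trip sketch: decode() should recover the encoded text,
# modulo any special tokens (e.g. BOS) the tokenizer prepends.
ids = tokenizer.encode("Hello this is a test")
tokenizer.decode(ids, skip_special_tokens=True)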
################################ INITIALIZE Mistral MODEL ##############################
## INITIALIZE MistralStarConfig
# Initializing a Mistral-7B-style MistralStar configuration
configuration = MistralStarConfig()
# Initializing a model from the Mistral 7B style configuration
model = MistralModel(configuration)
# Accessing the model configuration
configuration = model.config
## INITIALIZE MistralQuietConfig
# Initializing a Mistral-7B-style MistralQuiet configuration
configuration = MistralQuietConfig()
# Initializing a model from the Mistral 7B style configuration
model = MistralModel(configuration)
# Accessing the model configuration
configuration = model.config
## INITIALIZE MODEL
# Initializing a Mistral 7B style configuration
configuration = MistralConfig()
# Initializing a model from the Mistral 7B style configuration
model = MistralModel(configuration)
# Accessing the model configuration
configuration = model.config
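# A hedged sketch: shrinking the configuration to build a small, randomly
# initialised model for quick local tests (the hyperparameter values below
# are illustrative, not a recommended architecture).
small_config = MistralConfig(
    hidden_size=256,
    intermediate_size=512,
    num_hidden_layers=2,
    num_attention_heads=4,
    num_key_value_heads=2,
)
small_model = MistralModel(small_config)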
## INITIALIZE MODEL-Examples
# Download model and configuration from huggingface.co and cache.
model = MistralModel.from_pretrained("mistralai/Mistral-7B-v0.1")
# Model was saved using *save_pretrained('./test/saved_model/')* (for example purposes, not runnable).
model = MistralModel.from_pretrained("./test/saved_model/")
# Update configuration during loading.
model = MistralModel.from_pretrained("mistralai/Mistral-7B-v0.1", output_attentions=True)
assert model.config.output_attentions is True
# Loading from a TF checkpoint file instead of a PyTorch model (slower, for example purposes, not runnable).
config = MistralConfig.from_json_file("./tf_model/my_tf_model_config.json")
model = MistralModel.from_pretrained("./tf_model/my_tf_checkpoint.ckpt.index", from_tf=True, config=config)
# Loading from a Flax checkpoint file instead of a PyTorch model (slower)
model = MistralModel.from_pretrained("mistralai/Mistral-7B-v0.1", from_flax=True)
################################ MistralForCausalLM ##############################
## MistralForCausalLM
model = MistralForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1")
tokenizer = MistralTokenizerFast.from_pretrained("mistralai/Mistral-7B-v0.1")
prompt = "Hey, are you conscious? Can you talk to me?"
inputs = tokenizer(prompt, return_tensors="pt")
# Generate
generate_ids = model.generate(inputs.input_ids, max_length=30)
tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
## "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
################################ MistralForSequenceClassification ##############################
### MistralForSequenceClassification - single-label classification:
tokenizer = MistralTokenizerFast.from_pretrained("mistralai/Mistral-7B-v0.1")
model = MistralForSequenceClassification.from_pretrained("mistralai/Mistral-7B-v0.1")
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
with torch.no_grad():
logits = model(**inputs).logits
predicted_class_id = logits.argmax().item()
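# Mapping the predicted id back to a label name; the base checkpoint has no
# trained classification head, so id2label holds generic names like 'LABEL_0'.
model.config.id2label[predicted_class_id]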
# To train a model on `num_labels` classes, you can pass `num_labels=num_labels` to `.from_pretrained(...)`
num_labels = len(model.config.id2label)
model = MistralForSequenceClassification.from_pretrained("mistralai/Mistral-7B-v0.1", num_labels=num_labels)
labels = torch.tensor([1])
loss = model(**inputs, labels=labels).loss
### MistralForSequenceClassification - multi-label classification:
tokenizer = MistralTokenizerFast.from_pretrained("mistralai/Mistral-7B-v0.1")
model = MistralForSequenceClassification.from_pretrained("mistralai/Mistral-7B-v0.1", problem_type="multi_label_classification")
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
with torch.no_grad():
logits = model(**inputs).logits
predicted_class_ids = torch.arange(0, logits.shape[-1])[torch.sigmoid(logits).squeeze(dim=0) > 0.5]
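# A sketch mapping every above-threshold id back to its label name
# (again the generic default mapping for a base checkpoint).
[model.config.id2label[i.item()] for i in predicted_class_ids]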
# To train a model on `num_labels` classes, you can pass `num_labels=num_labels` to `.from_pretrained(...)`
num_labels = len(model.config.id2label)
model = MistralForSequenceClassification.from_pretrained(
"mistralai/Mistral-7B-v0.1", num_labels=num_labels, problem_type="multi_label_classification"
)
labels = torch.sum(
torch.nn.functional.one_hot(predicted_class_ids[None, :].clone(), num_classes=num_labels), dim=1
).to(torch.float)
loss = model(**inputs, labels=labels).loss
################################ MistralForTokenClassification ##############################
### MistralForTokenClassification
tokenizer = MistralTokenizerFast.from_pretrained("mistralai/Mistral-7B-v0.1")
model = MistralForTokenClassification.from_pretrained("mistralai/Mistral-7B-v0.1")
inputs = tokenizer(
"HuggingFace is a company based in Paris and New York", add_special_tokens=False, return_tensors="pt"
)
with torch.no_grad():
logits = model(**inputs).logits
predicted_token_class_ids = logits.argmax(-1)
# Note that tokens are classified rather than input words, which means that
# there might be more predicted token classes than words.
# Multiple token classes might account for the same word
predicted_tokens_classes = [model.config.id2label[t.item()] for t in predicted_token_class_ids[0]]
predicted_tokens_classes
labels = predicted_token_class_ids
loss = model(**inputs, labels=labels).loss
round(loss.item(), 2)
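# A sketch pairing each sub-token with its predicted class for inspection;
# convert_ids_to_tokens is standard on fast tokenizers.
tokens = tokenizer.convert_ids_to_tokens(inputs.input_ids[0].tolist())
list(zip(tokens, predicted_tokens_classes))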
################################ MistralForQuestionAnswering ##############################
tokenizer = MistralTokenizerFast.from_pretrained("mistralai/Mistral-7B-v0.1")
model = MistralForQuestionAnswering.from_pretrained("mistralai/Mistral-7B-v0.1")
question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
inputs = tokenizer(question, text, return_tensors="pt")
with torch.no_grad():
outputs = model(**inputs)
answer_start_index = outputs.start_logits.argmax()
answer_end_index = outputs.end_logits.argmax()
predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
# target is "nice puppet"
target_start_index = torch.tensor([14])
target_end_index = torch.tensor([15])
outputs = model(**inputs, start_positions=target_start_index, end_positions=target_end_index)
loss = outputs.loss
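# Decoding the predicted span back to text (a sketch; with the base
# checkpoint's randomly initialised QA head the predicted span may be arbitrary).
tokenizer.decode(predict_answer_tokens, skip_special_tokens=True)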
################################ Mixtral MoE Models ##############################
################################################################
# Initializing a Mixtral 8x7B style configuration
################################################################
configuration = MixtralConfig()
# Initializing a model from the Mixtral 8x7B style configuration
model = MixtralModel(configuration)
# Accessing the model configuration
configuration = model.config
################################################################
### The base model can be used as follows:
################################################################
model = MixtralForCausalLM.from_pretrained("mistralai/Mixtral-8x7B-v0.1", device_map="auto")
tokenizer = MistralTokenizerFast.from_pretrained("mistralai/Mixtral-8x7B-v0.1")
prompt = "My favourite condiment is"
model_inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
model.to("cpu")
generated_ids = model.generate(**model_inputs, max_new_tokens=100, do_sample=True)
tokenizer.batch_decode(generated_ids)[0]
################################################################
### The instruction tuned model can be used as follows:
################################################################
model = MixtralForCausalLM.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1", device_map="auto")
tokenizer = MistralTokenizerFast.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
messages = [
{"role": "user", "content": "What is your favourite condiment?"},
{"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
{"role": "user", "content": "Do you have mayonnaise recipes?"}
]
model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")
generated_ids = model.generate(model_inputs, max_new_tokens=100, do_sample=True)
tokenizer.batch_decode(generated_ids)[0]
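# A hedged sketch: slice off the prompt tokens so only the newly generated
# reply is decoded; apply_chat_template returned a plain id tensor above,
# so its last dimension is the prompt length.
reply_ids = generated_ids[:, model_inputs.shape[-1]:]
tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0]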
################################ end of language examples ##############################