################################ language examples ##############################

import torch
from transformers import (
    LlamaTokenizerFast,
    MistralConfig,
    MistralModel,
    MistralForCausalLM,
    MistralForSequenceClassification,
    MistralForTokenClassification,
    MistralForQuestionAnswering,
    MixtralConfig,
    MixtralModel,
    MixtralForCausalLM,
)

## LlamaTokenizerFast
# Mistral checkpoints reuse the Llama tokenizer, so LlamaTokenizerFast is the right class.
tokenizer = LlamaTokenizerFast.from_pretrained("mistralai/Mistral-7B-v0.1")
tokenizer.encode("Hello this is a test")

################################ INITIALIZE Mistral MODEL ##############################

## INITIALIZE MistralStarConfig
# MistralStarConfig and MistralQuietConfig are project-local (Quiet-STaR style)
# variants, not part of transformers; import them from this repo's modeling code.

# Initializing a Mistral 7B style configuration
configuration = MistralStarConfig()

# Initializing a model from the Mistral 7B style configuration
model = MistralModel(configuration)

# Accessing the model configuration
configuration = model.config

## INITIALIZE MistralQuietConfig
# Initializing a Mistral 7B style configuration
configuration = MistralQuietConfig()

# Initializing a model from the Mistral 7B style configuration
model = MistralModel(configuration)

# Accessing the model configuration
configuration = model.config

## INITIALIZE MODEL
# Initializing a Mistral 7B style configuration
configuration = MistralConfig()

# Initializing a model from the Mistral 7B style configuration
model = MistralModel(configuration)

# Accessing the model configuration
configuration = model.config

## INITIALIZE MODEL - Examples
# Download model and configuration from huggingface.co and cache.
model = MistralModel.from_pretrained("mistralai/Mistral-7B-v0.1")

# Model was saved using *save_pretrained('./test/saved_model/')* (for example purposes, not runnable).
model = MistralModel.from_pretrained("./test/saved_model/")

# Update configuration during loading.
model = MistralModel.from_pretrained("mistralai/Mistral-7B-v0.1", output_attentions=True)
assert model.config.output_attentions == True

# Loading from a TF checkpoint file instead of a PyTorch model (slower, for example purposes, not runnable).
config = MistralConfig.from_json_file("./tf_model/my_tf_model_config.json")
model = MistralModel.from_pretrained("./tf_model/my_tf_checkpoint.ckpt.index", from_tf=True, config=config)

# Loading from a Flax checkpoint file instead of a PyTorch model (slower).
model = MistralModel.from_pretrained("mistralai/Mistral-7B-v0.1", from_flax=True)

################################ MistralForCausalLM ##############################

## MistralForCausalLM
model = MistralForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1")
tokenizer = LlamaTokenizerFast.from_pretrained("mistralai/Mistral-7B-v0.1")

prompt = "Hey, are you conscious? Can you talk to me?"
inputs = tokenizer(prompt, return_tensors="pt")

# Generate
generate_ids = model.generate(inputs.input_ids, max_length=30)
tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
## "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
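# The single-prompt call above needs no padding, but batched generation does, and
# the Mistral tokenizer ships without a pad token. A minimal sketch of batched
# generation, reusing the model and tokenizer loaded above; reassigning the EOS
# token as the pad token is a common convention, not an official requirement.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"  # left-pad decoder-only models so generation continues from real tokens
prompts = ["Hey, are you conscious?", "My favourite condiment is"]
batch = tokenizer(prompts, return_tensors="pt", padding=True)
generate_ids = model.generate(**batch, max_new_tokens=20, pad_token_id=tokenizer.eos_token_id)
tokenizer.batch_decode(generate_ids, skip_special_tokens=True)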
################################ MistralForSequenceClassification ##############################

### MistralForSequenceClassification - single-label classification:
tokenizer = LlamaTokenizerFast.from_pretrained("mistralai/Mistral-7B-v0.1")
model = MistralForSequenceClassification.from_pretrained("mistralai/Mistral-7B-v0.1")

inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
predicted_class_id = logits.argmax().item()

# To train a model on `num_labels` classes, you can pass `num_labels=num_labels` to `.from_pretrained(...)`
num_labels = len(model.config.id2label)
model = MistralForSequenceClassification.from_pretrained("mistralai/Mistral-7B-v0.1", num_labels=num_labels)

labels = torch.tensor([1])
loss = model(**inputs, labels=labels).loss

### MistralForSequenceClassification - multi-label classification:
tokenizer = LlamaTokenizerFast.from_pretrained("mistralai/Mistral-7B-v0.1")
model = MistralForSequenceClassification.from_pretrained(
    "mistralai/Mistral-7B-v0.1", problem_type="multi_label_classification"
)

inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
predicted_class_ids = torch.arange(0, logits.shape[-1])[torch.sigmoid(logits).squeeze(dim=0) > 0.5]

# To train a model on `num_labels` classes, you can pass `num_labels=num_labels` to `.from_pretrained(...)`
num_labels = len(model.config.id2label)
model = MistralForSequenceClassification.from_pretrained(
    "mistralai/Mistral-7B-v0.1", num_labels=num_labels, problem_type="multi_label_classification"
)

labels = torch.sum(
    torch.nn.functional.one_hot(predicted_class_ids[None, :].clone(), num_classes=num_labels), dim=1
).to(torch.float)
loss = model(**inputs, labels=labels).loss
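# The sequence-classification head scores the last non-padding token of each
# sequence, so batched inputs require `config.pad_token_id` to be set (Mistral
# checkpoints do not define one). A minimal sketch, reusing the tokenizer and
# model loaded above; the EOS-as-pad assignment is a convention, not a requirement.
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.pad_token_id
batch = tokenizer(["Hello, my dog is cute", "I like rainy days"], padding=True, return_tensors="pt")
with torch.no_grad():
    batch_logits = model(**batch).logits
# Each row of `batch_logits` corresponds to one sequence in the batch.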
################################ MistralForTokenClassification ##############################

### MistralForTokenClassification
tokenizer = LlamaTokenizerFast.from_pretrained("mistralai/Mistral-7B-v0.1")
model = MistralForTokenClassification.from_pretrained("mistralai/Mistral-7B-v0.1")

inputs = tokenizer(
    "HuggingFace is a company based in Paris and New York", add_special_tokens=False, return_tensors="pt"
)

with torch.no_grad():
    logits = model(**inputs).logits
predicted_token_class_ids = logits.argmax(-1)

# Note that tokens are classified rather than input words, which means that
# there might be more predicted token classes than words.
# Multiple token classes might account for the same word.
predicted_tokens_classes = [model.config.id2label[t.item()] for t in predicted_token_class_ids[0]]
predicted_tokens_classes

labels = predicted_token_class_ids
loss = model(**inputs, labels=labels).loss
round(loss.item(), 2)

################################ MistralForQuestionAnswering ##############################

tokenizer = LlamaTokenizerFast.from_pretrained("mistralai/Mistral-7B-v0.1")
model = MistralForQuestionAnswering.from_pretrained("mistralai/Mistral-7B-v0.1")

question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
inputs = tokenizer(question, text, return_tensors="pt")

with torch.no_grad():
    outputs = model(**inputs)

answer_start_index = outputs.start_logits.argmax()
answer_end_index = outputs.end_logits.argmax()
predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]

# target is "nice puppet"
target_start_index = torch.tensor([14])
target_end_index = torch.tensor([15])
outputs = model(**inputs, start_positions=target_start_index, end_positions=target_end_index)
loss = outputs.loss

################################ Mixtral MoE Models ##############################

################################################################
# Initializing a Mixtral 8x7B style configuration
################################################################
configuration = MixtralConfig()

# Initializing a model from the Mixtral 8x7B style configuration
model = MixtralModel(configuration)

# Accessing the model configuration
configuration = model.config

################################################################
### The base model can be used as follows:
################################################################
model = MixtralForCausalLM.from_pretrained("mistralai/Mixtral-8x7B-v0.1", device_map="auto")
tokenizer = LlamaTokenizerFast.from_pretrained("mistralai/Mixtral-8x7B-v0.1")

prompt = "My favourite condiment is"
model_inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
# `device_map="auto"` already places the model across the available devices;
# do not move it back to the CPU while the inputs live on the GPU.

generated_ids = model.generate(**model_inputs, max_new_tokens=100, do_sample=True)
tokenizer.batch_decode(generated_ids)[0]

################################################################
### The instruction tuned model can be used as follows:
################################################################
model = MixtralForCausalLM.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1", device_map="auto")
tokenizer = LlamaTokenizerFast.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")

messages = [
    {"role": "user", "content": "What is your favourite condiment?"},
    {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
    {"role": "user", "content": "Do you have mayonnaise recipes?"},
]

model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")

generated_ids = model.generate(model_inputs, max_new_tokens=100, do_sample=True)
tokenizer.batch_decode(generated_ids)[0]

################################ end of language examples ##############################
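################################ addendum: memory-efficient Mixtral loading (sketch) ##############################

# Mixtral-8x7B has roughly 47B total parameters, so full-precision weights will
# not fit on a single consumer GPU. A minimal sketch of 4-bit loading via
# bitsandbytes; this is one common option, not the only way to run the model,
# and it assumes the `bitsandbytes` package is installed.
from transformers import BitsAndBytesConfig

quant_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)
model = MixtralForCausalLM.from_pretrained(
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    quantization_config=quant_config,
    device_map="auto",
)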