Custom architectures with HuggingFace 🤗
Baseline
In this section we will create a baseline model and train it. In our example, we will train a simple CNN model on the MNIST dataset.
import torch
from torch import nn, optim
import torchvision
from torchvision import datasets, transforms
import torch.nn.functional as F
from torch.utils.data import DataLoader
# MNIST training split, downloaded on first use; every image becomes a tensor
train_dataset = datasets.MNIST(
    root="./data",
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

# Loader settings: mini-batch size and number of worker processes
batch_size = 64
num_workers = 2

# Shuffled mini-batch iterator over the training split
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    num_workers=num_workers,
    shuffle=True,
)
then define our model
class Net(nn.Module):
    """Small two-convolution CNN for single-channel 28x28 MNIST digits.

    ``forward`` returns raw, unnormalized class scores (logits).
    ``nn.CrossEntropyLoss`` expects unnormalized logits and applies
    log-softmax itself, so the network must not apply softmax before the
    loss. Use ``net.softmax(net(x))`` when probabilities are needed.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        # 320 = 20 channels * 4 * 4: two (5x5 conv -> 2x2 max-pool) stages
        # reduce a 28x28 image to 4x4.
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)
        # Parameter-free, so it does not affect the state dict; kept as an
        # attribute to turn logits into probabilities at inference time.
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        """Return the 10 class logits for each image in the batch ``x``."""
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        # No softmax here: the loss function normalizes the scores itself.
        return self.fc2(x)
then train our model and save the weights
# Network, loss function and optimizer
model = Net()
criterion = nn.CrossEntropyLoss()
learning_rate = 0.01
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
epochs = 10

for epoch in range(epochs):
    running_loss = 0.0  # loss summed since the last progress line
    for i, (inputs, labels) in enumerate(train_dataloader):
        # One optimization step: reset gradients, forward, backward, update
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (i + 1) % 20 == 0:  # report the mean loss every 20 mini-batches
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.3f' %
                  (epoch + 1, epochs, i + 1, len(train_dataloader), running_loss / 20))
            running_loss = 0.0
# Only the learned parameters (the state dict) are stored, under the
# 'state_dict' key of a plain dictionary.
checkpoint = {'state_dict': model.state_dict()}

# Serialize the checkpoint to model.pth in the current working directory
torch.save(checkpoint, 'model.pth')
custom model
To create a custom architecture that is 🤗 friendly, we need 3 files:
- MyConfig.py : file defining the configuration of the architecture
- MyModel.py : file defining the model architecture
- MyPipe.py : file defining the pipeline
Each of these files will have to be defined outside of the main Python interpreter, i.e. saved as separate .py files. The reason we are doing this is that it lets our dependencies and custom architecture be uploaded automatically.
config
The config file is a file that stores information about the architecture and it is used to instantiate the model.
In my case I chose to store only 2 parameters, which are the parameters for the conv1 and conv2 layers; you can add more parameters of your choosing.
from transformers import PretrainedConfig
class MnistConfig(PretrainedConfig):
    """Configuration that stores the two tunable sizes of the MNIST CNN.

    Attributes:
        conv1: value stored for the first convolution layer (default 10).
        conv2: value stored for the second convolution layer (default 20).
    """

    # The task is image classification, so a model type close to that task
    # is declared here; it does not change how our own model behaves.
    model_type = "MobileNetV1"

    def __init__(self, conv1=10, conv2=20, **kwargs):
        # Record our own settings first, then forward every remaining
        # keyword argument to PretrainedConfig.
        self.conv1 = conv1
        self.conv2 = conv2
        super().__init__(**kwargs)
.
├── MyFolder
│ ├── __init__.py
│ └── MyConfig.py
└── model.pth
model
For the model we need to inherit from the PreTrainedModel class and pass the previously defined configuration class into the config_class attribute.
Do not forget to instantiate the model using the config parameter.
from transformers import PreTrainedModel
from .MyConfig import MnistConfig # local import
from torch import nn
import torch.nn.functional as F
class MnistModel(PreTrainedModel):
    """MNIST CNN classifier exposed as a 🤗 ``PreTrainedModel``.

    The layer widths come from ``config.conv1`` and ``config.conv2``.
    ``forward`` returns softmax-normalized class scores; when ``labels`` is
    given it returns a dict with the loss as well, which is the shape the
    Trainer API expects.
    """

    # pass the previously defined config class to the model
    config_class = MnistConfig

    def __init__(self, config):
        # instantiate the model using the configuration
        super().__init__(config)
        # use the config to instantiate our model
        self.conv1 = nn.Conv2d(1, config.conv1, kernel_size=5)
        self.conv2 = nn.Conv2d(config.conv1, config.conv2, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        # Two (5x5 conv -> 2x2 max-pool) stages reduce a 28x28 image to 4x4,
        # so the flattened size is conv2 * 4 * 4 (320 for the default
        # conv2=20). Deriving it keeps non-default configs working.
        self.fc1 = nn.Linear(config.conv2 * 4 * 4, 50)
        self.fc2 = nn.Linear(50, 10)
        self.softmax = nn.Softmax(dim=-1)
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, x, labels=None):
        """Classify a batch of images.

        Args:
            x: batch of single-channel images (28x28 for the sizes above).
            labels: optional target class indices; when provided the loss
                is computed so the model can be fine-tuned with the
                Trainer API.

        Returns:
            The softmax-normalized scores when ``labels`` is None, otherwise
            ``{"loss": loss, "logits": scores}``.
        """
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        # Keep the batch dimension and flatten everything else.
        x = x.flatten(start_dim=1)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        raw_scores = self.fc2(x)
        logits = self.softmax(raw_scores)
        if labels is not None:
            # CrossEntropyLoss expects unnormalized scores (it applies
            # log-softmax itself), so the loss uses the pre-softmax values.
            loss = self.criterion(raw_scores, labels)
            # this will make your AI compatible with the trainer API
            return {"loss": loss, "logits": logits}
        return logits
The labels parameter allows your model to be compatible with the Trainer API; here is a notebook showcasing how to use it.
.
├── MyFolder
│ ├── __init__.py
│ ├── MyConfig.py
│ └── MyModel.py
└── model.pth
push to hub 🤗
First we need to log in using a token with write access
from huggingface_hub import notebook_login
# Log in to the Hugging Face Hub; the token used must have write access
# so that the later push_to_hub calls are allowed.
notebook_login()
then load the model and register it for the auto class
from MyFolder.MyConfig import MnistConfig
from MyFolder.MyModel import MnistModel
import torch
# Build the configuration with its default values (conv1=10, conv2=20)
conf = MnistConfig()
HF_Model = MnistModel(conf) # instantiate the model using the config
# load the weights
# model.pth holds a dict whose 'state_dict' entry is the trained parameters
weights = torch.load("model.pth")
HF_Model.load_state_dict(weights['state_dict'])
# Register both custom classes for the auto classes; the model is registered
# under AutoModelForImageClassification.
conf.register_for_auto_class()
HF_Model.register_for_auto_class("AutoModelForImageClassification")
finally push our configuration and our model to the hub 🤗
# Upload the configuration first, then the model, to the 'MyRepo' repository
conf.push_to_hub('MyRepo')
HF_Model.push_to_hub('MyRepo')
By now your model should be available in your own repo for you to use.
custom pipeline
understanding the workflow
let's call our previously defined model and use it to classify a new image
from transformers import AutoModelForImageClassification
# trust_remote_code=True is passed because the architecture is custom code
# stored in the repository rather than a class shipped with transformers.
model = AutoModelForImageClassification.from_pretrained("not-lain/MyRepo", trust_remote_code=True)
# fetch a sample image from the web
import requests

url = "https://huggingface.co/datasets/not-lain/dependencies/resolve/main/7.webp"
response = requests.get(url, stream=True)
# abort with an HTTPError on any 4xx / 5xx answer
response.raise_for_status()

# stream the body to a local file, 8192 bytes at a time
with open("image.png", "wb") as f:
    f.writelines(response.iter_content(chunk_size=8192))
print("image saved as image.png")
# read the image and prepare it for the model
from PIL import Image
import torchvision.transforms as transforms
import torch

img = Image.open("image.png")  # read image
gray = img.convert('L')  # single-channel grayscale
print(gray.size)  # image dimensions
# >> (1490, 1480)

# input pipeline: to tensor first, then down to the 28x28 size the model expects
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Resize((28, 28), antialias=True),
    ]
)
# apply the transform and add a leading batch dimension (batch_size = 1)
tensor = transform(gray).unsqueeze(0)

# inference only, so gradient tracking is switched off
with torch.no_grad():
    out = model(tensor)

label = out.argmax(dim=-1)  # index of the highest score = predicted class
print(label.tolist()[0])  # first (and only) item of the batch
# >> 7
creating the pipeline
Let's automate this with our custom pipeline and create something a little bit more complex to cover most use cases:
from transformers import Pipeline
import requests
from PIL import Image
import torchvision.transforms as transforms
import torch
class MnistPipe(Pipeline):
    """Pipeline that classifies a digit image with the custom MNIST model.

    Call-time keyword arguments:
        download: when true, the input is treated as a URL; it is fetched
            and stored as ``image.png`` before being processed.
        clean_output: when true (the default) the predicted class index is
            returned; otherwise the raw model output is returned.
    """

    def __init__(self, **kwargs):
        # self.tokenizer = (...) # code if you want to instantiate more parameters
        super().__init__(**kwargs)  # self.model automatically instantiated here
        # Convert to a tensor, then resize to the 28x28 input of the model.
        self.transform = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Resize((28, 28), antialias=True),
            ]
        )

    def _sanitize_parameters(self, **kwargs):
        """Route call-time kwargs to the preprocess / forward / postprocess steps.

        Returns:
            A ``(preprocess_kwargs, forward_kwargs, postprocess_kwargs)``
            tuple; the forward step takes no extra arguments.
        """
        preprocess_kwargs = {}
        postprocess_kwargs = {}
        if "download" in kwargs:
            preprocess_kwargs["download"] = kwargs["download"]
        if "clean_output" in kwargs:
            postprocess_kwargs["clean_output"] = kwargs["clean_output"]
        return preprocess_kwargs, {}, postprocess_kwargs

    def preprocess(self, inputs, download=False):
        """Load the image and return it as a batch-of-one tensor.

        Args:
            inputs: path of a local image, or a URL when ``download`` is true.
            download: fetch ``inputs`` to ``image.png`` first.
        """
        if download:
            # call download_img method and name image as "image.png"
            self.download_img(inputs)
            inputs = "image.png"
        # we open and process the image; convert() yields an independent
        # image, so the file handle can be released right away
        with Image.open(inputs) as img:
            gray = img.convert("L")
        tensor = self.transform(gray)
        return tensor.unsqueeze(0)  # add the batch dimension

    def _forward(self, tensor):
        """Run the model on the preprocessed tensor without tracking gradients."""
        with torch.no_grad():
            # the model has been automatically instantiated
            # in the __init__ method
            out = self.model(tensor)
        return out

    def postprocess(self, out, clean_output=True):
        """Return the predicted class index, or ``out`` untouched if not ``clean_output``."""
        if not clean_output:
            return out
        label = torch.argmax(out, dim=-1)  # get class
        return label.tolist()[0]

    def download_img(self, url):
        """Download ``url`` and store it as ``image.png`` in the working directory.

        Raises:
            requests.HTTPError: if the server answers with a 4xx / 5xx status.
        """
        # The context manager releases the streamed connection when done.
        with requests.get(url, stream=True) as response:
            # Fail before touching the file so that an error page is never
            # written to image.png.
            response.raise_for_status()
            with open("image.png", "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        print("image saved as image.png")
let's explain our pipeline :
- when instantiating the pipeline using pipe = pipeline(...), these parameters will be passed to the __init__ method
- when calling the previously defined pipeline with pipe(...), these parameters will be passed to the _sanitize_parameters method, which will split the parameters and pass them to either the :
  - preprocess method : this method is usually used to clean the input; in our case it will load the image, convert it to grayscale, and transform it into a torch tensor
  - _forward method : this method is mostly used to call our model and predict the output
  - postprocess method : this method is typically used to clean our output; in our example, if the clean_output parameter is not True it will return the raw output, else it will apply argmax and extract the label for us
- download_img method : this is a custom method I added to our architecture and it is not needed to create a pipeline. In the example above, if the download parameter is true, the preprocess method calls download_img, which will download the image
when using pipe(...) we call the following methods in order :
1. _sanitize_parameters : makes sure where each keyword argument goes
2. preprocess : cleans the input
3. _forward : uses the AI
4. postprocess : cleans the output
Do not forget to save your code in an external file as this will automate the process of pushing our code for us
.
├── MyFolder
│ ├── __init__.py
│ ├── MyConfig.py
│ ├── MyModel.py
│ └── MyPipe.py
└── model.pth
push to hub 🤗
you need transformers>=4.40.0 here (quote the requirement so the shell does not treat >= as an output redirection)
pip install "transformers>=4.40.0"
from MyFolder.MyPipe import MnistPipe
from transformers.pipelines import PIPELINE_REGISTRY
from transformers import pipeline, AutoModelForImageClassification
# register pipeline
# MnistPipe becomes the implementation used for the task named below, with
# AutoModelForImageClassification as the model class to load.
PIPELINE_REGISTRY.register_pipeline(
"image-classification", # or any other custom task
pipeline_class=MnistPipe,
pt_model=AutoModelForImageClassification,
# Optional parameters :
# select a default revision/branch/commit_hash for the model
# default={"pt": ("not-lain/MyRepo", "dba8d15072d743b6cb4a707246f801699897fb72")},
type="image", # current support type: text, audio, image, multimodal
)
# call the pipeline
# trust_remote_code=True is passed because the model in the repo is custom code
pipe = pipeline(
# Optional : pass the task used above here
# "image-classification",
model="not-lain/MyRepo",
trust_remote_code=True)
# upload to 🤗
pipe.push_to_hub('not-lain/MyRepo')
All done, now you can use your new pipeline :
from transformers import pipeline
# no need to specify what task we are using
pipe = pipeline(model="not-lain/MyRepo", trust_remote_code=True)
# first call: the input is a URL, so download=True fetches it to image.png;
# clean_output=False returns the raw model output instead of the label
pipe( "https://huggingface.co/datasets/not-lain/dependencies/resolve/main/7.webp",
download=True, # will call the download_img method
clean_output=False # will be passed as postprocess_kwargs
)
# >> image saved as image.png
# >> tensor([[0., 0., 0., 0., 0., 0., 0., 1., 0., 0.]])
# second call: local file, default clean_output=True returns the class index
pipe("image.png")
# >> 7
# the custom download_img method can also be called directly
pipe.download_img("https://huggingface.co/datasets/not-lain/dependencies/resolve/main/7.webp")
# >> image saved as image.png
Finally add a README.md file to your repo to let people know how to use your custom architecture 🥳
Resources :
- custom model documentation: https://huggingface.co/docs/transformers/en/custom_models
- custom pipeline documentation: https://huggingface.co/docs/transformers/en/add_new_pipeline
| Repo | custom_code | custom_pipeline | notes |
|---|---|---|---|
| not-lain/MyRepo | ✅ | ✅ | small code and easy to follow |
| vikhyatk/moondream1 | ✅ | ✅ | big architecture, pipeline can be found here |
| microsoft/phi-2 | ✅ | 🟡 | big architecture, working pipeline |
| Qwen/Qwen-VL-Chat | ✅ | ⭕ | big architecture, no pipeline yet |
| tiiuae/falcon-7b | ✅ | 🟡 | big architecture, working pipeline |
| briaai/RMBG-1.4 | ✅ | ✅ | big architecture, working pipeline |
📺 youtube : https://www.youtube.com/watch?v=9gZ7LvEJRBo
🌐 how to reach me : https://not-lain.github.io/