Spaces:
Runtime error
Runtime error
from torch import nn | |
from transformers import CLIPTokenizer, CLIPTextModel | |
class FrozenCLIPEmbedder(nn.Module):
    """Encode text with the Hugging Face CLIP text transformer.

    The ``layer`` option selects which transformer output ``forward`` returns:

    * ``"last"``   -- ``last_hidden_state`` of the transformer.
    * ``"pooled"`` -- ``pooler_output`` with a singleton axis inserted at
      position 1.
    * ``"hidden"`` -- ``hidden_states[layer_idx]``.
    """

    LAYERS = [
        "last",
        "pooled",
        "hidden"
    ]

    def __init__(self, version="openai/clip-vit-large-patch14", device="cuda", max_length=77,
                 freeze=True, layer="last", layer_idx=None):  # clip-vit-base-patch32
        """Load the tokenizer and text model named by ``version``.

        Args:
            version: Checkpoint name or path handed to ``from_pretrained``.
            device: Fallback device for the token ids. It is only used when
                the transformer exposes no parameters; the constructor does
                not move the model to this device.
            max_length: Length every prompt is padded/truncated to.
            freeze: When true, call :meth:`freeze` after loading.
            layer: One of :attr:`LAYERS`.
            layer_idx: Index into ``hidden_states``; required when ``layer``
                is ``"hidden"``.

        Raises:
            ValueError: If ``layer`` is unknown, or ``layer`` is ``"hidden"``
                and ``layer_idx`` is missing or out of range for the model.
        """
        super().__init__()

        # Validate the cheap arguments first so a bad configuration fails
        # before any checkpoint is downloaded or loaded.
        if layer not in self.LAYERS:
            raise ValueError(f"layer must be one of {self.LAYERS}, got {layer!r}")
        if layer == "hidden" and layer_idx is None:
            raise ValueError('layer_idx is required when layer == "hidden"')

        self.tokenizer = CLIPTokenizer.from_pretrained(version)
        self.transformer = CLIPTextModel.from_pretrained(version)
        self.device = device
        self.max_length = max_length
        self.layer = layer
        self.layer_idx = layer_idx

        if layer == "hidden":
            # The bound comes from the loaded model instead of a hard-coded
            # 12, which is the same limit for the default checkpoint.
            num_layers = self.transformer.config.num_hidden_layers
            if abs(layer_idx) > num_layers:
                raise ValueError(
                    f"layer_idx must satisfy abs(layer_idx) <= {num_layers}, got {layer_idx}"
                )

        if freeze:
            self.freeze()

    def freeze(self):
        """Put the transformer in eval mode and disable gradients for all parameters."""
        self.transformer = self.transformer.eval()
        # NOTE: ``train()`` is not overridden here, so a later ``.train()``
        # call on this module (or a parent) switches training mode back on.
        for param in self.parameters():
            param.requires_grad = False

    def forward(self, text):
        """Tokenize ``text`` and return the transformer output selected by ``self.layer``.

        Args:
            text: Prompt(s) accepted by the tokenizer.

        Returns:
            The tensor chosen by ``self.layer`` (see the class docstring).
        """
        batch_encoding = self.tokenizer(text, truncation=True, max_length=self.max_length, return_length=True,
                                        return_overflowing_tokens=False, padding="max_length", return_tensors="pt")

        # Send the ids to wherever the weights actually live. ``self.device``
        # is only a constructor argument and can disagree with the model's
        # real placement (e.g. default "cuda" while the model is on the CPU).
        first_param = next(self.transformer.parameters(), None)
        target_device = self.device if first_param is None else first_param.device
        tokens = batch_encoding["input_ids"].to(target_device)

        outputs = self.transformer(input_ids=tokens, output_hidden_states=self.layer == "hidden")
        if self.layer == "last":
            z = outputs.last_hidden_state
        elif self.layer == "pooled":
            z = outputs.pooler_output[:, None, :]
        else:
            z = outputs.hidden_states[self.layer_idx]
        return z

    def encode(self, text):
        """Alias for calling the module: ``encode(text)`` is ``self(text)``."""
        return self(text)