VictorSanh
committed
Commit 4746d53
1 Parent(s): c816c5b
fixes
vision.py CHANGED
@@ -84,7 +84,7 @@ class SiglipVisionModelOutput(ModelOutput):
 
 
 class SiglipVisionEmbeddings(nn.Module):
-    def __init__(self, config:
+    def __init__(self, config: VMistralVisionConfig):
         super().__init__()
         self.config = config
         self.embed_dim = config.hidden_size
@@ -423,7 +423,7 @@ class SiglipMLP(nn.Module):
 
 # Copied from transformers.models.clip.modeling_clip.CLIPEncoderLayer with CLIP->Siglip
 class SiglipEncoderLayer(nn.Module):
-    def __init__(self, config:
+    def __init__(self, config: VMistralVisionConfig):
         super().__init__()
         self.embed_dim = config.hidden_size
         self.self_attn = (
@@ -552,7 +552,7 @@ class SiglipEncoder(nn.Module):
 
 
 class SiglipVisionTransformer(nn.Module):
-    def __init__(self, config:
+    def __init__(self, config: VMistralVisionConfig):
         super().__init__()
         self.config = config
         embed_dim = config.hidden_size
@@ -607,7 +607,7 @@ class SiglipVisionTransformer(nn.Module):
 class SiglipMultiheadAttentionPoolingHead(nn.Module):
     """Multihead Attention Pooling."""
 
-    def __init__(self, config:
+    def __init__(self, config: VMistralVisionConfig):
         super().__init__()
 
         self.probe = nn.Parameter(torch.randn(1, 1, config.hidden_size))
@@ -629,7 +629,7 @@ class SiglipMultiheadAttentionPoolingHead(nn.Module):
 
 
 class SiglipVisionModel(nn.Module):
-    def __init__(self, config:
+    def __init__(self, config: VMistralVisionConfig):
         super().__init__()
 
         self.config = config
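For reference, every constructor touched by this diff now takes the shared vision config as its annotated argument. Below is a minimal sketch of the pattern, using the pooling head from the fourth hunk. The stand-in VMistralVisionConfig (the real class is defined elsewhere in this repo) and the simplified forward pass are assumptions for illustration; only the annotated __init__ signature and the probe parameter mirror the diff.

# Hedged sketch, not the repo's implementation: VMistralVisionConfig is a
# stand-in and the forward pass is simplified. Only the __init__ signature
# and the probe parameter come from the diff above.
import torch
import torch.nn as nn


class VMistralVisionConfig:
    """Minimal stand-in exposing the fields the diff's classes read."""

    def __init__(self, hidden_size=768, num_attention_heads=12):
        self.hidden_size = hidden_size
        self.num_attention_heads = num_attention_heads


class SiglipMultiheadAttentionPoolingHead(nn.Module):
    """Multihead Attention Pooling."""

    def __init__(self, config: VMistralVisionConfig):
        super().__init__()

        self.probe = nn.Parameter(torch.randn(1, 1, config.hidden_size))
        self.attention = nn.MultiheadAttention(
            config.hidden_size, config.num_attention_heads, batch_first=True
        )

    def forward(self, hidden_state: torch.Tensor) -> torch.Tensor:
        # Attend from a learned probe token over the patch sequence,
        # pooling (batch, seq, hidden) down to (batch, hidden).
        probe = self.probe.repeat(hidden_state.shape[0], 1, 1)
        pooled, _ = self.attention(probe, hidden_state, hidden_state)
        return pooled[:, 0]


config = VMistralVisionConfig()
head = SiglipMultiheadAttentionPoolingHead(config)
print(head(torch.randn(2, 16, config.hidden_size)).shape)  # torch.Size([2, 768])

Annotating the parameter as VMistralVisionConfig, instead of leaving the annotation dangling, lets static checkers and IDEs verify that every config field these modules read actually exists on the config class.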