Jackmin108 committed

Commit 4ee2970 • 1 Parent(s): d2c9d06

fix: mlp

Signed-off-by: Meow <[email protected]>

- mlp.py +1 -1
- xlm_padding.py +1 -7
mlp.py CHANGED
@@ -74,7 +74,7 @@ class Mlp(nn.Module):
             task_out = self.fc2(task_tensor, task_id=task_id)
             out[task_indices] = task_out
         else:
-            out = self.
+            out = self.fc2(y)
 
         return out if not self.return_residual else (out, x)
 
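For context, a minimal sketch of the forward path this one-line fix sits in, assuming a standard fc1 → activation → fc2 MLP around the visible hunk (the constructor, fc1, and the activation below are assumptions, not the repo's exact code):

```python
import torch
import torch.nn as nn

class Mlp(nn.Module):
    # Sketch only: the hunk above covers just the tail of forward();
    # everything else here is an assumed-standard MLP skeleton.
    def __init__(self, in_features, hidden_features, return_residual=False):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.activation = nn.GELU()
        self.fc2 = nn.Linear(hidden_features, in_features)
        self.return_residual = return_residual

    def forward(self, x):
        y = self.activation(self.fc1(x))
        # The fix: the non-task branch now actually projects y back down
        # with fc2; before this commit the line was left truncated ("out = self.").
        out = self.fc2(y)
        return out if not self.return_residual else (out, x)
```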
xlm_padding.py CHANGED
@@ -114,13 +114,7 @@ def unpad_input(hidden_states, attention_mask, adapter_mask=None):
     max_seqlen_in_batch = seqlens_in_batch.max().item()
     cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.torch.int32), (1, 0))
 
-    cu_adapter_mask = None
-    if adapter_mask:
-        cu_adapter_mask = torch.empty(cu_seqlens[-1], dtype=torch.int32)
-        for i in range(len(adapter_mask)):
-            start_idx = cu_seqlens[i]
-            end_idx = cu_seqlens[i + 1]
-            cu_adapter_mask[start_idx:end_idx] = adapter_mask[i]
+    cu_adapter_mask = torch.repeat_interleave(adapter_mask, cu_seqlens[1:] - cu_seqlens[:-1]) if adapter_mask is not None else None
 
     # TD [2022-03-04] We don't want to index with a bool mask, because Pytorch will expand the
     # bool mask, then call nonzero to get the indices, then index with those. The indices is @dim
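The removed loop and the new torch.repeat_interleave expression build the same per-token mask: each sequence's adapter id is repeated once per token of that sequence. The `is not None` guard also sidesteps `if adapter_mask:`, which raises an "ambiguous truth value" error on a multi-element tensor. A self-contained equivalence check (the toy sequence lengths and adapter ids below are made up for illustration):

```python
import torch
import torch.nn.functional as F

# Toy stand-ins for unpad_input's intermediates: three sequences of
# 2, 3, and 1 tokens, each carrying one per-sequence adapter id.
seqlens_in_batch = torch.tensor([2, 3, 1], dtype=torch.int32)
cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))
adapter_mask = torch.tensor([0, 1, 2], dtype=torch.int32)

# Removed version: allocate one slot per token, fill sequence by sequence.
loop_mask = torch.empty(int(cu_seqlens[-1]), dtype=torch.int32)
for i in range(len(adapter_mask)):
    loop_mask[cu_seqlens[i]:cu_seqlens[i + 1]] = adapter_mask[i]

# New version: repeat each id by its sequence length in one vectorized call.
vec_mask = torch.repeat_interleave(adapter_mask, cu_seqlens[1:] - cu_seqlens[:-1])

assert torch.equal(loop_mask, vec_mask)  # both are [0, 0, 1, 1, 1, 2]
```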