sberbank-ai committed on
Commit
6287e8b
1 parent: 3bb76fa

Update README.md

Files changed (1): README.md (+5, -4)
README.md CHANGED
@@ -21,14 +21,14 @@ def mean_pooling(model_output, attention_mask):
     token_embeddings = model_output[0] #First element of model_output contains all token embeddings
     input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
     sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
-    sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
+    sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-10)
     return sum_embeddings / sum_mask
 #Sentences we want sentence embeddings for
 sentences = ['Привет! Как твои дела?',
              'А правда, что 42 твое любимое число?']
 #Load AutoModel from huggingface model repository
-tokenizer = AutoTokenizer.from_pretrained("Andrilko/ru_s_electra_small")
-model = AutoModel.from_pretrained("Andrilko/ru_s_electra_small")
+tokenizer = AutoTokenizer.from_pretrained("sberbank-ai/ru_s_electra_small")
+model = AutoModel.from_pretrained("sberbank-ai/ru_s_electra_small")
 #Tokenize sentences
 encoded_input = tokenizer(sentences, padding=True, truncation=True, max_length=24, return_tensors='pt')
 #Compute token embeddings
@@ -36,4 +36,5 @@ with torch.no_grad():
     model_output = model(**encoded_input)
 #Perform pooling. In this case, mean pooling
 sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
-```
+```
+created by Aleksandr Abramov (Andrilko)
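For reference, the snippet as it reads after this commit can be assembled into a self-contained script. This is a minimal sketch, not part of the commit itself: the imports, the blank lines, the English translations in the comments, and the final print are additions for illustration; the model id sberbank-ai/ru_s_electra_small and everything else come from the diff above.

```python
# Minimal runnable sketch of the README snippet after this commit.
# The imports and the final print are illustrative additions.
import torch
from transformers import AutoTokenizer, AutoModel

# Mean-pool token embeddings, weighting each token by the attention mask
def mean_pooling(model_output, attention_mask):
    token_embeddings = model_output[0]  # first element holds all token embeddings
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
    # clamp guards against division by zero for all-padding rows
    sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-10)
    return sum_embeddings / sum_mask

# Sentences we want sentence embeddings for
# ('Hi! How are you?' and 'Is it true that 42 is your favourite number?')
sentences = ['Привет! Как твои дела?',
             'А правда, что 42 твое любимое число?']

# Load tokenizer and model from the Hugging Face Hub
tokenizer = AutoTokenizer.from_pretrained("sberbank-ai/ru_s_electra_small")
model = AutoModel.from_pretrained("sberbank-ai/ru_s_electra_small")

# Tokenize sentences
encoded_input = tokenizer(sentences, padding=True, truncation=True,
                          max_length=24, return_tensors='pt')

# Compute token embeddings without tracking gradients
with torch.no_grad():
    model_output = model(**encoded_input)

# Perform pooling. In this case, mean pooling
sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
print(sentence_embeddings.shape)  # torch.Size([2, hidden_size])
```

Note that the min=1e-10 clamp this commit introduces only changes the result when a row of the attention mask sums to zero (an entirely padded sequence); for ordinary inputs the division is unaffected.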