rossevine committed on
Commit
ca2dc0c
1 Parent(s): 6794870

Upload lm-boosted decoder

Browse files
language_model/5gram.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:bbb3016860c1398f93c3489e9f08623d1404aacfa67b0756afcf83c0c148563f
3
- size 2020500033
 
 
 
 
language_model/5gram_correct.arpa CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:167ce46d3318f0d94aae7c491d24951de450487b7da49d253fa05066237eb7ab
3
- size 4161893177
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3745b5f78523d7792c4b6f885a7a9a741ac3d82084204e5d27443700f9aeea62
3
+ size 3962698644
language_model/unigrams.txt CHANGED
The diff for this file is too large to render. See raw diff
 
preprocessor_config.json CHANGED
@@ -4,6 +4,7 @@
4
  "feature_size": 1,
5
  "padding_side": "right",
6
  "padding_value": 0.0,
 
7
  "return_attention_mask": true,
8
  "sampling_rate": 16000
9
  }
 
4
  "feature_size": 1,
5
  "padding_side": "right",
6
  "padding_value": 0.0,
7
+ "processor_class": "Wav2Vec2ProcessorWithLM",
8
  "return_attention_mask": true,
9
  "sampling_rate": 16000
10
  }
special_tokens_map.json CHANGED
@@ -1,5 +1,19 @@
1
  {
2
  "additional_special_tokens": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  {
4
  "content": "<s>",
5
  "lstrip": false,
 
1
  {
2
  "additional_special_tokens": [
3
+ {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": true,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "</s>",
12
+ "lstrip": false,
13
+ "normalized": true,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
  {
18
  "content": "<s>",
19
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -5,6 +5,7 @@
5
  "eos_token": "</s>",
6
  "model_max_length": 1000000000000000019884624838656,
7
  "pad_token": "[PAD]",
 
8
  "replace_word_delimiter_char": " ",
9
  "target_lang": null,
10
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
 
5
  "eos_token": "</s>",
6
  "model_max_length": 1000000000000000019884624838656,
7
  "pad_token": "[PAD]",
8
+ "processor_class": "Wav2Vec2ProcessorWithLM",
9
  "replace_word_delimiter_char": " ",
10
  "target_lang": null,
11
  "tokenizer_class": "Wav2Vec2CTCTokenizer",