michaelfeil committed
Commit • 727e312 • 1 Parent(s): 1d943b2
Update README.md
README.md CHANGED
@@ -102,36 +102,54 @@ language:
 - zh
 - zu
 license: mit
+tags:
+- ctranslate2
 ---

 Converted 5/13/23 to Ctranslate2
 ```bash
 export ORG="facebook"
-export NAME="
+export NAME="m2m100_PARAMS"
 ct2-transformers-converter --model "$ORG/$NAME" --copy_files .gitattributes README.md generation_config.json sentencepiece.bpe.model special_tokens_map.json tokenizer_config.json vocab.json --quantization float16
 ```
 Fast-Inference with Ctranslate2
 Speed up inference by 2x-8x using int8 inference in C++
-
 quantized version of facebook/m2m100_1.2B

+```python
+import ctranslate2
+import transformers
+
+translator = ctranslate2.Translator("m2m100_PARAMS")
+tokenizer = transformers.AutoTokenizer.from_pretrained("facebook/m2m100_PARAMS")
+tokenizer.src_lang = "en"
+
+source = tokenizer.convert_ids_to_tokens(tokenizer.encode("Hello world!"))
+target_prefix = [tokenizer.lang_code_to_token["de"]]
+results = translator.translate_batch([source], target_prefix=[target_prefix])
+target = results[0].hypotheses[0][1:]
+
+print(tokenizer.decode(tokenizer.convert_tokens_to_ids(target)))
+```
+
+Alternative:
 pip install hf_hub_ctranslate2>=1.0.0 ctranslate2>=3.13.0

 Checkpoint compatible with ctranslate2 and hf-hub-ctranslate2

 compute_type=int8_float16 for device="cuda"
 compute_type=int8 for device="cpu"
-```
+```python
 from hf_hub_ctranslate2 import TranslatorCT2fromHfHub, GeneratorCT2fromHfHub

-model_name = "michaelfeil/ct2fast-
+model_name = "michaelfeil/ct2fast-m2m100_PARAMS"
 model = TranslatorCT2fromHfHub(
     # load in int8 on CUDA
     model_name_or_path=model_name,
     device="cuda",
     compute_type="int8_float16"
 )
-model.tokenizer = AutoTokenizer.from_pretrained("facebook/
+model.tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_PARAMS")
 outputs = model.generate(
     text=["Translate to german: How are you doing?"],
     min_decoding_length=24,
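
Two details in the new README are worth flagging. The placeholder `m2m100_PARAMS` is left unsubstituted; since the README itself says "quantized version of facebook/m2m100_1.2B", the intended name is presumably `m2m100_1.2B`. The hf_hub_ctranslate2 snippet also uses `AutoTokenizer` without importing it, so it additionally needs `from transformers import AutoTokenizer`. Below is a minimal sketch, not part of the commit, of the same translation flow on CPU with the plain ctranslate2 API, following the README's note that compute_type=int8 pairs with device="cpu"; the local output directory name and the substituted model name are assumptions.

```python
# Hedged sketch (not from the commit): CPU inference with the converted model.
# Assumptions: the converter wrote its output to "m2m100_1.2B", and the README's
# m2m100_PARAMS placeholder stands for m2m100_1.2B.
import ctranslate2
import transformers

# compute_type="int8" is the README's recommendation for device="cpu".
translator = ctranslate2.Translator("m2m100_1.2B", device="cpu", compute_type="int8")
tokenizer = transformers.AutoTokenizer.from_pretrained("facebook/m2m100_1.2B")
tokenizer.src_lang = "en"

# M2M100 expects the target language token as the decoder prefix.
source = tokenizer.convert_ids_to_tokens(tokenizer.encode("How are you doing?"))
target_prefix = [tokenizer.lang_code_to_token["de"]]
results = translator.translate_batch([source], target_prefix=[target_prefix])
target = results[0].hypotheses[0][1:]  # drop the leading language-code token

print(tokenizer.decode(tokenizer.convert_tokens_to_ids(target)))
```

On GPU, the same constructor would instead take device="cuda" with compute_type="int8_float16", matching the hf_hub_ctranslate2 variant shown in the diff.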