Update README.md
README.md CHANGED
@@ -25,7 +25,7 @@ licence: apache-2.0
 ---
 # Randeng-T5-784M-QA-Chinese
 T5 for Chinese Question Answering
-- Github: [Fengshenbang-LM](https://github.com/IDEA-CCNL/Fengshenbang-LM)
+- Github: [Fengshenbang-LM](https://github.com/IDEA-CCNL/Fengshenbang-LM/tree/main/fengshen/examples/qa_t5)
 - Docs: [Fengshenbang-Docs](https://fengshenbang-doc.readthedocs.io/)
@@ -80,7 +80,7 @@ plain_text='question:'+sample['question']+'knowledge:'+sample['context'][:self.m

 res_prefix=tokenizer.encode('answer',add_special_token=False)
 res_prefix.append(tokenizer.convert_tokens_to_ids('<extra_id_0>')
-res_prefix.
+res_prefix.append(EOS_TOKEN_ID)
 l_rp=len(res_prefix)

 tokenized=tokenizer.encode(plain_text,add_special_tokens=False,truncation=True,max_length=self.max_seq_length-2-l_rp)
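This hunk completes the dangling `res_prefix.` into `res_prefix.append(EOS_TOKEN_ID)`. The surrounding snippet still is not runnable as quoted: `add_special_token` should be `add_special_tokens`, the `convert_tokens_to_ids(...)` call is missing its closing parenthesis, and `EOS_TOKEN_ID` is never defined in the excerpt. A minimal self-contained sketch of this preprocessing step, assuming `EOS_TOKEN_ID` is the tokenizer's `eos_token_id`, an illustrative `max_seq_length` of 512, and the checkpoint named in the README title (the context slice in the hunk header is cut off in this view, so the sketch leaves the knowledge text untruncated):

```python
from transformers import T5Tokenizer

# Checkpoint assumed from the README title; adjust if the repo id differs.
tokenizer = T5Tokenizer.from_pretrained('IDEA-CCNL/Randeng-T5-784M-QA-Chinese')

EOS_TOKEN_ID = tokenizer.eos_token_id  # assumed definition of the constant added here
max_seq_length = 512                   # illustrative; the README reads self.max_seq_length

# Illustrative sample; the README builds this from dataset rows.
sample = {'question': '中国的首都是哪座城市?',
          'context': '北京是中华人民共和国的首都,也是全国的政治和文化中心。'}
plain_text = 'question:' + sample['question'] + 'knowledge:' + sample['context']

# Answer prompt appended to the encoder input: 'answer' + <extra_id_0> sentinel + EOS.
res_prefix = tokenizer.encode('answer', add_special_tokens=False)
res_prefix.append(tokenizer.convert_tokens_to_ids('<extra_id_0>'))
res_prefix.append(EOS_TOKEN_ID)
l_rp = len(res_prefix)

# Truncate the source so text + prompt + 2 special tokens fit the length budget.
tokenized = tokenizer.encode(plain_text, add_special_tokens=False,
                             truncation=True,
                             max_length=max_seq_length - 2 - l_rp)
tokenized += res_prefix
```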
@@ -90,7 +90,7 @@ tokenized+=res_prefix
 # Generate answer
 pred_ids = model.generate(input_ids=tokenized,max_new_token=self.max_target_length,do_sample=True,top_p=0.9)
 pred_tokens=tokenizer.batch_decode(pred_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
-res=pred_tokens.replace('<extra_id_0>','').replace('有答案:')
+res=pred_tokens.replace('<extra_id_0>','').replace('有答案:','')
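The second `replace` call was missing its replacement argument; the one-argument form raises a `TypeError` at runtime, so the fix supplies the empty string. Two more issues in the quoted lines are worth flagging: `max_new_token` is presumably the `max_new_tokens` argument of `generate`, and `generate` expects a batched tensor rather than the plain ID list built above. A hedged sketch of the generation step with those corrections, continuing from the preprocessing sketch and using an illustrative `max_target_length`:

```python
import torch
from transformers import T5ForConditionalGeneration

# Checkpoint assumed from the README title, as above.
model = T5ForConditionalGeneration.from_pretrained('IDEA-CCNL/Randeng-T5-784M-QA-Chinese')
max_target_length = 128  # illustrative; the README reads self.max_target_length

# generate() wants a batched LongTensor, not the raw Python list built above.
input_ids = torch.tensor([tokenized], dtype=torch.long)
pred_ids = model.generate(input_ids=input_ids,
                          max_new_tokens=max_target_length,
                          do_sample=True,
                          top_p=0.9)

pred_tokens = tokenizer.batch_decode(pred_ids, skip_special_tokens=True,
                                     clean_up_tokenization_spaces=False)[0]

# Strip the <extra_id_0> sentinel and the literal '有答案:' ("has an answer:")
# marker from the decoded text, as the fixed README line does.
res = pred_tokens.replace('<extra_id_0>', '').replace('有答案:', '')
print(res)
```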