Upload folder using huggingface_hub
Browse files
README.md
CHANGED
@@ -1709,12 +1709,18 @@ For more details please refer to our Github: [FlagEmbedding](https://github.com/
|
|
1709 |
## 📑 Open-source Plan
|
1710 |
|
1711 |
- [x] Checkpoint
|
1712 |
-
- [ ] Training Data
|
|
|
1713 |
- [ ] Evaluation Pipeline
|
1714 |
-
- [ ] Technical Report
|
1715 |
|
1716 |
-
|
1717 |
|
|
|
|
|
|
|
|
|
|
|
|
|
1718 |
|
1719 |
## Usage
|
1720 |
|
@@ -1920,6 +1926,15 @@ print(scores.tolist())
|
|
1920 |
If you find this repository useful, please consider giving it a star :star: and a citation.
|
1921 |
|
1922 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1923 |
@misc{bge_embedding,
|
1924 |
title={C-Pack: Packaged Resources To Advance General Chinese Embedding},
|
1925 |
author={Shitao Xiao and Zheng Liu and Peitian Zhang and Niklas Muennighoff},
|
|
|
1709 |
## 📑 Open-source Plan
|
1710 |
|
1711 |
- [x] Checkpoint
|
1712 |
+
- [x] Training Data
|
1713 |
+
- [x] Technical Report
|
1714 |
- [ ] Evaluation Pipeline
|
|
|
1715 |
|
1716 |
+
The technical report for **BGE-EN-ICL** is available here: [Making Text Embedders Few-Shot Learners](https://arxiv.org/abs/2409.15700).
|
1717 |
|
1718 |
+
## Data List
|
1719 |
+
|
1720 |
+
| Data | Introduction |
|
1721 |
+
| ------------------------------------------------------------ | ------------------------------------------------------------ |
|
1722 |
+
| [e5-data](https://huggingface.co/datasets/cfli/bge-e5data) | Public data identical to the data used by [e5-mistral](https://huggingface.co/intfloat/e5-mistral-7b-instruct) |
|
1723 |
+
| [full-data](https://huggingface.co/datasets/cfli/bge-full-data) | The full dataset we used for training |
|
1724 |
|
1725 |
## Usage
|
1726 |
|
|
|
1926 |
If you find this repository useful, please consider giving it a star :star: and a citation.
|
1927 |
|
1928 |
```
|
1929 |
+
@misc{li2024makingtextembeddersfewshot,
|
1930 |
+
title={Making Text Embedders Few-Shot Learners},
|
1931 |
+
author={Chaofan Li and MingHao Qin and Shitao Xiao and Jianlyu Chen and Kun Luo and Yingxia Shao and Defu Lian and Zheng Liu},
|
1932 |
+
year={2024},
|
1933 |
+
eprint={2409.15700},
|
1934 |
+
archivePrefix={arXiv},
|
1935 |
+
primaryClass={cs.IR},
|
1936 |
+
url={https://arxiv.org/abs/2409.15700},
|
1937 |
+
}
|
1938 |
@misc{bge_embedding,
|
1939 |
title={C-Pack: Packaged Resources To Advance General Chinese Embedding},
|
1940 |
author={Shitao Xiao and Zheng Liu and Peitian Zhang and Niklas Muennighoff},
|