|
<!DOCTYPE html> |
|
<html lang="en"> |
|
<head> |
|
<meta charset="UTF-8" /> |
|
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no" /> |
|
<meta http-equiv="x-ua-compatible" content="ie=edge" /> |
|
<title>Pyserini Reproductions: MS MARCO V1 Passage</title> |
|
|
|
<link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.11.2/css/all.css" /> |
|
|
|
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;500;700&display=swap" /> |
|
|
|
<link href="https://cdnjs.cloudflare.com/ajax/libs/mdb-ui-kit/4.0.0/mdb.min.css" rel="stylesheet" /> |
|
|
|
<style> |
|
/* Remove cell padding inside rows tagged hide-table-padding. */ |
|
tr.hide-table-padding td { |
|
padding: 0; |
|
} |
|
|
|
/* Positioning context for the marker drawn by the :after rules below. */ |
|
.expand-button { |
|
position: relative; |
|
} |
|
|
|
/* Marker for an accordion toggle: a '-' placed 0.75rem from the left and vertically centred. */ |
|
.accordion-toggle .expand-button:after { |
|
position: absolute; |
|
left:.75rem; |
|
top: 50%; |
|
transform: translate(0, -50%); |
|
content: '-'; |
|
} |
|
|
|
/* While the toggle also carries the collapsed class, the marker reads '+' instead. */ |
|
.accordion-toggle.collapsed .expand-button:after { |
|
content: '+'; |
|
} |
|
|
|
/* Declarations shared by both code-quote variants: grey left rule plus indent. */ |
|
blockquote.mycode, |
|
blockquote.mycode2 { |
|
  margin-left: 25px; |
|
  padding-left: 15px; |
|
  border-left: 3px solid #ccc; |
|
} |
|
|
|
/* Variant 1 adds space above the quote. */ |
|
blockquote.mycode { |
|
  margin-top: 15px; |
|
} |
|
|
|
/* Variant 2 pads the quote vertically instead. */ |
|
blockquote.mycode2 { |
|
  padding-top: 10px; |
|
  padding-bottom: 10px; |
|
} |
|
|
|
/* Header cells of the first (group) header row: no bottom border, no bottom padding. */ |
|
tr th.headertop { |
|
border-bottom: none; |
|
padding-bottom: 0rem |
|
} |
|
|
|
/* Header cells of the second (metric) header row: no top padding. */ |
|
tr th.headerbottom { |
|
padding-top: 0rem |
|
} |
|
|
|
/* Uniform 0.75rem padding on the cells of .table (grandchildren of every non-caption child); presumably overrides the UI kit default - confirm. */ |
|
.table>:not(caption)>*>*{padding:0.75rem 0.75rem} |
|
|
|
/* Copy button pinned to the top-right corner of its positioned ancestor. */ |
|
.copy-code-button { |
|
  /* Placement */ |
|
  position: absolute !important; |
|
  top: 0 !important; |
|
  right: 0 !important; |
|
  margin: 0; |
|
  padding: 3px 8px; |
|
  min-width: 55px; |
|
  /* Box appearance: flat grey rectangle, no border or image */ |
|
  background: grey none repeat scroll 0 0; |
|
  border-style: none; |
|
  border-radius: 0; |
|
  cursor: pointer; |
|
  /* Label typography */ |
|
  color: #F1F2F3 !important; |
|
  font-family: 'HELVETICA',sans-serif; |
|
  font-size: 0.8em; |
|
  font-weight: 500; |
|
  line-height: 1.42rem; |
|
  text-align: center; |
|
  text-decoration: none; |
|
  text-indent: 0; |
|
  text-transform: uppercase; |
|
} |
|
|
|
.copy-code-button > span { |
|
color: #F1F2F3 !important; |
|
} |
|
|
|
/* Box-sizing for the copy button and its own pseudo-elements only. */ |
|
/* A bare ::before / ::after in the selector list would match the pseudo-elements of every element on the page. */ |
|
.copy-code-button, |
|
.copy-code-button::before, |
|
.copy-code-button::after { |
|
  box-sizing: inherit; |
|
} |
|
|
|
.copy-code-button::before { |
|
content: ''; |
|
display: inline-block; |
|
width: 16px; |
|
height: 16px; |
|
margin-right: 3px; |
|
background-size: contain; |
|
background-image: url("data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiPz4KPHN2ZyB3aWR0aD0iMTVweCIgaGVpZ2h0PSIxNXB4IiB2aWV3Qm94PSIwIDAgMTUgMTUiIHZlcnNpb249IjEuMSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiB4bWxuczp4bGluaz0iaHR0cDovL3d3dy53My5vcmcvMTk5OS94bGluayI+CiAgICA8IS0tIEdlbmVyYXRvcjogU2tldGNoIDUwLjIgKDU1MDQ3KSAtIGh0dHA6Ly93d3cuYm9oZW1pYW5jb2RpbmcuY29tL3NrZXRjaCAtLT4KICAgIDx0aXRsZT5QYWdlIDE8L3RpdGxlPgogICAgPGRlc2M+Q3JlYXRlZCB3aXRoIFNrZXRjaC48L2Rlc2M+CiAgICA8ZGVmcz48L2RlZnM+CiAgICA8ZyBpZD0iRmxvdyIgc3Ryb2tlPSJub25lIiBzdHJva2Utd2lkdGg9IjEiIGZpbGw9Im5vbmUiIGZpbGwtcnVsZT0iZXZlbm9kZCI+CiAgICAgICAgPGcgaWQ9IkJ0dG5faHRtbCIgdHJhbnNmb3JtPSJ0cmFuc2xhdGUoLTgxOS4wMDAwMDAsIC03NTMuMDAwMDAwKSIgZmlsbD0iI0ZGRkZGRiI+CiAgICAgICAgICAgIDxnIGlkPSJHcm91cC0xIiB0cmFuc2Zvcm09InRyYW5zbGF0ZSgzMTEuMDAwMDAwLCA0MDUuMDAwMDAwKSI+CiAgICAgICAgICAgICAgICA8ZyBpZD0iR3JvdXAtMiIgdHJhbnNmb3JtPSJ0cmFuc2xhdGUoNTA4LjAwMDAwMCwgMzQyLjAwMDAwMCkiPgogICAgICAgICAgICAgICAgICAgIDxwYXRoIGQ9Ik0xMy45NzcyNzI3LDYgTDMuNDA5MDkwOTEsNiBDMi44NDQ1NDU0NSw2IDIuMzg2MzYzNjQsNi40NTgxODE4MiAyLjM4NjM2MzY0LDcuMDIyNzI3MjcgTDIuMzg2MzYzNjQsMTcuNTkwOTA5MSBDMi4zODYzNjM2NCwxOC4xNTU0NTQ1IDIuODQ0NTQ1NDUsMTguNjEzNjM2NCAzLjQwOTA5MDkxLDE4LjYxMzYzNjQgTDEzLjk3NzI3MjcsMTguNjEzNjM2NCBDMTQuNTQxODE4MiwxOC42MTM2MzY0IDE1LDE4LjE1NTQ1NDUgMTUsMTcuNTkwOTA5MSBMMTUsNy4wMjI3MjcyNyBDMTUsNi40NTgxODE4MiAxNC41NDE4MTgyLDYgMTMuOTc3MjcyNyw2IFogTTE0LjMxODE4MTgsMTcuNTkwOTA5MSBDMTQuMzE4MTgxOCwxNy43NzkwOTA5IDE0LjE2NTQ1NDUsMTcuOTMxODE4MiAxMy45NzcyNzI3LDE3LjkzMTgxODIgTDMuNDA5MDkwOTEsMTcuOTMxODE4MiBDMy4yMjA5MDkwOSwxNy45MzE4MTgyIDMuMDY4MTgxODIsMTcuNzc5MDkwOSAzLjA2ODE4MTgyLDE3LjU5MDkwOTEgTDMuMDY4MTgxODIsNy4wMjI3MjcyNyBDMy4wNjgxODE4Miw2LjgzNDU0NTQ1IDMuMjIwOTA5MDksNi42ODE4MTgxOCAzLjQwOTA5MDkxLDYuNjgxODE4MTggTDEzLjk3NzI3MjcsNi42ODE4MTgxOCBDMTQuMTY1NDU0NSw2LjY4MTgxODE4IDE0LjMxODE4MTgsNi44MzQ1NDU0NSAxNC4zMTgxODE4LDcuMDIyNzI3MjcgTDE0LjMxODE4MTgsMTcuNTkwOTA5MSBaIE0xMS45MzE4MTgyLDE5Ljk3NzI3MjcgQzExLjkzMTgxODIsMjAuMTY1NDU0NSAxMS43NzkwOTA5LDIwLjMxODE4MTg
gMTEuNTkwOTA5MSwyMC4zMTgxODE4IEwxLjAyMjcyNzI3LDIwLjMxODE4MTggQzAuODM0NTQ1NDU1LDIwLjMxODE4MTggMC42ODE4MTgxODIsMjAuMTY1NDU0NSAwLjY4MTgxODE4MiwxOS45NzcyNzI3IEwwLjY4MTgxODE4Miw5LjQwOTA5MDkxIEMwLjY4MTgxODE4Miw5LjIyMDkwOTA5IDAuODM0NTQ1NDU1LDkuMDY4MTgxODIgMS4wMjI3MjcyNyw5LjA2ODE4MTgyIEwxLjM2MzYzNjM2LDkuMDY4MTgxODIgTDEuMzYzNjM2MzYsOC4zODYzNjM2NCBMMS4wMjI3MjcyNyw4LjM4NjM2MzY0IEMwLjQ1ODE4MTgxOCw4LjM4NjM2MzY0IDAsOC44NDQ1NDU0NSAwLDkuNDA5MDkwOTEgTDAsMTkuOTc3MjcyNyBDMCwyMC41NDE4MTgyIDAuNDU4MTgxODE4LDIxIDEuMDIyNzI3MjcsMjEgTDExLjU5MDkwOTEsMjEgQzEyLjE1NTQ1NDUsMjEgMTIuNjEzNjM2NCwyMC41NDE4MTgyIDEyLjYxMzYzNjQsMTkuOTc3MjcyNyBMMTIuNjEzNjM2NCwxOS42MzYzNjM2IEwxMS45MzE4MTgyLDE5LjYzNjM2MzYgTDExLjkzMTgxODIsMTkuOTc3MjcyNyBaIiBpZD0iUGFnZS0xIj48L3BhdGg+CiAgICAgICAgICAgICAgICA8L2c+CiAgICAgICAgICAgIDwvZz4KICAgICAgICA8L2c+CiAgICA8L2c+Cjwvc3ZnPg=="); |
|
background-repeat: no-repeat; |
|
position: relative; |
|
top: 3px; |
|
} |
|
|
|
/* Suppress the focus ring only for focus that is not keyboard-driven (for example a mouse click). */ |
|
.copy-code-button:focus:not(:focus-visible) { |
|
  outline: 0; |
|
} |
|
|
|
/* Keyboard focus keeps a visible indicator. */ |
|
/* The negative offset draws the ring inside the button, which sits flush in the corner of its container. */ |
|
.copy-code-button:focus-visible { |
|
  outline: 2px solid #F1F2F3; |
|
  outline-offset: -2px; |
|
} |
|
|
|
/* Any pre whose class attribute contains "prettyprint": becomes a positioning context and clips overflowing content. */ |
|
pre[class*="prettyprint"] { |
|
position: relative; |
|
overflow: hidden; |
|
} |
|
</style> |
|
</head> |
|
<body> |
|
|
|
|
|
<div id="intro" class="bg-image vh-100 shadow-1-strong" style="max-height: 150px"> |
|
<div class="mask" style=" |
|
background: linear-gradient( |
|
45deg, |
|
rgba(29, 236, 197, 0.7), |
|
rgba(91, 14, 214, 0.7) 100% |
|
); |
|
"> |
|
<div class="container d-flex align-items-center justify-content-center text-center h-100" style="max-height: 150px"> |
|
<div class="text-white"> |
|
<h1 class="mb-3">$title</h1> |
|
</div> |
|
</div> |
|
</div> |
|
</div> |
|
|
|
|
|
<div class="container my-4"> |
|
|
|
<p>The two-click<a href="#" data-mdb-toggle="tooltip" title="What are the two clicks, you ask? Copy and paste!"><sup>*</sup></a> reproduction matrix below provides commands for reproducing experimental results reported in a number of papers, denoted by the references in square brackets. |
|
Instructions for programmatic execution are shown at the bottom of this page (scroll down).</p> |
|
|
|
<div class="table-responsive"> |
|
<table class="table"> |
|
<thead> |
|
<!-- Group header row: three unlabeled leading cells, then one spanning header per evaluation set. --> |
|
<tr> |
|
<th class="headertop"></th> |
|
<th class="headertop"></th> |
|
<th class="headertop"></th> |
|
<th class="headertop" colspan="4"><b>TREC 2019</b></th> |
|
<th class="headertop" colspan="4"><b>TREC 2020</b></th> |
|
<th class="headertop" colspan="3"><b>dev</b></th> |
|
</tr> |
|
<!-- Metric header row: an empty cell closes each of the two TREC groups. --> |
|
<!-- NOTE(review): the "dev" group above spans 3 columns but only 2 cells (RR@10, R@1K) follow here, so this row has 13 cells against 14 columns; confirm against the generated body rows. --> |
|
<tr> |
|
<th class="headerbottom" scope="col"></th> |
|
<th class="headerbottom" scope="col"></th> |
|
<th class="headerbottom" scope="col"></th> |
|
<th class="headerbottom" scope="col"><br/>AP</th> |
|
<th class="headerbottom" scope="col">nDCG@10</th> |
|
<th class="headerbottom" scope="col">R@1K</th> |
|
<th class="headerbottom" scope="col"></th> |
|
<th class="headerbottom" scope="col"><br/>AP</th> |
|
<th class="headerbottom" scope="col">nDCG@10</th> |
|
<th class="headerbottom" scope="col">R@1K</th> |
|
<th class="headerbottom" scope="col"></th> |
|
<th class="headerbottom" scope="col">RR@10</th> |
|
<th class="headerbottom" scope="col">R@1K</th> |
|
|
|
</tr> |
|
</thead> |
|
<tbody> |
|
|
|
$rows |
|
|
|
</tbody> |
|
</table> |
|
</div> |
|
|
|
<ul style="list-style-type:none; padding-top: 25px"> |
|
|
|
<li><p>[1] Xueguang Ma, Ronak Pradeep, Rodrigo Nogueira, and Jimmy Lin. |
|
<a href="https://cs.uwaterloo.ca/~jimmylin/publications/Ma_etal_SIGIR2022.pdf">Document Expansions and Learned Sparse Lexical Representations for MS MARCO V1 and V2.</a> |
|
<i>Proceedings of the 45th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR 2022)</i>, July 2022.</p> |
|
|
|
<p> Numbers in parentheses correspond to rows in Table 1 of the paper.</p></li> |
|
|
|
<li><p>[2] Thibault Formal, Carlos Lassance, Benjamin Piwowarski, and Stéphane Clinchant. |
|
<a href="https://arxiv.org/abs/2205.04733">From Distillation to Hard Negative Sampling: Making Sparse Neural IR Models More Effective.</a> |
|
<i>Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR 2022)</i>, May 2022.</p></li> |
|
|
|
<li><p>[3] Lee Xiong, Chenyan Xiong, Ye Li, Kwok-Fung Tang, Jialin Liu, Paul N. Bennett, Junaid Ahmed, and Arnold Overwijk. |
|
<a href="https://openreview.net/forum?id=zeFrfgyZln">Approximate Nearest Neighbor Negative Contrastive Learning for Dense Text Retrieval.</a> |
|
<i>Proceedings of the 9th International Conference on Learning Representations (ICLR 2021)</i>, May 2021.</p></li> |
|
|
|
<li><p>[4] Sebastian Hofstätter, Sophia Althammer, Michael Schröder, Mete Sertkan, and Allan Hanbury. |
|
<a href="https://arxiv.org/abs/2010.02666">Improving Efficient Neural Ranking Models with Cross-Architecture Knowledge Distillation.</a> |
|
<i>arXiv:2010.02666</i>, October 2020.</p></li> |
|
|
|
<li><p>[5] Sebastian Hofstätter, Sheng-Chieh Lin, Jheng-Hong Yang, Jimmy Lin, and Allan Hanbury. |
|
<a href="https://dl.acm.org/doi/10.1145/3404835.3462891">Efficiently Teaching an Effective Dense Retriever with Balanced Topic Aware Sampling.</a> |
|
<i>Proceedings of the 44th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR 2021)</i>, pages 113-122, July 2021.</p></li> |
|
|
|
<li><p>[6] Sheng-Chieh Lin, Jheng-Hong Yang, and Jimmy Lin. |
|
<a href="https://aclanthology.org/2021.repl4nlp-1.17/">In-Batch Negatives for Knowledge Distillation with Tightly-Coupled Teachers for Dense Retrieval.</a> |
|
<i>Proceedings of the 6th Workshop on Representation Learning for NLP (RepL4NLP-2021)</i>, pages 163-173, August 2021.</p></li> |
|
|
|
<li><p>[7] Minghan Li, Sheng-Chieh Lin, Xueguang Ma, and Jimmy Lin. |
|
<a href="https://arxiv.org/abs/2302.06587">SLIM: Sparsified Late Interaction for Multi-Vector Retrieval with Inverted Indexes.</a> |
|
<i>arXiv:2302.06587</i>, February 2023.</p></li> |
|
|
|
<li><p>[8] Sheng-Chieh Lin, Minghan Li, and Jimmy Lin. |
|
<a href="https://arxiv.org/abs/2208.00511">Aggretriever: A Simple Approach to Aggregate Textual Representation for Robust Dense Passage Retrieval.</a> |
|
<i>arXiv:2208.00511</i>, July 2022.</p></li> |
|
|
|
</ul> |
|
|
|
<!-- Empty 20px vertical spacer. A div is not a void element, so it needs an explicit end tag; the trailing-slash form leaves it open. --> |
|
<div style="padding-top: 20px"></div> |
|
|
|
<h4>Programmatic Execution</h4> |
|
|
|
<p>All experimental runs shown in the above table can be programmatically executed based on the instructions below. |
|
To list all the experimental conditions:</p> |
|
|
|
<blockquote class="mycode2"><tt> |
|
python -m pyserini.2cr.msmarco --collection v1-passage --list-conditions |
|
</tt></blockquote> |
|
|
|
<p>These conditions correspond to the table rows above.</p> |
|
|
|
<p>For all conditions, just show the commands in a "dry run":</p> |
|
|
|
<blockquote class="mycode2"><tt> |
|
python -m pyserini.2cr.msmarco --collection v1-passage --all --display-commands --dry-run |
|
</tt></blockquote> |
|
|
|
<p>To actually run all the experimental conditions:</p> |
|
|
|
<blockquote class="mycode2"><tt> |
|
python -m pyserini.2cr.msmarco --collection v1-passage --all --display-commands |
|
</tt></blockquote> |
|
|
|
<p>With the above command, run files will be placed in the current directory. |
|
Use the option <tt>--directory runs/</tt> to place the runs in a sub-directory.</p> |
|
|
|
<p>To show the commands for a specific condition:</p> |
|
|
|
<blockquote class="mycode2"><tt> |
|
python -m pyserini.2cr.msmarco --collection v1-passage --condition bm25-default --display-commands --dry-run |
|
</tt></blockquote> |
|
|
|
<p>This will generate exactly the commands for a specific condition above (corresponding to a row in the table).</p> |
|
|
|
<p>To actually run a specific condition:</p> |
|
|
|
<blockquote class="mycode2"><tt> |
|
python -m pyserini.2cr.msmarco --collection v1-passage --condition bm25-default --display-commands |
|
</tt></blockquote> |
|
|
|
<p>Again, with the above command, run files will be placed in the current directory. |
|
Use the option <tt>--directory runs/</tt> to place the runs in a sub-directory.</p> |
|
|
|
<p>Finally, to generate this page:</p> |
|
|
|
<blockquote class="mycode2"><tt> |
|
python -m pyserini.2cr.msmarco --collection v1-passage --generate-report --output msmarco-v1-passage.html |
|
</tt></blockquote> |
|
|
|
<p>The output file <tt>msmarco-v1-passage.html</tt> should be identical to this page.</p> |
|
|
|
<!-- Empty 50px vertical spacer. A div is not a void element, so it needs an explicit end tag; the trailing-slash form leaves it open. --> |
|
<div style="padding-top: 50px"></div> |
|
|
|
</div> |
|
|
|
|
|
|
|
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.0/jquery.min.js"></script> |
|
<script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/4.3.1/js/bootstrap.min.js"></script> |
|
<script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mdb-ui-kit/4.0.0/mdb.min.js"></script> |
|
<script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.10/clipboard.min.js"></script> |
|
|
|
<script> |
|
document.querySelectorAll('pre').forEach(function (codeBlock) { |
|
var button = document.createElement('button'); |
|
button.className = 'copy-code-button'; |
|
button.type = 'button'; |
|
var s = codeBlock.innerText; |
|
button.setAttribute('data-clipboard-text',s); |
|
button.innerText = 'Copy'; |
|
|
|
|
|
codeBlock.classList.add('prettyprint'); |
|
|
|
codeBlock.appendChild(button); |
|
}); |
|
|
|
var clipboard = new ClipboardJS('.copy-code-button'); |
|
|
|
clipboard.on('success', function(e) { |
|
console.info('Action:', e.action); |
|
console.info('Text:', e.text); |
|
console.info('Trigger:', e.trigger); |
|
e.trigger.textContent = 'Copied'; |
|
window.setTimeout(function() { |
|
e.trigger.textContent = 'Copy'; |
|
}, 2000); |
|
e.clearSelection(); |
|
}); |
|
|
|
clipboard.on('error', function(e) { |
|
console.error('Action:', e.action); |
|
console.error('Trigger:', e.trigger); |
|
e.trigger.textContent = 'Error Copying'; |
|
window.setTimeout(function() { |
|
e.trigger.textContent = 'Copy'; |
|
}, 2000); |
|
e.clearSelection(); |
|
}); |
|
|
|
</script> |
|
|
|
</body> |
|
</html> |
|
|