Spaces:
Running
Running
kwargs isn't passed down when using evaluate.combine()
#2
by
alvations
- opened
Using the default .load()
function works as expected:
import evaluate
bertscore = evaluate.load("bertscore")
predictions = ["hello there", "general kenobi"]
references = ["hello there", "general kenobi"]
results = bertscore.compute(predictions=predictions, references=references, lang="en")
results
[out]:
{'precision': [1.000000238418579, 0.9999999403953552],
'recall': [1.000000238418579, 0.9999999403953552],
'f1': [1.000000238418579, 0.9999999403953552],
'hashcode': 'roberta-large_L17_no-idf_version=0.3.12(hug_trans=4.20.1)'}
But when using .combine()
instead, e.g.
import evaluate
bertscore = evaluate.combine(["bertscore"])
predictions = ["hello there", "general kenobi"]
references = ["hello there", "general kenobi"]
results = bertscore.compute(predictions=predictions, references=references, lang="en")
results
it throws the error below; it looks like the kwargs aren't passed down to the underlying metric:
[out]:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/tmp/ipykernel_27/994019662.py in <module>
3 predictions = ["hello there", "general kenobi"]
4 references = ["hello there", "general kenobi"]
----> 5 results = bertscore.compute(predictions=predictions, references=references, lang="en")
6 results
/opt/conda/lib/python3.7/site-packages/evaluate/module.py in compute(self, predictions, references, **kwargs)
860 batch = {"predictions": predictions, "references": references, **kwargs}
861 batch = {input_name: batch[input_name] for input_name in evaluation_module._feature_names()}
--> 862 results.append(evaluation_module.compute(**batch))
863
864 return self._merge_results(results)
/opt/conda/lib/python3.7/site-packages/evaluate/module.py in compute(self, predictions, references, **kwargs)
442 inputs = {input_name: self.data[input_name] for input_name in self._feature_names()}
443 with temp_seed(self.seed):
--> 444 output = self._compute(**inputs, **compute_kwargs)
445
446 if self.buf_writer is not None:
~/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--bertscore/cf4907b18f8f741f202232c0f8009a3bd49ff98802c245abcb6ea51a37a8c05b/bertscore.py in _compute(self, predictions, references, lang, model_type, num_layers, verbose, idf, device, batch_size, nthreads, all_layers, rescale_with_baseline, baseline_path, use_fast_tokenizer)
169 if lang is None:
170 raise ValueError(
--> 171 "Either 'lang' (e.g. 'en') or 'model_type' (e.g. 'microsoft/deberta-xlarge-mnli')"
172 " must be specified"
173 )
ValueError: Either 'lang' (e.g. 'en') or 'model_type' (e.g. 'microsoft/deberta-xlarge-mnli') must be specified