alvations committed
Commit 71d157f
1 parent: 9665d2d

Add my new, shiny module.

Files changed (2)
  1. llm_harness_mistral_arc.py +23 -62
  2. requirements.txt +2 -1
llm_harness_mistral_arc.py CHANGED
@@ -1,66 +1,23 @@
-# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""TODO: Add a description here."""
-
 import evaluate
 import datasets
-
+import lm_eval
 
 # TODO: Add BibTeX citation
-_CITATION = """\
-@InProceedings{huggingface:module,
-title = {A great new module},
-authors={huggingface, Inc.},
-year={2020}
-}
+_CITATION = """
 """
 
 # TODO: Add description of the module here
-_DESCRIPTION = """\
-This new module is designed to solve this great ML task and is crafted with a lot of care.
+_DESCRIPTION = """
 """
 
 
 # TODO: Add description of the arguments of the module here
 _KWARGS_DESCRIPTION = """
-Calculates how good are predictions given some references, using certain scores
-Args:
-    predictions: list of predictions to score. Each predictions
-        should be a string with tokens separated by spaces.
-    references: list of reference for each prediction. Each
-        reference should be a string with tokens separated by spaces.
-Returns:
-    accuracy: description of the first score,
-    another_score: description of the second score,
-Examples:
-    Examples should be written in doctest format, and should illustrate how
-    to use the function.
-
-    >>> my_new_module = evaluate.load("my_new_module")
-    >>> results = my_new_module.compute(references=[0, 1], predictions=[0, 1])
-    >>> print(results)
-    {'accuracy': 1.0}
 """
 
-# TODO: Define external resources urls if needed
-BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
-
 
 @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
 class llm_harness_mistral_arc(evaluate.Metric):
-    """TODO: Short description of my evaluation module."""
-
     def _info(self):
         # TODO: Specifies the evaluate.EvaluationModuleInfo object
         return evaluate.MetricInfo(
@@ -70,10 +27,14 @@ class llm_harness_mistral_arc(evaluate.Metric):
             citation=_CITATION,
             inputs_description=_KWARGS_DESCRIPTION,
             # This defines the format of each prediction and reference
-            features=datasets.Features({
-                'predictions': datasets.Value('int64'),
-                'references': datasets.Value('int64'),
-            }),
+            features=[
+                datasets.Features(
+                    {
+                        "pretrained": datasets.Value("string", id="sequence"),
+                        "tasks": datasets.Sequence(datasets.Value("string"), id="tasks"),
+                    }
+                )
+            ],
             # Homepage of the module for documentation
             homepage="http://module.homepage",
             # Additional links to the codebase or references
@@ -81,15 +42,15 @@ class llm_harness_mistral_arc(evaluate.Metric):
             reference_urls=["http://path.to.reference.url/new_module"]
         )
 
-    def _download_and_prepare(self, dl_manager):
-        """Optional: download external resources useful to compute the scores"""
-        # TODO: Download external resources if needed
-        pass
-
-    def _compute(self, predictions, references):
-        """Returns the scores"""
-        # TODO: Compute the different scores of the module
-        accuracy = sum(i == j for i, j in zip(predictions, references)) / len(predictions)
-        return {
-            "accuracy": accuracy,
-        }
+    def _compute(self, pretrained, tasks):
+        outputs = lm_eval.simple_evaluate(
+            model="hf",
+            model_args={"pretrained": pretrained},
+            tasks=tasks,
+            num_fewshot=0,
+        )
+        results = {}
+        for task in outputs['results']:
+            results[task] = {'acc': outputs['results'][task]['acc,none'],
+                             'acc_norm': outputs['results'][task]['acc_norm,none']}
+        return results
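
For context, the new `_compute` is a thin wrapper around the harness's Python entry point, `lm_eval.simple_evaluate`. A minimal standalone sketch of the equivalent direct call (the checkpoint id and task names below are examples, not part of the commit):

```python
import lm_eval

# Same call shape as _compute above; "hf" selects the Transformers backend.
outputs = lm_eval.simple_evaluate(
    model="hf",
    model_args={"pretrained": "mistralai/Mistral-7B-v0.1"},  # example checkpoint
    tasks=["arc_easy", "arc_challenge"],  # ARC tasks, matching the module name
    num_fewshot=0,
)

# Harness results are keyed per task, with metric names like "acc,none".
for task, scores in outputs["results"].items():
    print(task, scores["acc,none"], scores["acc_norm,none"])
```
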
requirements.txt CHANGED
@@ -1 +1,2 @@
-git+https://github.com/huggingface/evaluate@main
+git+https://github.com/huggingface/evaluate@main
+git+https://github.com/EleutherAI/lm-evaluation-harness@main
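
With both requirements installed, the module would be loaded like any community metric. A hedged sketch, assuming the module is published as a Space and loadable by repo id (the id below is hypothetical, and exactly how `compute` forwards the `pretrained`/`tasks` columns to `_compute` depends on `evaluate`'s input batching):

```python
import evaluate

# Hypothetical repo id for this module on the Hub.
harness = evaluate.load("alvations/llm_harness_mistral_arc")

# Columns mirror the features declared in _info: a checkpoint id plus
# the list of harness tasks to run on it.
results = harness.compute(
    pretrained=["mistralai/Mistral-7B-v0.1"],   # example checkpoint
    tasks=[["arc_easy", "arc_challenge"]],
)
print(results)
```
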