# datasets
# 94 строки · 3.2 Кб
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Accuracy metric for the Mathematics Aptitude Test of Heuristics (MATH) dataset."""

# Canonicalization/equivalence checker for MATH answers.
# Installed from: git+https://github.com/hendrycks/math.git
import math_equivalence

import datasets

21_CITATION = """\
22@article{hendrycksmath2021,
23title={Measuring Mathematical Problem Solving With the MATH Dataset},
24author={Dan Hendrycks
25and Collin Burns
26and Saurav Kadavath
27and Akul Arora
28and Steven Basart
29and Eric Tang
30and Dawn Song
31and Jacob Steinhardt},
32journal={arXiv preprint arXiv:2103.03874},
33year={2021}
34}
35"""
36
37
38_DESCRIPTION = """\
39This metric is used to assess performance on the Mathematics Aptitude Test of Heuristics (MATH) dataset.
40It first canonicalizes the inputs (e.g., converting "1/2" to "\\frac{1}{2}") and then computes accuracy.
41"""
42
43
44_KWARGS_DESCRIPTION = r"""
45Calculates accuracy after canonicalizing inputs.
46
47Args:
48predictions: list of predictions to score. Each prediction
49is a string that contains natural language and LaTex.
50references: list of reference for each prediction. Each
51reference is a string that contains natural language
52and LaTex.
53Returns:
54accuracy: accuracy after canonicalizing inputs
55(e.g., converting "1/2" to "\\frac{1}{2}")
56
57Examples:
58>>> metric = datasets.load_metric("competition_math")
59>>> results = metric.compute(references=["\\frac{1}{2}"], predictions=["1/2"])
60>>> print(results)
61{'accuracy': 1.0}
62"""
63
64
@datasets.utils.file_utils.add_end_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class CompetitionMathMetric(datasets.Metric):
    """Accuracy metric for the MATH dataset."""

    def _info(self):
        """Return the metric's metadata (features, citation, homepage)."""
        return datasets.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # Both predictions and references are free-form strings
            # (natural language mixed with LaTeX).
            features=datasets.Features(
                {
                    "predictions": datasets.Value("string"),
                    "references": datasets.Value("string"),
                }
            ),
            # Homepage of the metric for documentation
            homepage="https://github.com/hendrycks/math",
            # Additional links to the codebase or references
            codebase_urls=["https://github.com/hendrycks/math"],
        )

    def _compute(self, predictions, references):
        """Compute accuracy over (prediction, reference) pairs.

        Args:
            predictions: list of prediction strings.
            references: list of reference strings, aligned with ``predictions``.

        Returns:
            dict with a single key ``"accuracy"``: fraction of pairs judged
            equivalent by ``math_equivalence.is_equiv`` after canonicalization.

        Raises:
            ValueError: if ``predictions`` is empty (accuracy is undefined).
        """
        if not predictions:
            # Guard the division below; an empty batch has no defined accuracy.
            raise ValueError("Cannot compute accuracy on an empty list of predictions.")
        n_correct = sum(
            1.0 if math_equivalence.is_equiv(pred, ref) else 0.0
            for pred, ref in zip(predictions, references)
        )
        return {
            "accuracy": n_correct / len(predictions),
        }