# datasets
# 94 строки · 3.2 Кб
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Accuracy metric for the Mathematics Aptitude Test of Heuristics (MATH) dataset."""

# Canonicalization/equivalence checker for MATH answers.
# Installed from: git+https://github.com/hendrycks/math.git
import math_equivalence

import datasets

21_CITATION = """\
22@article{hendrycksmath2021,
23title={Measuring Mathematical Problem Solving With the MATH Dataset},
24author={Dan Hendrycks
25and Collin Burns
26and Saurav Kadavath
27and Akul Arora
28and Steven Basart
29and Eric Tang
30and Dawn Song
31and Jacob Steinhardt},
32journal={arXiv preprint arXiv:2103.03874},
33year={2021}
34}
35"""
36
37
38_DESCRIPTION = """\
39This metric is used to assess performance on the Mathematics Aptitude Test of Heuristics (MATH) dataset.
40It first canonicalizes the inputs (e.g., converting "1/2" to "\\frac{1}{2}") and then computes accuracy.
41"""
42
43
44_KWARGS_DESCRIPTION = r"""
45Calculates accuracy after canonicalizing inputs.
46
47Args:
48predictions: list of predictions to score. Each prediction
49is a string that contains natural language and LaTex.
50references: list of reference for each prediction. Each
51reference is a string that contains natural language
52and LaTex.
53Returns:
54accuracy: accuracy after canonicalizing inputs
55(e.g., converting "1/2" to "\\frac{1}{2}")
56
57Examples:
58>>> metric = datasets.load_metric("competition_math")
59>>> results = metric.compute(references=["\\frac{1}{2}"], predictions=["1/2"])
60>>> print(results)
61{'accuracy': 1.0}
62"""
63
64
@datasets.utils.file_utils.add_end_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class CompetitionMathMetric(datasets.Metric):
    """Accuracy metric for the MATH dataset."""

    def _info(self):
        """Return the metric's metadata (features, citation, homepage)."""
        return datasets.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # Both predictions and references are free-form strings
            # (natural language mixed with LaTeX).
            features=datasets.Features(
                {
                    "predictions": datasets.Value("string"),
                    "references": datasets.Value("string"),
                }
            ),
            # Homepage of the metric for documentation
            homepage="https://github.com/hendrycks/math",
            # Additional links to the codebase or references
            codebase_urls=["https://github.com/hendrycks/math"],
        )

    def _compute(self, predictions, references):
        """Compute accuracy over (prediction, reference) pairs.

        Args:
            predictions: list of prediction strings.
            references: list of reference strings, aligned with ``predictions``.

        Returns:
            dict with a single key ``"accuracy"``: fraction of pairs judged
            equivalent by ``math_equivalence.is_equiv`` after canonicalization.

        Raises:
            ValueError: if ``predictions`` is empty (accuracy is undefined).
        """
        if not predictions:
            # Guard the division below; an empty batch has no defined accuracy.
            raise ValueError("Cannot compute accuracy on an empty list of predictions.")
        n_correct = sum(
            1.0 if math_equivalence.is_equiv(pred, ref) else 0.0
            for pred, ref in zip(predictions, references)
        )
        return {
            "accuracy": n_correct / len(predictions),
        }