"""SQuAD metric."""

import datasets

from .evaluate import evaluate


_CITATION = """\
@inproceedings{Rajpurkar2016SQuAD10,
    title={SQuAD: 100,000+ Questions for Machine Comprehension of Text},
    author={Pranav Rajpurkar and Jian Zhang and Konstantin Lopyrev and Percy Liang},
    booktitle={EMNLP},
    year={2016}
}
"""

_DESCRIPTION = """
This metric wraps the official scoring script for version 1 of the Stanford Question Answering Dataset (SQuAD).

Stanford Question Answering Dataset (SQuAD) is a reading comprehension dataset consisting of questions posed by
crowdworkers on a set of Wikipedia articles, where the answer to every question is a segment of text, or span,
from the corresponding reading passage, or the question might be unanswerable.
"""

_KWARGS_DESCRIPTION = """
Computes SQuAD scores (F1 and EM).
Args:
    predictions: List of question-answers dictionaries with the following key-values:
        - 'id': id of the question-answer pair as given in the references (see below)
        - 'prediction_text': the text of the answer
    references: List of question-answers dictionaries with the following key-values:
        - 'id': id of the question-answer pair (see above)
        - 'answers': a Dict in the SQuAD dataset format
            {
                'text': list of possible texts for the answer, as a list of strings
                'answer_start': list of start positions for the answer, as a list of ints
            }
            Note that answer_start values are not taken into account to compute the metric.
Returns:
    'exact_match': Exact match (the normalized answer exactly matches the gold answer)
    'f1': The F-score of predicted tokens versus the gold answer
Examples:

    >>> predictions = [{'prediction_text': '1976', 'id': '56e10a3be3433e1400422b22'}]
    >>> references = [{'answers': {'answer_start': [97], 'text': ['1976']}, 'id': '56e10a3be3433e1400422b22'}]
    >>> squad_metric = datasets.load_metric("squad")
    >>> results = squad_metric.compute(predictions=predictions, references=references)
    >>> print(results)
    {'exact_match': 100.0, 'f1': 100.0}
"""


@datasets.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class Squad(datasets.Metric):
    def _info(self):
        return datasets.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=datasets.Features(
                {
                    "predictions": {"id": datasets.Value("string"), "prediction_text": datasets.Value("string")},
                    "references": {
                        "id": datasets.Value("string"),
                        "answers": datasets.features.Sequence(
                            {
                                "text": datasets.Value("string"),
                                "answer_start": datasets.Value("int32"),
                            }
                        ),
                    },
                }
            ),
            codebase_urls=["https://rajpurkar.github.io/SQuAD-explorer/"],
            reference_urls=["https://rajpurkar.github.io/SQuAD-explorer/"],
        )

    def _compute(self, predictions, references):
        # Index predicted answers by question id, as the official script expects.
        pred_dict = {prediction["id"]: prediction["prediction_text"] for prediction in predictions}
        # Rebuild the nested SQuAD JSON layout (article -> paragraphs -> qas)
        # that the official evaluate() scoring function consumes; answer_start
        # offsets are not needed for scoring, so only the answer texts are kept.
        dataset = [{"paragraphs": [{"qas": [
            {
                "answers": [{"text": answer_text} for answer_text in ref["answers"]["text"]],
                "id": ref["id"],
            }
            for ref in references
        ]}]}]
        score = evaluate(dataset=dataset, predictions=pred_dict)
        return score
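

# Usage sketch (illustrative; not part of the original script). Unlike the doctest
# above, this example passes several acceptable gold answers per question: the
# official scoring script scores a prediction against each gold text and keeps the
# best match, so "Denver Broncos" earns full credit here. The id "q1" and the
# answer_start offsets are made up for illustration (offsets are ignored anyway).
# Because of the relative `.evaluate` import, load this module through
# `datasets.load_metric("squad")` from a separate script rather than running it
# directly:
#
#     import datasets
#
#     squad_metric = datasets.load_metric("squad")
#     predictions = [{"id": "q1", "prediction_text": "Denver Broncos"}]
#     references = [
#         {
#             "id": "q1",
#             "answers": {
#                 "text": ["Denver Broncos", "The Broncos"],
#                 "answer_start": [177, 177],
#             },
#         }
#     ]
#     squad_metric.compute(predictions=predictions, references=references)
#     # -> {'exact_match': 100.0, 'f1': 100.0}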