google-research
115 строк · 3.8 Кб
1# coding=utf-8
2# Copyright 2024 The Google Research Authors.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16r"""Evaluation program for split+rephrase sentence decomposition.
17
18Used for calculating metrics reported in the paper:
19Learning to Split and Rephrase From Wikipedia Edit History, (Botha et al., 2018)
20
21Metrics included:
22- Multi-reference corpus-level BLEU
23- Macro-averaged sentence-level BLEU
24- Length-ratios as reported in the original WebSplit paper:
25output_sentences per input_sentence, tokens per output_sentence
26
27Scores are written as tab-separated rows to standard output.
28
29Usage:
30$ cd google_research
31$ python -m wiki_split_bleu_eval.score_main \
32--gold="/path/to/references.tsv" \
33--pred="/path/to/predictions.txt"
34
35The files should be parallel line for line.
36
37- predictions: system output for decomposing a sentence into one or more simpler
38sentences. (The code refers to each such simpler sentence as a parcel.)
39Format:
40parcel_1 SEP parcel_2 ...
41For example, a decomposition of "I think , therefore I am ." into two
42sentences (parcels) should be represented as:
43I think . <::::> Therefore I am .
44
45- gold: ground truth decomposition(s) for the corresponding line.
46Format:
47decomposition_1 <TAB> decomposition_2 [<TAB> decomposition_3 ...]
48where each decomposition has the format
49parcel_1 SEP parcel_2 ...
50Example of two alternative reference decompositions:
51I think . <::::> Therefore I am . <TAB> I think . <::::> Thus I am .
52
53The --parcel_sep flag controls the <::::>-separator.
54"""
55
56from __future__ import absolute_import57from __future__ import division58from __future__ import print_function59
60import sys61
62from absl import app63from absl import flags64from absl import logging65
66from wiki_split_bleu_eval import score_lib67
# Command-line flags for the scorer. --gold and --pred have no default (None);
# the remaining flags control input parsing, output formatting and debugging.
flags.DEFINE_string(
    name='gold',
    default=None,
    help='Gold (ground-truth) decompositions. Single line per instance: '
    'references are split by tabs and sentence boundaries by --parcel_sep.')
flags.DEFINE_string(
    name='pred',
    default=None,
    help='Predicted decompositions. Single line per instance, containing one '
    'decomposition with sentence boundaries split by --parcel_sep.')
flags.DEFINE_string(
    name='parcel_sep',
    default='<::::>',
    help='Separator between parcels, for parallel-mode')
flags.DEFINE_string(
    name='output_sep',
    default='\t',
    help='Delimiter for results output')
flags.DEFINE_bool(
    name='debug',
    default=False,
    help='output debug info')

# Module-level handle through which the parsed flag values are read.
FLAGS = flags.FLAGS
83
def main(unused_argv):
  """Scores the --pred file against the --gold file and prints the results.

  Both files are read with score_lib.ReadParcels using --parcel_sep, the
  pair is evaluated with score_lib.PerformEval, the two input paths are added
  to the returned dict under '_gold_file' and '_pred_file', and every entry is
  printed to standard output as a `key<output_sep>value` row, sorted by key.

  Args:
    unused_argv: Command-line arguments remaining after flag parsing; ignored.

  Raises:
    app.UsageError: If --gold or --pred was not supplied.
  """
  # Both flags default to None; report that as a usage error rather than
  # letting open(None) fail with an opaque TypeError.
  if FLAGS.gold is None or FLAGS.pred is None:
    raise app.UsageError('Both --gold and --pred must be specified.')

  logging.info('Scoring file "%s"', FLAGS.pred)

  # Decode explicitly as UTF-8 so the scores do not depend on the platform's
  # locale-derived default encoding.
  with open(FLAGS.gold, 'r', encoding='utf-8') as gold_fd:
    gold = score_lib.ReadParcels(gold_fd, parcel_sep=FLAGS.parcel_sep)

  # Predictions are read with reduce_to_single_analysis=True (per the module
  # docstring, each prediction line holds exactly one decomposition).
  with open(FLAGS.pred, 'r', encoding='utf-8') as pred_fd:
    pred = score_lib.ReadParcels(
        pred_fd, parcel_sep=FLAGS.parcel_sep, reduce_to_single_analysis=True)

  results = score_lib.PerformEval(gold=gold, pred=pred, debug=FLAGS.debug)

  # Record which inputs produced these scores alongside the metrics.
  results['_gold_file'] = FLAGS.gold
  results['_pred_file'] = FLAGS.pred

  # Output scoring results in TSV format.
  def as_tsv(results):
    """Formats a dict as newline-joined `key<output_sep>value` rows by key."""
    return '\n'.join(
        '{}{}{}'.format(key, FLAGS.output_sep, results[key])
        for key in sorted(results))

  print(as_tsv(results))
112
if __name__ == '__main__':
  # Checked explicitly rather than with `assert`, which is stripped when the
  # interpreter runs with -O and would let the check silently disappear.
  if sys.version_info[0] < 3:
    raise RuntimeError('This code targets Python 3.')
  app.run(main)