google-research

evaluate_chart_to_table.py
92 строки · 2.8 Кб
Перенос по словам
1
# coding=utf-8
2
# Copyright 2024 The Google Research Authors.
3
#
4
# Licensed under the Apache License, Version 2.0 (the "License");
5
# you may not use this file except in compliance with the License.
6
# You may obtain a copy of the License at
7
#
8
#     http://www.apache.org/licenses/LICENSE-2.0
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15

16
"""Binary to compute table equality metrics."""
17

18
from collections.abc import Sequence, Mapping
19
import csv
20
import json
21
import os
22
import zipfile
23

24
from absl import app
25
from absl import flags
26
import tensorflow as tf
27

28
from deplot import metrics
29

30

31
# --path is a path *prefix*, not a directory: the script reads
# `<path>_targets.zip` and `<path>_predictions.zip`, and writes `<path>.jsonl`
# and `<path>-metrics.json`.
_PATH = flags.DEFINE_string(
    'path',
    None,
    'Path prefix: reads <path>_targets.zip and <path>_predictions.zip (CSV '
    'tables); writes <path>.jsonl and <path>-metrics.json.',
)

# --jsonl is opened as a single file and read line by line; each line must be
# a JSON object with `target` and `prediction` keys.
_JSONL = flags.DEFINE_string(
    'jsonl',
    None,
    'JSONL file with one {"target": ..., "prediction": ...} object per line.',
)
36

37

38
def _to_markdown(bts):
39
  reader = csv.reader(bts.decode().splitlines(), delimiter=',')
40
  parts = ['title |'] + [' | '.join(row) for row in reader]
41
  return '\n'.join(parts)
42

43

44
def _get_files(suffix: str) -> Mapping[str, bytes]:
  """Reads every CSV member of the `<--path>_<suffix>.zip` archive.

  Args:
    suffix: Archive name suffix; `main` passes 'targets' and 'predictions'.

  Returns:
    A dict mapping each CSV member's base name (directories inside the archive
    are dropped) to that member's raw bytes. If two members share a base name,
    the one listed later in the archive wins.
  """
  archive_path = f'{_PATH.value}_{suffix}.zip'
  # ZipFile only closes file objects it opened itself, so the GFile handle gets
  # its own context manager; otherwise it would stay open after the read.
  with tf.io.gfile.GFile(archive_path, 'rb') as raw:
    with zipfile.ZipFile(raw) as archive:
      return {
          os.path.basename(name): archive.read(name)
          for name in archive.namelist()
          if name.endswith('.csv')
      }
49

50

51
def main(argv: Sequence[str]) -> None:
  """Computes table metrics for chart-to-table predictions and prints them.

  Exactly one input source must be set:

  * `--path`: reads the CSV files from `<path>_targets.zip` and
    `<path>_predictions.zip`, pairs them by base name, converts both sides
    with `_to_markdown`, and writes the pairs to `<path>.jsonl`. The metrics
    are additionally written to `<path>-metrics.json`.
  * `--jsonl`: reads one JSON object per line with `target` and `prediction`
    keys.

  Args:
    argv: Positional command-line arguments left after flag parsing.

  Raises:
    app.UsageError: If extra positional arguments are passed.
    ValueError: If both or neither of `--path` and `--jsonl` are set.
    KeyError: If a target CSV has no prediction CSV with the same name.
  """
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')

  if _PATH.value and _JSONL.value:
    raise ValueError('Only one path or value can be specified.')

  targets, predictions = [], []

  if _PATH.value:
    targets_by_id = _get_files('targets')
    predictions_by_id = _get_files('predictions')

    # Fail before the JSONL file is created, and name every missing file,
    # instead of raising a bare KeyError after a partial write. Predictions
    # without a matching target are ignored.
    missing = sorted(set(targets_by_id) - set(predictions_by_id))
    if missing:
      raise KeyError(f'No prediction CSV found for targets: {missing}')

    with tf.io.gfile.GFile(_PATH.value + '.jsonl', 'w') as f:
      # Sorted keys keep the JSONL output order deterministic.
      for k in sorted(targets_by_id):
        target = _to_markdown(targets_by_id[k])
        prediction = _to_markdown(predictions_by_id[k])
        # Each example carries a list of targets (here always one).
        # NOTE(review): presumably the metrics accept several references per
        # example - confirm against deplot.metrics.
        targets.append([target])
        predictions.append(prediction)
        line = {'input': {'id': k}, 'target': target, 'prediction': prediction}
        f.write(json.dumps(line) + '\n')
  elif _JSONL.value:
    with tf.io.gfile.GFile(_JSONL.value) as f:
      for line in f:
        # Tolerate blank lines, which json.loads would reject.
        if not line.strip():
          continue
        example = json.loads(line)
        target = example['target']
        # The --path branch stores the target as a bare string in the JSONL but
        # hands it to the metrics wrapped in a one-element list. Wrap string
        # targets the same way so both branches pass the same structure;
        # targets that already are lists are kept as they are.
        targets.append([target] if isinstance(target, str) else target)
        predictions.append(example['prediction'])
  else:
    raise ValueError('No input method specified.')

  metric = {}
  metric.update(metrics.table_datapoints_precision_recall(targets, predictions))
  metric.update(metrics.table_number_accuracy(targets, predictions))
  metric_log = json.dumps(metric, indent=2)
  print(metric_log)
  if _PATH.value:
    with tf.io.gfile.GFile(_PATH.value + '-metrics.json', 'w') as f:
      f.write(metric_log)
89

90

91
if __name__ == '__main__':
  # absl parses the command-line flags, then calls main with the remaining
  # positional arguments.
  app.run(main)
93
google-research

Использование cookies