google-research

Форк
0
/
sample_molecules.py 
99 строк · 3.2 Кб
1
# coding=utf-8
2
# Copyright 2024 The Google Research Authors.
3
#
4
# Licensed under the Apache License, Version 2.0 (the "License");
5
# you may not use this file except in compliance with the License.
6
# You may obtain a copy of the License at
7
#
8
#     http://www.apache.org/licenses/LICENSE-2.0
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15

16
#!/usr/bin/python
17
r"""Program to sample molecules from a given stoichiometry.
18

19
Example usage:
20
prefix=3_COFH
21
./sample_molecules.py --min_samples=3000 \
22
    --stoich_file=stoichs/${prefix}.stoich \
23
    --out_file=weighted/${prefix}.graphml
24
"""
25

26
import sys
27
import timeit
28

29
from absl import app
30
from absl import flags
31

32
from graph_sampler import graph_io
33
from graph_sampler import molecule_sampler
34
from graph_sampler import stoichiometry
35

36
FLAGS = flags.FLAGS

# Command-line interface of the sampler. --stoich_file and --out_file default
# to None; the script entry point marks both as required before parsing.
flags.DEFINE_string('stoich_file', None, 'Csv file with desired stoichiometry.')
flags.DEFINE_integer('min_samples', 10000, 'Minimum number of samples.')
# The next two flags are forwarded unchanged to MoleculeSampler in main().
flags.DEFINE_float(
    'relative_precision', 0.01,
    'Keep sampling until (std_err / estimate) is less than this number.')
flags.DEFINE_float(
    'min_uniform_proportion', None,
    'Keep sampling until this set of samples can be rejected down to a '
    'uniform sample containing at least this proportion of the estimated '
    'number of graphs.')
flags.DEFINE_string('out_file', None, 'Output file path.')
# Declared as a string flag; main() passes the value through as `rng_seed`.
flags.DEFINE_string('seed', None, 'Seed used for random number generation.')
50

51

52
def main(argv):
  """Samples molecules for one stoichiometry and writes them to a file.

  Reads the stoichiometry from --stoich_file, iterates a MoleculeSampler
  configured from the command-line flags, writes every yielded graph and then
  the final sampler statistics to --out_file, and prints a progress line to
  stdout after every 10000 written graphs (plus once at the end when the total
  is not a multiple of 10000).

  Args:
    argv: Command-line arguments remaining after flag parsing; only the
      program name is accepted.

  Raises:
    RuntimeError: If any arguments follow the program name.
  """
  extra_args = argv[1:]
  if extra_args:
    raise RuntimeError(f'Unexpected arguments: {extra_args}')

  stoich_path = FLAGS.stoich_file
  print(f'Reading stoich from: {stoich_path}')
  with open(stoich_path) as stoich_fh:
    stoich = stoichiometry.read(stoich_fh)

  sampler = molecule_sampler.MoleculeSampler(
      stoich,
      min_samples=FLAGS.min_samples,
      min_uniform_proportion=FLAGS.min_uniform_proportion,
      relative_precision=FLAGS.relative_precision,
      rng_seed=FLAGS.seed)
  began = timeit.default_timer()
  report_every = 10000
  written = 0  # Graphs written so far; read by report_progress() below.

  def report_progress():
    """Prints one flushed status line built from the sampler's stats."""
    snapshot = sampler.stats()
    estimate = snapshot['estimated_num_graphs']
    rel_std_err = snapshot['num_graphs_std_err'] / estimate
    kept_fraction = snapshot['num_after_rejection'] / estimate
    elapsed = timeit.default_timer() - began
    message = ''.join((
        f'Sampled {snapshot["num_samples"]} ({written} valid), ',
        f'{elapsed:.03f} sec, ',
        f'{estimate:.3E} graphs ',
        f'(std err={100 * rel_std_err:.3f}%), ',
        f'proportion after rejection={kept_fraction:.3E}',
    ))
    # Flush right away so progress is visible even when stdout is buffered.
    print(message, flush=True)

  with open(FLAGS.out_file, 'w') as sink:
    for written, graph in enumerate(sampler, start=1):
      graph_io.write_graph(graph, sink)
      if written % report_every == 0:
        report_progress()

    # The statistics go into the same file, after all the graphs.
    final_stats = sampler.stats()
    final_stats['elapsed time'] = timeit.default_timer() - began
    graph_io.write_stats(final_stats, sink)

  print('Done generating molecules!')
  # Skip the closing report when the loop's last iteration already printed it.
  if written % report_every:
    report_progress()
95

96

97
if __name__ == '__main__':
  # Both path flags are defined with a default of None, so have absl reject
  # any invocation that omits them instead of failing later inside main().
  flags.mark_flags_as_required(('stoich_file', 'out_file'))
  app.run(main)
100

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.