# Source listing: google-research, collect_demos.py (174 lines, 5.9 KB).
# coding=utf-8
# Copyright 2024 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
15

16
"""Collect demonstrations for Adroit using an expert policy."""
17

18
import os
19
import pickle
20

21
from absl import app
22
from absl import flags
23
import gym
24
import numpy as np
25

26
from rrlfd import adroit_ext  # pylint: disable=unused-import
27
from rrlfd.bc import pickle_dataset
28
from tensorflow.io import gfile
29

30

31
# Command-line flags controlling which task is run, how many episodes are
# recorded, how the environment is seeded, and where results are written.
flags.DEFINE_enum('task', None, ['door', 'hammer', 'pen', 'relocate'],
                  'Adroit task for which to collect demonstrations.')
flags.DEFINE_integer('num_episodes', 100, 'Number of episodes to record.')
flags.DEFINE_integer('seed', 0, 'Experiment seed.')
flags.DEFINE_boolean('increment_seed', False,
                     'If True, increment seed at every episode.')
flags.DEFINE_integer('image_size', None, 'Size of rendered images.')

# The policy is loaded from `<expert_policy_dir>/<task>.pickle`, so this flag
# names a directory rather than the pickle file itself.
flags.DEFINE_string('expert_policy_dir', None,
                    'Directory containing the pickled expert policy, stored '
                    'as <task>.pickle.')
flags.DEFINE_boolean('record_failed', False,
                     'If True, save failed demonstrations.')
flags.DEFINE_string('logdir', None, 'Location to save demonstrations to.')
flags.DEFINE_string('run_id', None,
                    'If set, a custom string to append to saved demonstrations '
                    'file name.')

FLAGS = flags.FLAGS
49

50

51
def env_loop(env, agent, num_episodes, log_path, record_failed, seed,
             increment_seed, compress_images=True):
  """Collects demonstrations by running an agent in a Gym environment.

  Episodes are attempted until `num_episodes` of them have been kept. An
  episode is kept when it succeeds, or unconditionally when `record_failed` is
  True. The outcome of every attempt is printed. When `log_path` is set, the
  outcomes are also written to `<log_path>_log.txt` and
  `<log_path>_success.txt`, and kept episodes are written to `<log_path>.pkl`.

  Args:
    env: Environment providing `seed`, `reset`, `step` and `evaluate_success`.
      Observations are indexed with the key 'original_obs'.
    agent: Policy whose `get_action(obs)` returns a pair; the 'evaluation'
      entry of the second element is used as the action.
    num_episodes: Number of episodes to keep before stopping.
    log_path: Path prefix for the output files, or None to write no files.
    record_failed: If True, failed episodes are kept as well.
    seed: Base seed; only used when `increment_seed` is True.
    increment_seed: If True, reseed the environment before every attempt with
      `seed` plus the number of attempts made so far.
    compress_images: Passed through to `pickle_dataset.DemoWriter`.
  """
  log_f = None
  success_f = None
  demo_writer = None
  try:
    if log_path is not None:
      log_f = gfile.GFile(log_path + '_log.txt', 'w')
      success_f = gfile.GFile(log_path + '_success.txt', 'w')
      demo_writer = pickle_dataset.DemoWriter(
          log_path + '.pkl', compress_images)
      print('Writing demos to', log_path + '.pkl')

    e = 0  # Number of episodes kept so far.
    num_successes = 0
    num_attempts = 0
    min_reward, max_reward = np.inf, -np.inf
    while e < num_episodes:
      if e % 10 == 0 and e > 0:
        print(f'Episode {e} / {num_episodes}; '
              f'Success rate {num_successes} / {num_attempts}')
      if increment_seed:
        # Every attempt, kept or skipped, advances the seed by one, so the
        # offset is exactly the number of attempts made so far.
        env.seed(seed + num_attempts)
      obs = env.reset()

      done = False
      _, agent_info = agent.get_action(obs['original_obs'])
      action = agent_info['evaluation']
      observations = []
      actions = []
      rewards = []
      # For envs with non-Markovian success criteria, track required fields.
      goals_achieved = []

      while not done:
        observations.append(obs)
        actions.append(action)
        obs, reward, done, info = env.step(action)
        rewards.append(reward)
        min_reward = min(min_reward, reward)
        max_reward = max(max_reward, reward)
        # The agent is also queried on the final observation; this keeps the
        # sequence of agent calls the same for agents that carry state.
        _, agent_info = agent.get_action(obs['original_obs'])
        action = agent_info['evaluation']
        if 'goal_achieved' in info:
          goals_achieved.append(info['goal_achieved'])

      # Environment defines success criteria based on full episode.
      success_percentage = env.evaluate_success(
          [{'env_infos': {'goal_achieved': goals_achieved}}])
      success = bool(success_percentage)

      num_successes += int(success)
      num_attempts += 1

      timed_out = bool(
          not success
          and 'TimeLimit.truncated' in info
          and info['TimeLimit.truncated'])
      if success:
        outcome = f'{e}: success'
      elif timed_out:
        outcome = f'{e}: failure: time limit'
      else:
        outcome = f'{e}: failure'
      print(outcome)
      if log_f is not None:
        # The time-limit line keeps its historical trailing space so that the
        # log file format is unchanged.
        log_f.write(outcome + (' \n' if timed_out else '\n'))
        log_f.flush()
      if success_f is not None:
        success_f.write('success\n' if success else 'failure\n')
        success_f.flush()

      if success or record_failed:
        e += 1
        if demo_writer is not None:
          demo_writer.write_episode(observations, actions, rewards)

    print(f'Done; Success rate {num_successes} / {num_attempts}')
    print('min reward', min_reward)
    print('max reward', max_reward)
    if log_f is not None:
      log_f.write(f'Done; Success rate {num_successes} / {num_attempts}\n')
      log_f.write(f'min reward {min_reward}\n')
      log_f.write(f'max reward {max_reward}\n')
  finally:
    # Close both text logs, including when an episode raises part-way through.
    for open_file in (log_f, success_f):
      if open_file is not None:
        open_file.close()
143

144

145
def main(_):
  """Loads the expert policy for FLAGS.task and records demonstrations.

  The policy is unpickled from `<expert_policy_dir>/<task>.pickle`, the
  `visual-<task>-v0` environment is created and seeded, and `env_loop` is run.
  When FLAGS.logdir is set, outputs go under `<logdir>/<task>/` with a file
  name prefix built from the seed, episode count and run options.
  """
  policy_path = os.path.join(FLAGS.expert_policy_dir, f'{FLAGS.task}.pickle')
  with gfile.GFile(policy_path, 'rb') as policy_file:
    agent = pickle.load(policy_file)

  env = gym.make(f'visual-{FLAGS.task}-v0')
  env.seed(FLAGS.seed)
  im_size = FLAGS.image_size
  if im_size is not None:
    # Set `im_size` on the inner `env.env` object.
    env.env.im_size = im_size

  log_path = None
  if FLAGS.logdir is not None:
    # Suffix: optional run id, then markers for non-default settings.
    suffix_parts = []
    if FLAGS.run_id is not None:
      suffix_parts.append('_' + FLAGS.run_id)
    if FLAGS.record_failed:
      suffix_parts.append('_all')
    if im_size is not None and im_size != adroit_ext.camera_kwargs['im_size']:
      suffix_parts.append(f'_{im_size}px')
    suffix = ''.join(suffix_parts)

    # An 'i' after the seed marks runs that increment the seed every episode.
    seed_tag = f's{FLAGS.seed}i' if FLAGS.increment_seed else f's{FLAGS.seed}'
    task_dir = os.path.join(FLAGS.logdir, f'{FLAGS.task}')
    log_path = os.path.join(
        task_dir, f'{seed_tag}_e{FLAGS.num_episodes}{suffix}')
    gfile.makedirs(os.path.dirname(log_path))
    print('Writing to', log_path)

  env_loop(env, agent, FLAGS.num_episodes, log_path, FLAGS.record_failed,
           FLAGS.seed, FLAGS.increment_seed)
171

172

173
if __name__ == '__main__':
  # Both flags default to None, and main() builds the policy path and the
  # environment id from them without checking; require them up front so a
  # missing flag produces a usage error instead of a failure inside main().
  flags.mark_flags_as_required(['task', 'expert_policy_dir'])
  app.run(main)
175
# End of listing: google-research (page footer: cookie usage notice).