# google-research: 438 lines, 13.8 KB
# coding=utf-8
# Copyright 2024 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Functions shared among files under seq2act/data_generation."""
17
18from __future__ import absolute_import19from __future__ import division20from __future__ import print_function21
22import collections23import os24
25import attr26from enum import Enum27import numpy as np28import tensorflow.compat.v1 as tf # tf29
30from seq2act.data_generation import config31from seq2act.data_generation import view_hierarchy32
33
# Module-level shorthand for TensorFlow's file API; used below to open
# view hierarchy files.
gfile = tf.gfile
36
def _merge_max_value(current, candidate):
    """Returns the larger of two values, treating None as "not set yet".

    Args:
        current: the value recorded so far, or None.
        candidate: the new value to merge in, or None.

    Returns:
        The non-None argument when exactly one is None, None when both are
        None, otherwise max(current, candidate).
    """
    if current is None:
        return candidate
    if candidate is None:
        return current
    return max(current, candidate)


@attr.s
class MaxValues(object):
    """Represents max values for a task and UI.

    Every field defaults to None, which `update` treats as "no value yet".
    """

    # For instruction
    max_word_num = attr.ib(default=None)
    max_word_length = attr.ib(default=None)

    # For UI objects
    max_ui_object_num = attr.ib(default=None)
    max_ui_object_word_num = attr.ib(default=None)
    max_ui_object_word_length = attr.ib(default=None)

    def update(self, other):
        """Update max value from another MaxValues instance.

        This will be used when want to merge several MaxValues instances:

            max_values_list = ...
            result = MaxValues()
            for v in max_values_list:
                result.update(v)

        Then `result` contains merged max values in each field.

        Fields that are still None on either side are handled explicitly:
        Python 3 cannot order None against an int, so a plain max() would
        raise TypeError on a freshly constructed instance.

        Args:
            other: another MaxValues instance, contains updated data.
        """
        field_names = (
            'max_word_num',
            'max_word_length',
            'max_ui_object_num',
            'max_ui_object_word_num',
            'max_ui_object_word_length',
        )
        for field_name in field_names:
            merged = _merge_max_value(
                getattr(self, field_name), getattr(other, field_name))
            setattr(self, field_name, merged)
74
class ActionRules(Enum):
    """The rule_id to generate synthetic action."""

    SINGLE_OBJECT_RULE = 0
    GRID_CONTEXT_RULE = 1
    NEIGHBOR_CONTEXT_RULE = 2
    SWIPE_TO_OBJECT_RULE = 3
    SWIPE_TO_DIRECTION_RULE = 4
    # The action is not generated, but a real user action.
    REAL = 5
    CROWD_COMPUTE = 6
    # For win, "click button under some tab/combobox".
    DIRECTION_VERB_RULE = 7
    # For win, if the target verb is not direction_verb.
    CONSUMED_MULTI_STEP = 8
    UNCONSUMED_MULTI_STEP = 9
    NO_VERB_RULE = 10
89
class ActionTypes(Enum):
    """The action types and ids of Android actions.

    The ids defined here run from 2 to 10.
    """

    CLICK = 2
    INPUT = 3
    SWIPE = 4
    CHECK = 5
    UNCHECK = 6
    LONG_CLICK = 7
    OTHERS = 8
    GO_HOME = 9
    GO_BACK = 10
102
# Maps a verb phrase to the ActionTypes member it stands for.
# The insertion order of the entries is kept as-is so that iterating the
# mapping yields the same sequence as before.
VERB_ID_MAP = {
    'check': ActionTypes.CHECK,
    'find': ActionTypes.SWIPE,
    'navigate': ActionTypes.SWIPE,
    'uncheck': ActionTypes.UNCHECK,
    'head to': ActionTypes.SWIPE,
    'enable': ActionTypes.CHECK,
    'turn on': ActionTypes.CHECK,
    'locate': ActionTypes.SWIPE,
    'disable': ActionTypes.UNCHECK,
    'tap and hold': ActionTypes.LONG_CLICK,
    'long press': ActionTypes.LONG_CLICK,
    'look': ActionTypes.SWIPE,
    'press and hold': ActionTypes.LONG_CLICK,
    'turn it on': ActionTypes.CHECK,
    'turn off': ActionTypes.UNCHECK,
    'switch on': ActionTypes.CHECK,
    'visit': ActionTypes.SWIPE,
    'hold': ActionTypes.LONG_CLICK,
    'switch off': ActionTypes.UNCHECK,
    'head': ActionTypes.SWIPE,
    'head over': ActionTypes.SWIPE,
    'long-press': ActionTypes.LONG_CLICK,
    'un-click': ActionTypes.UNCHECK,
    'tap': ActionTypes.CLICK,
    'check off': ActionTypes.UNCHECK,
    # NOTE: 'power on' is intentionally left unmapped (it was disabled in the
    # original table with the id 21).
}
131
132
class WinActionTypes(Enum):
    """The action types and ids of windows actions."""

    LEFT_CLICK = 2
    RIGHT_CLICK = 3
    DOUBLE_CLICK = 4
    INPUT = 5
140
@attr.s
class Action(object):
    """The class for a word2act action.

    All fields default to None except `step_str_pos` (a fresh `[0, 0]` list
    per instance) and `is_consumed` (True).
    """

    instruction_str = attr.ib(default=None)
    verb_str = attr.ib(default=None)
    obj_desc_str = attr.ib(default=None)
    input_content_str = attr.ib(default=None)
    action_type = attr.ib(default=None)
    action_rule = attr.ib(default=None)
    target_obj_idx = attr.ib(default=None)
    obj_str_pos = attr.ib(default=None)
    input_str_pos = attr.ib(default=None)
    verb_str_pos = attr.ib(default=None)
    # Start/end position of one whole step. A factory is used so that every
    # instance gets its own list; a literal `[0, 0]` default would be one
    # list object shared (and mutated) across all instances.
    step_str_pos = attr.ib(default=attr.Factory(lambda: [0, 0]))
    # Default action is 1-step consumed action
    is_consumed = attr.ib(default=True)

    # NOTE(review): `@attr.s` generates its own field-by-field `__eq__` unless
    # told otherwise (`eq=False` / `auto_detect=True`), so this method is
    # probably replaced at class creation time. Confirm which equality callers
    # rely on before changing the decorator arguments.
    def __eq__(self, other):
        """Compares two actions by their instruction string only."""
        if not isinstance(other, Action):
            return NotImplemented
        return self.instruction_str == other.instruction_str

    def is_valid(self):
        """Does valid check for action instance.

        Returns:
            False when instruction_str is None or empty, when every entry of
            obj_str_pos equals 0, or when obj_desc_str is None, empty or
            consists only of whitespace. True otherwise.
        """
        invalid_obj_pos = (np.array(self.obj_str_pos) == 0).all()
        if not self.instruction_str or invalid_obj_pos:
            return False
        # obj_desc_str defaults to None; treat that like a blank description
        # instead of failing on `.strip()`.
        if self.obj_desc_str is None or not self.obj_desc_str.strip():
            return False
        return True

    def has_valid_input(self):
        """Does valid check for input positions.

        Returns:
            True when at least one entry of input_str_pos differs from
            config.LABEL_DEFAULT_VALUE_INT, i.e. input_str_pos is not all
            default value.
        """
        default_positions = np.array([
            config.LABEL_DEFAULT_VALUE_INT, config.LABEL_DEFAULT_VALUE_INT
        ])
        return (self.input_str_pos != default_positions).any()

    def regularize_strs(self):
        """Trims action instance's obj_desc_str, input_content_str, verb_str.

        Fields that are still None are left untouched.
        """
        for field_name in ('obj_desc_str', 'input_content_str', 'verb_str'):
            value = getattr(self, field_name)
            if value is not None:
                setattr(self, field_name, value.strip())

    def convert_to_lower_case(self):
        """Lower-cases instruction_str, obj_desc_str, input_content_str, verb_str.

        Fields that are still None are left untouched.
        """
        field_names = (
            'instruction_str', 'obj_desc_str', 'input_content_str', 'verb_str')
        for field_name in field_names:
            value = getattr(self, field_name)
            if value is not None:
                setattr(self, field_name, value.lower())
203
@attr.s
class ActionEvent(object):
    """This class defines ActionEvent class.

    ActionEvent is high level event summarized from low level android event
    logs. This example shows the android event logs and the extracted
    ActionEvent object:

    Android Event Logs:
    [ 42.407808] EV_ABS ABS_MT_TRACKING_ID 00000000
    [ 42.407808] EV_ABS ABS_MT_TOUCH_MAJOR 00000004
    [ 42.407808] EV_ABS ABS_MT_PRESSURE 00000081
    [ 42.407808] EV_ABS ABS_MT_POSITION_X 00004289
    [ 42.407808] EV_ABS ABS_MT_POSITION_Y 00007758
    [ 42.407808] EV_SYN SYN_REPORT 00000000
    [ 42.453256] EV_ABS ABS_MT_PRESSURE 00000000
    [ 42.453256] EV_ABS ABS_MT_TRACKING_ID ffffffff
    [ 42.453256] EV_SYN SYN_REPORT 00000000

    This log can be generated from this command while running android
    emulator:
    adb shell getevent -lt /dev/input/event1

    The positions in the log are hexadecimal (0x4289 = 17033,
    0x7758 = 30552). If screen pixel size is [480,800] and, for the sake of
    the example, ANDROID_LOG_MAX_ABS_X = ANDROID_LOG_MAX_ABS_Y = 32676, this
    is the extracted ActionEvent object:
    ActionEvent(
        event_time = 42.407808
        action_type = ActionTypes.CLICK
        object_id = -1
        coordinates_x = [17033,]
        coordinates_y = [30552,]
        coordinates_x_pixel = [250,]
        coordinates_y_pixel = [747,]
        action_params = []
    )
    """

    event_time = attr.ib()
    action_type = attr.ib()
    coordinates_x = attr.ib()
    coordinates_y = attr.ib()
    action_params = attr.ib()
    # The fields below are plain class attributes (not attr.ib fields); they
    # are filled in by the public method update_info_from_screen().
    coordinates_x_pixel = None
    coordinates_y_pixel = None
    object_id = config.LABEL_DEFAULT_INVALID_INT
    leaf_nodes = None  # If dedup, the nodes here will be less than XML
    debug_target_object_word_sequence = None

    def update_info_from_screen(self, screen_info, dedup=False):
        """Updates action event attributes from screen_info.

        Updates coordinates_x(y)_pixel, leaf_nodes and object_id from the
        screen_info proto.

        Args:
            screen_info: ScreenInfo protobuf
            dedup: whether dedup the UI objs with same text or content desc.

        Raises:
            ValueError when fail to find object id.
        """
        screen_size = (config.SCREEN_WIDTH, config.SCREEN_HEIGHT)
        self.update_norm_coordinates(screen_size)

        hierarchy = view_hierarchy.ViewHierarchy()
        xml_bytes = screen_info.view_hierarchy.xml.encode('utf-8')
        hierarchy.load_xml(xml_bytes)
        if dedup:
            # Dedup is given the first pixel coordinate of this event.
            first_point = (
                self.coordinates_x_pixel[0], self.coordinates_y_pixel[0])
            hierarchy.dedup(first_point)
        self.leaf_nodes = hierarchy.get_leaf_nodes()
        self._update_object_id(hierarchy.get_ui_objects())

    def _update_object_id(self, ui_object_list):
        """Updates ui object index from view_hierarchy.

        If point(X,Y) surrounded by multiple UI objects, select the one with
        smallest area. On ties the first such object in the list wins.

        Args:
            ui_object_list: UI objects to search; each one is read for its
                `bounding_box` (x1, y1, x2, y2) and `word_sequence`.

        Raises:
            ValueError when fail to find object id.
        """
        point_x = self.coordinates_x_pixel[0]
        point_y = self.coordinates_y_pixel[0]

        best_area = None  # None until some bounding box contains the point.
        for position, candidate in enumerate(ui_object_list):
            bounds = candidate.bounding_box
            inside_x = bounds.x1 <= point_x <= bounds.x2
            if not (inside_x and bounds.y1 <= point_y <= bounds.y2):
                continue
            candidate_area = (
                (bounds.x2 - bounds.x1) * (bounds.y2 - bounds.y1))
            if best_area is None or candidate_area < best_area:
                self.object_id = position
                self.debug_target_object_word_sequence = (
                    candidate.word_sequence)
                best_area = candidate_area

        if best_area is None:
            raise ValueError(('Object id not found: x,y=%d,%d coordinates fail to '
                              'match every UI bounding box') %
                             (self.coordinates_x_pixel[0],
                              self.coordinates_y_pixel[0]))

    def update_norm_coordinates(self, screen_size):
        """Update coordinates_x(y)_pixel according to screen_size.

        self.coordinates_x is scaled between [0, ANDROID_LOG_MAX_ABS_X]
        self.coordinates_y is scaled between [0, ANDROID_LOG_MAX_ABS_Y]
        This function recovers coordinate of android event logs back to
        coordinate in real screen's pixel level (truncated to int).

        coordinates_x_pixel = coordinates_x/ANDROID_LOG_MAX_ABS_X*horizontal_pixel
        coordinates_y_pixel = coordinates_y/ANDROID_LOG_MAX_ABS_Y*vertical_pixel

        For example,
            ANDROID_LOG_MAX_ABS_X = ANDROID_LOG_MAX_ABS_Y = 32676
            coordinates_x = [17033, ]
            coordinates_y = [30552, ]
            screen_size = (480, 800)
        Then the updated pixel coordinates are as follow:
            coordinates_x_pixel = [250, ]
            coordinates_y_pixel = [747, ]

        Args:
            screen_size: a tuple of screen pixel size, (horizontal, vertical).
        """
        width_px, height_px = screen_size
        x_pixels = []
        for raw_x in self.coordinates_x:
            x_pixels.append(
                int(raw_x * width_px / config.ANDROID_LOG_MAX_ABS_X))
        self.coordinates_x_pixel = x_pixels

        y_pixels = []
        for raw_y in self.coordinates_y:
            y_pixels.append(
                int(raw_y * height_px / config.ANDROID_LOG_MAX_ABS_Y))
        self.coordinates_y_pixel = y_pixels
332
# For debugging: module-level histograms filled in by get_word_statistics().
# word_num_distribution_dict counts UI objects by number of words;
# word_length_distribution_dict counts words by length.
word_num_distribution_dict = collections.defaultdict(int)
word_length_distribution_dict = collections.defaultdict(int)
337
def get_word_statistics(file_path):
    """Calculates word statistics over the ui objects in one xml/json file.

    As a side effect, the module-level debug histograms
    word_num_distribution_dict and word_length_distribution_dict are
    incremented for every ui object and every word seen.

    Args:
        file_path: The full path of a xml/json file.

    Returns:
        A tuple (ui_object_num, max_word_num, max_word_length)
        ui_object_num: UI object num (number of leaf nodes).
        max_word_num: The maximum number of words contained in all ui objects.
        max_word_length: The maximum length of words contained in all ui
            objects.
    """
    nodes = get_view_hierarchy_list(file_path)

    longest_sequence = 0
    longest_word = 0
    for node in nodes:
        words = node.uiobject.word_sequence
        sequence_size = len(words)
        if sequence_size > longest_sequence:
            longest_sequence = sequence_size
        word_num_distribution_dict[sequence_size] += 1

        for word_size in map(len, words):
            if word_size > longest_word:
                longest_word = word_size
            word_length_distribution_dict[word_size] += 1

    return len(nodes), longest_sequence, longest_word
364
def get_ui_max_values(file_paths):
    """Calculates max values from ui objects in multi xml/json files.

    Only the UI-object fields of the result are filled in. A field stays None
    when `file_paths` is empty.

    Args:
        file_paths: The full paths of multi xml/json files.

    Returns:
        max_values: instance of MaxValues.
    """
    max_values = MaxValues()
    for file_path in file_paths:
        (ui_object_num,
         max_ui_object_word_num,
         max_ui_object_word_length) = get_word_statistics(file_path)

        observed = (
            ('max_ui_object_num', ui_object_num),
            ('max_ui_object_word_num', max_ui_object_word_num),
            ('max_ui_object_word_length', max_ui_object_word_length),
        )
        for field_name, value in observed:
            current = getattr(max_values, field_name)
            # A fresh MaxValues holds None in every field, and Python 3
            # refuses to order None against an int, so the unset case is
            # handled explicitly instead of calling max(None, value).
            if current is None or value > current:
                setattr(max_values, field_name, value)
    return max_values
387
def get_ui_object_list(file_path):
    """Gets the ui object list of the view hierarchy stored in a file.

    Args:
        file_path: file path of xml or json

    Returns:
        The result of `get_ui_objects()` on the loaded view hierarchy: a list
        of ui objects according to view hierarchy leaf nodes.
    """
    return _get_view_hierachy(file_path).get_ui_objects()
400
def get_view_hierarchy_list(file_path):
    """Gets the leaf node list of the view hierarchy stored in a file.

    Args:
        file_path: file path of xml or json

    Returns:
        The result of `get_leaf_nodes()` on the loaded view hierarchy: a list
        of view hierarchy leaf nodes.
    """
    return _get_view_hierachy(file_path).get_leaf_nodes()
412
def _get_view_hierachy(file_path):
    """Loads the view hierarchy stored in an xml or json file.

    The file is read first and the extension is checked afterwards. `.xml`
    files are loaded with config.SCREEN_WIDTH/SCREEN_HEIGHT, `.json` files
    with config.RICO_SCREEN_WIDTH/RICO_SCREEN_HEIGHT. The extension match is
    case-sensitive.

    Args:
        file_path: The full path of an input xml/json file.

    Returns:
        A ViewHierarchy object.

    Raises:
        ValueError: unsupported file format.
    """
    with gfile.GFile(file_path, 'r') as input_file:
        content = input_file.read()

    extension = os.path.splitext(file_path)[1]

    if extension == '.xml':
        hierarchy = view_hierarchy.ViewHierarchy(
            screen_width=config.SCREEN_WIDTH,
            screen_height=config.SCREEN_HEIGHT)
        hierarchy.load_xml(content)
        return hierarchy

    if extension == '.json':
        hierarchy = view_hierarchy.ViewHierarchy(
            screen_width=config.RICO_SCREEN_WIDTH,
            screen_height=config.RICO_SCREEN_HEIGHT)
        hierarchy.load_json(content)
        return hierarchy

    raise ValueError('unsupported file format %s' % extension)