paddlenlp

preprocess.py
404 строки · 13.1 Кб
Перенос по словам
1
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
2
#
3
# Licensed under the Apache License, Version 2.0 (the "License");
4
# you may not use this file except in compliance with the License.
5
# You may obtain a copy of the License at
6
#     http://www.apache.org/licenses/LICENSE-2.0
7
#
8
# Unless required by applicable law or agreed to in writing, software
9
# distributed under the License is distributed on an "AS IS" BASIS,
10
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
# See the License for the specific language governing permissions and
12
# limitations under the License.
13

14
from __future__ import absolute_import, division, print_function, unicode_literals
15

16
import math
17
import random
18
from functools import partial
19

20
import cv2
21
import numpy as np
22
from paddle.vision.transforms import ColorJitter as PPColorJitter
23
from paddle.vision.transforms import functional as F
24
from PIL import Image, ImageFilter
25
from ppfleetx.utils.log import logger
26

27

28
class OperatorParamError(ValueError):
    """Error raised when a preprocessing operator receives invalid parameters.

    It subclasses ``ValueError`` so callers that already catch ``ValueError``
    keep working.
    """
32

33

34
class DecodeImage(object):
    """Decode an encoded image byte string into a numpy array with OpenCV.

    Args:
        to_rgb (bool): if True, reverse the channel axis of the decoded image
            (OpenCV decodes to BGR order, so this yields RGB).
        channel_first (bool): if True, transpose the result from
            (H, W, C) to (C, H, W).
    """

    def __init__(self, to_rgb=True, channel_first=False):
        self.to_rgb = to_rgb
        self.channel_first = channel_first

    def __call__(self, img):
        """Decode ``img`` and return the image array.

        Args:
            img (bytes): non-empty encoded image data (e.g. file contents).

        Returns:
            numpy.ndarray: decoded image, optionally channel-reversed and/or
            transposed according to the constructor flags.

        Raises:
            AssertionError: if ``img`` is not a non-empty ``bytes`` object.
            ValueError: if OpenCV cannot decode the data.
        """
        assert type(img) is bytes and len(img) > 0, "invalid input 'img' in DecodeImage"
        data = np.frombuffer(img, dtype="uint8")
        # Flag 1 is cv2.IMREAD_COLOR: always decode to a 3-channel image.
        img = cv2.imdecode(data, 1)
        if img is None:
            # imdecode signals failure by returning None; fail here with a
            # clear message instead of an AttributeError further down.
            raise ValueError("failed to decode image in DecodeImage: corrupted or unsupported data")

        if self.to_rgb:
            # The shape must be wrapped in a 1-tuple, otherwise "%" treats each
            # dimension as a separate format argument and raises TypeError.
            assert img.shape[2] == 3, "invalid shape of image[%s]" % (img.shape,)
            img = img[:, :, ::-1]

        if self.channel_first:
            img = img.transpose((2, 0, 1))

        return img
53

54

55
class UnifiedResize(object):
    """Resize callable that hides the difference between the cv2 and PIL backends.

    Args:
        interpolation: interpolation mode. A string is looked up
            (case-insensitively) in the table of the selected backend; any
            other value is passed to the backend unchanged. For the cv2
            backend ``None`` falls back to ``cv2.INTER_LINEAR``.
        backend (str): "cv2" or "pil" (case-insensitive). Any other value logs
            a warning and falls back to plain ``cv2.resize``; in that fallback
            ``interpolation`` is not applied.

    Raises:
        KeyError: if ``interpolation`` is a string the selected backend does
            not know.
    """

    def __init__(self, interpolation=None, backend="cv2"):
        _cv2_interp_from_str = {
            "nearest": cv2.INTER_NEAREST,
            "bilinear": cv2.INTER_LINEAR,
            "area": cv2.INTER_AREA,
            "bicubic": cv2.INTER_CUBIC,
            "lanczos": cv2.INTER_LANCZOS4,
        }
        _pil_interp_from_str = {
            "nearest": Image.NEAREST,
            "bilinear": Image.BILINEAR,
            "bicubic": Image.BICUBIC,
            "box": Image.BOX,
            "lanczos": Image.LANCZOS,
            "hamming": Image.HAMMING,
        }

        def _pil_resize(src, size, resample):
            # Round-trip through PIL so callers always get an ndarray back.
            pil_img = Image.fromarray(src)
            pil_img = pil_img.resize(size, resample)
            return np.asarray(pil_img)

        backend_name = backend.lower()
        if backend_name == "cv2":
            if isinstance(interpolation, str):
                interpolation = _cv2_interp_from_str[interpolation.lower()]
            # compatible with opencv < version 4.4.0
            elif interpolation is None:
                interpolation = cv2.INTER_LINEAR
            self.resize_func = partial(cv2.resize, interpolation=interpolation)
        elif backend_name == "pil":
            if isinstance(interpolation, str):
                interpolation = _pil_interp_from_str[interpolation.lower()]
            self.resize_func = partial(_pil_resize, resample=interpolation)
        else:
            # The message previously contained a stray "f" before the
            # placeholder, so the backend name was reported with an "f" prefix.
            logger.warning(
                f'The backend of Resize only support "cv2" or "PIL". "{backend}" is unavailable. Use "cv2" instead.'
            )
            self.resize_func = cv2.resize

    def __call__(self, src, size):
        """Resize ``src`` to ``size``; both are forwarded unchanged to the backend."""
        return self.resize_func(src, size)
97

98

99
class ResizeImage(object):
    """Resize an image to a fixed size, or so that its shorter side has a given length.

    Args:
        size: an int (square target) or a sequence indexed as
            ``(width, height)``. Only used when ``resize_short`` is None or
            not positive.
        resize_short: target length of the shorter image side; the aspect
            ratio is kept. Takes precedence over ``size`` when positive.
        interpolation: forwarded to ``UnifiedResize``.
        backend (str): forwarded to ``UnifiedResize``.

    Raises:
        OperatorParamError: if neither a positive ``resize_short`` nor a
            ``size`` is given.
    """

    def __init__(self, size=None, resize_short=None, interpolation=None, backend="cv2"):
        if resize_short is not None and resize_short > 0:
            self.resize_short = resize_short
            self.w = None
            self.h = None
        elif size is not None:
            self.resize_short = None
            self.w = size if isinstance(size, int) else size[0]
            self.h = size if isinstance(size, int) else size[1]
        else:
            # The old literal used a backslash continuation inside the string,
            # which embedded indentation whitespace (and a typo) in the message.
            raise OperatorParamError(
                "invalid params for ResizeImage: 'size' is None and 'resize_short' is None or not positive"
            )

        self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend)

    def __call__(self, img):
        """Return ``img`` resized according to the constructor settings.

        ``img`` must expose ``shape`` with height and width as its first two
        entries.
        """
        img_h, img_w = img.shape[:2]
        if self.resize_short is not None:
            # Scale both sides by the factor that maps the short side to resize_short.
            percent = float(self.resize_short) / min(img_w, img_h)
            w = int(round(img_w * percent))
            h = int(round(img_h * percent))
        else:
            w = self.w
            h = self.h
        # The resize backend is called with (width, height).
        return self._resize_func(img, (w, h))
129

130

131
class CenterCropImage(object):
    """Crop the central region of an image.

    Args:
        size: an int (square crop) or a pair unpacked as ``(width, height)``.
    """

    def __init__(self, size):
        if isinstance(size, int):
            self.size = (size, size)
        else:
            self.size = size  # (w, h) -- this is how __call__ unpacks it

    def __call__(self, img):
        """Return the centered crop of ``img``.

        Args:
            img (numpy.ndarray): image whose first two axes are height and
                width; a trailing channel axis is optional.

        Returns:
            numpy.ndarray: a view of ``img`` of at most ``height x width``
            pixels. If the image is smaller than the crop along an axis, the
            full extent of that axis is returned.
        """
        w, h = self.size
        img_h, img_w = img.shape[:2]
        # Clamp at 0: a negative start would be read as "count from the end"
        # and silently produce a wrong (possibly empty) crop.
        w_start = max((img_w - w) // 2, 0)
        h_start = max((img_h - h) // 2, 0)

        w_end = w_start + w
        h_end = h_start + h
        # No explicit channel index, so 2-D (grayscale) arrays work as well.
        return img[h_start:h_end, w_start:w_end]
149

150

151
class RandCropImage(object):
    """Crop a random region of random area and aspect ratio, then resize it.

    Args:
        size: an int (expanded to a pair of equal values) or a pair; it is
            handed unchanged to the resize backend as the output size.
        scale: ``[min, max]`` fraction of the image area to crop;
            defaults to ``[0.08, 1.0]``.
        ratio: ``[min, max]`` range the aspect ratio is drawn from;
            defaults to ``[3/4, 4/3]``.
        interpolation: forwarded to ``UnifiedResize``.
        backend (str): forwarded to ``UnifiedResize``.
    """

    def __init__(self, size, scale=None, ratio=None, interpolation=None, backend="cv2"):
        self.size = (size, size) if type(size) is int else size
        self.scale = scale if scale is not None else [0.08, 1.0]
        self.ratio = ratio if ratio is not None else [3.0 / 4.0, 4.0 / 3.0]
        self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend)

    def __call__(self, img):
        """Return a randomly cropped and resized copy of ``img`` (H, W, C array)."""
        # Draw the aspect ratio; its square root scales width up and height down.
        sqrt_ratio = math.sqrt(random.uniform(*self.ratio))
        w_factor = 1.0 * sqrt_ratio
        h_factor = 1.0 / sqrt_ratio

        src_h, src_w = img.shape[:2]

        # Largest area fraction for which the crop still fits inside the image.
        fit_limit = min(
            (float(src_w) / src_h) / (w_factor**2),
            (float(src_h) / src_w) / (h_factor**2),
        )
        upper = min(self.scale[1], fit_limit)
        lower = min(self.scale[0], fit_limit)

        crop_area = src_w * src_h * random.uniform(lower, upper)
        side = math.sqrt(crop_area)
        crop_w = int(side * w_factor)
        crop_h = int(side * h_factor)

        # Random top-left corner such that the crop stays inside the image.
        left = random.randint(0, src_w - crop_w)
        top = random.randint(0, src_h - crop_h)

        patch = img[top : top + crop_h, left : left + crop_w, :]
        return self._resize_func(patch, self.size)
191

192

193
class RandFlipImage(object):
    """Flip an image with probability one half.

    The flip direction is selected by ``flip_code`` and handed to ``cv2.flip``:
        1: Flipped Horizontally
        0: Flipped Vertically
        -1: Flipped Horizontally & Vertically
    """

    def __init__(self, flip_code=1):
        assert flip_code in (-1, 0, 1), "flip_code should be a value in [-1, 0, 1]"
        self.flip_code = flip_code

    def __call__(self, img):
        """Return ``img`` flipped via ``cv2.flip``, or unchanged, chosen by a fair coin."""
        do_flip = random.randint(0, 1) == 1
        if not do_flip:
            return img
        return cv2.flip(img, self.flip_code)
210

211

212
class NormalizeImage(object):
    """Scale pixel values, subtract a per-channel mean and divide by a per-channel std.

    Args:
        scale: multiplicative factor applied before mean/std; a string is
            evaluated as a Python expression. Defaults to ``1/255``.
        mean: three per-channel means; defaults to ``[0.485, 0.456, 0.406]``.
        std: three per-channel stds; defaults to ``[0.229, 0.224, 0.225]``.
        order (str): ``"chw"`` for channel-first input; any other value is
            treated as channel-last.
        output_fp16 (bool): return ``float16`` instead of ``float32``.
        channel_num (int): 3, or 4 to append one all-zero channel to the output.
    """

    def __init__(self, scale=None, mean=None, std=None, order="chw", output_fp16=False, channel_num=3):
        if isinstance(scale, str):
            # NOTE(review): eval() runs arbitrary code; only safe while the
            # string comes from trusted configuration.
            scale = eval(scale)
        assert channel_num in [3, 4], "channel number of input image should be set to 3 or 4."

        if scale is None:
            scale = 1.0 / 255.0
        if mean is None:
            mean = [0.485, 0.456, 0.406]
        if std is None:
            std = [0.229, 0.224, 0.225]

        self.channel_num = channel_num
        self.output_dtype = "float16" if output_fp16 else "float32"
        self.scale = np.float32(scale)
        self.order = order

        # Shape the statistics so they broadcast along the channel axis.
        stat_shape = (3, 1, 1) if self.order == "chw" else (1, 1, 3)
        self.mean = np.array(mean).reshape(stat_shape).astype("float32")
        self.std = np.array(std).reshape(stat_shape).astype("float32")

    def __call__(self, img):
        """Return the normalized image as an ndarray of the configured dtype."""
        if isinstance(img, Image.Image):
            img = np.array(img)

        assert isinstance(img, np.ndarray), "invalid input 'img' in NormalizeImage"

        normalized = (img.astype("float32") * self.scale - self.mean) / self.std

        if self.channel_num == 4:
            # Append one zero-filled channel along the channel axis.
            if self.order == "chw":
                zeros = np.zeros((1, normalized.shape[1], normalized.shape[2]))
                normalized = np.concatenate((normalized, zeros), axis=0)
            else:
                zeros = np.zeros((normalized.shape[0], normalized.shape[1], 1))
                normalized = np.concatenate((normalized, zeros), axis=2)
        return normalized.astype(self.output_dtype)
248

249

250
class ToCHWImage(object):
    """Reorder image axes from (H, W, C) to (C, H, W)."""

    def __init__(self):
        """This operator has no configuration."""

    def __call__(self, img):
        """Return ``img`` with its last axis moved to the front.

        A PIL image is converted to an ndarray first; any other input must
        provide a ``transpose`` method.
        """
        array = np.array(img) if isinstance(img, Image.Image) else img
        return array.transpose((2, 0, 1))
261

262

263
class ColorJitter(PPColorJitter):
    """Paddle ``ColorJitter`` that is applied only with probability ``p``.

    Args:
        p (float, keyword-only, default 1.0): probability of applying the
            jitter. All other arguments are forwarded to the Paddle base class.
    """

    def __init__(self, *args, **kwargs):
        # "p" is not known to the base class, so strip it before delegating.
        self.p = kwargs.pop("p", 1.0)
        super().__init__(*args, **kwargs)

    def __call__(self, img):
        """Return the jittered image as an ndarray, or ``img`` unchanged when skipped."""
        if not random.random() < self.p:
            return img

        if not isinstance(img, Image.Image):
            # The jitter is applied to a PIL image; wrap array input first.
            img = Image.fromarray(np.ascontiguousarray(img))
        jittered = super()._apply_image(img)
        if isinstance(jittered, Image.Image):
            jittered = np.asarray(jittered)
        return jittered
279

280

281
class GaussianBlur(object):
    """Gaussian blur augmentation in SimCLR https://arxiv.org/abs/2002.05709

    Args:
        sigma: ``[min, max]`` range the blur radius is drawn from uniformly;
            defaults to ``[0.1, 2.0]``.
        p (float): probability of applying the blur.
    """

    def __init__(self, sigma=None, p=1.0):
        self.p = p
        # Build the default per instance: a list literal in the signature is
        # shared, so editing one instance's ``sigma`` would change the default
        # for every other instance.
        self.sigma = [0.1, 2.0] if sigma is None else sigma

    def __call__(self, img):
        """Blur ``img`` with probability ``p``.

        Returns:
            The blurred image as an ndarray when the blur is applied (also for
            PIL input); otherwise ``img`` itself, unchanged.
        """
        if random.random() < self.p:
            if not isinstance(img, Image.Image):
                img = np.ascontiguousarray(img)
                img = Image.fromarray(img)
            sigma = random.uniform(self.sigma[0], self.sigma[1])
            img = img.filter(ImageFilter.GaussianBlur(radius=sigma))
            if isinstance(img, Image.Image):
                img = np.asarray(img)
        return img
298

299

300
class Pixels(object):
    """Produce the fill values used by ``RandomErasing``.

    Args:
        mode (str): one of
            "const" - return ``mean`` as given,
            "rand"  - one normally distributed value per channel,
            "pixel" - one normally distributed value per pixel and channel.
        mean: fill value returned in "const" mode; defaults to
            ``[0.0, 0.0, 0.0]``.
    """

    def __init__(self, mode="const", mean=None):
        self._mode = mode
        # Build the default per instance instead of sharing one list object
        # between every Pixels created without an explicit mean.
        self._mean = [0.0, 0.0, 0.0] if mean is None else mean

    def __call__(self, h=224, w=224, c=3):
        """Return fill values for an ``h x w`` patch with ``c`` channels.

        Returns:
            "rand": ndarray of shape ``(1, 1, c)``; "pixel": ndarray of shape
            ``(h, w, c)``; "const": the configured ``mean`` object.

        Raises:
            Exception: if the mode is not one of the three supported values.
        """
        if self._mode == "rand":
            # Follow the requested channel count (was hard-coded to 3).
            return np.random.normal(size=(1, 1, c))
        elif self._mode == "pixel":
            return np.random.normal(size=(h, w, c))
        elif self._mode == "const":
            return self._mean
        else:
            raise Exception('Invalid mode in RandomErasing, only support "const", "rand", "pixel"')
314

315

316
class RandomErasing(object):
    """RandomErasing.
    This code is adapted from https://github.com/zhunzhong07/Random-Erasing, and refer to Timm.

    Args:
        EPSILON: probability of erasing; the image is returned untouched when
            a uniform draw exceeds it.
        sl: lower bound of the erased area as a fraction of the image area.
        sh: upper bound of the erased area as a fraction of the image area.
        r1: lower bound of the aspect ratio; the upper bound is ``1 / r1``.
        mean: fill value for "const" mode; defaults to ``[0.0, 0.0, 0.0]``.
        attempt (int): maximum number of tries to find a patch that fits.
        use_log_aspect (bool): draw the aspect ratio uniformly in log space.
        mode (str): fill mode handed to ``Pixels`` ("const", "rand", "pixel").

    ``EPSILON``, ``sl``, ``sh`` and ``r1`` may also be given as strings, which
    are evaluated as Python expressions.
    """

    def __init__(
        self,
        EPSILON=0.5,
        sl=0.02,
        sh=0.4,
        r1=0.3,
        mean=None,
        attempt=100,
        use_log_aspect=False,
        mode="const",
    ):
        # NOTE(review): eval() runs arbitrary code; only safe while these
        # strings come from trusted configuration.
        self.EPSILON = eval(EPSILON) if isinstance(EPSILON, str) else EPSILON
        self.sl = eval(sl) if isinstance(sl, str) else sl
        self.sh = eval(sh) if isinstance(sh, str) else sh
        r1 = eval(r1) if isinstance(r1, str) else r1
        self.r1 = (math.log(r1), math.log(1 / r1)) if use_log_aspect else (r1, 1 / r1)
        self.use_log_aspect = use_log_aspect
        self.attempt = attempt
        # Fresh default per instance instead of a list shared via the signature.
        mean = [0.0, 0.0, 0.0] if mean is None else mean
        self.get_pixels = Pixels(mode, mean)

    def __call__(self, img):
        """Erase a random rectangle of ``img`` in place and return ``img``.

        Args:
            img (numpy.ndarray): image indexed as (H, W, C). It is modified in
                place when a patch is erased.

        Returns:
            The same ``img`` object. For 3-channel images all channels of the
            patch are overwritten; for any other channel count only channel 0
            is overwritten.
        """
        if random.random() > self.EPSILON:
            return img

        img_h, img_w = img.shape[0], img.shape[1]
        area = img_h * img_w  # loop-invariant

        for _ in range(self.attempt):
            target_area = random.uniform(self.sl, self.sh) * area
            aspect_ratio = random.uniform(*self.r1)
            if self.use_log_aspect:
                aspect_ratio = math.exp(aspect_ratio)

            h = int(round(math.sqrt(target_area * aspect_ratio)))
            w = int(round(math.sqrt(target_area / aspect_ratio)))

            if w < img_w and h < img_h:
                channels = img.shape[2]
                pixels = self.get_pixels(h, w, channels)
                top = random.randint(0, img_h - h)
                left = random.randint(0, img_w - w)
                if channels == 3:
                    img[top : top + h, left : left + w, :] = pixels
                else:
                    # Take the fill for channel 0 only. Array fills ("rand" /
                    # "pixel") are laid out (..., C), so slice the last axis;
                    # indexing the first axis produced a shape that could not
                    # be broadcast onto the (h, w) patch.
                    fill = np.asarray(pixels)
                    fill = fill[..., 0] if fill.ndim == 3 else fill[0]
                    img[top : top + h, left : left + w, 0] = fill
                return img
        return img
366

367

368
class RandomGrayscale(object):
    """Convert an image to grayscale with probability ``p`` (default 0.1).

    Args:
        p (float): probability that the image is converted to grayscale.

    The output type follows the input: a PIL image in gives a PIL image out,
    anything else is wrapped with ``Image.fromarray`` and returned as an
    ndarray. The number of output channels requested from ``to_grayscale`` is
    1 for mode "L" images and 3 otherwise.
    """

    def __init__(self, p=0.1):
        self.p = p

    def __call__(self, img):
        """
        Args:
            img (PIL Image or array): image to be converted to grayscale.
        Returns:
            The randomly grayscaled image, in the same container kind
            (PIL Image / ndarray) as the input.
        """
        came_as_array = not isinstance(img, Image.Image)
        if came_as_array:
            img = Image.fromarray(np.ascontiguousarray(img))

        channels_out = 3
        if img.mode == "L":
            channels_out = 1

        if random.random() < self.p:
            img = F.to_grayscale(img, num_output_channels=channels_out)

        return np.asarray(img) if came_as_array else img
405
paddlenlp

Использование cookies