onnxruntime

Форк
0
376 строк · 12.0 Кб
1
// Copyright (c) Microsoft Corporation. All rights reserved.
2
// Licensed under the MIT License.
3

4
import { AttributeWithCacheKey, createAttributeWithCacheKey } from '../../../attribute-with-cache-key';
5
import { Graph } from '../../../graph';
6
import { OperatorImplementation, OperatorInitialization } from '../../../operators';
7
import { Tensor } from '../../../tensor';
8
import { getGlsl } from '../glsl-source';
9
import { WebGLInferenceHandler } from '../inference-handler';
10
import { ProgramInfo, TextureType } from '../types';
11

12
/**
 * Parsed attributes for the WebGL Upsample (opset 7/9) / Resize (opset 10+)
 * operator, produced by `parseUpsampleAttributes` and cached via the
 * attribute cache key.
 */
export interface UpsampleAttributes extends AttributeWithCacheKey {
  // Opset version the node was parsed with; controls which attributes/inputs apply.
  readonly opset: number;
  // True for opset >= 10, where the operator is named Resize instead of Upsample.
  readonly isResize: boolean;
  // Interpolation mode: 'nearest', 'linear', or (opset 11+ only) 'cubic'.
  readonly mode: string;
  // Per-dimension scale factors; populated from the 'scales' node attribute only
  // for opset < 9 (later opsets pass scales as an operator input instead).
  readonly scales: number[];
  // 'extrapolation_value' attribute (default 0), used with tf_crop_and_resize.
  readonly extrapolationValue: number;
  // One of: asymmetric, pytorch_half_pixel, tf_half_pixel_for_nn, align_corners,
  // tf_crop_and_resize, half_pixel. Forced to 'asymmetric' for opset <= 10.
  readonly coordinateTransformMode: string;
  // True when coordinateTransformMode is 'tf_crop_and_resize' (same as needRoiInput).
  readonly useExtrapolation: boolean;
  // True when the optional ROI operator input is required (tf_crop_and_resize mode).
  readonly needRoiInput: boolean;
  // Rounding rule for 'nearest' mode on opset 11+; empty string otherwise.
  readonly nearestMode: string;
  // 'cubic_coeff_a' attribute (default -0.75).
  readonly cubicCoefficientA: number;
  // 'exclude_outside' attribute; only legal together with cubic mode.
  readonly excludeOutside: boolean;
  // Fast-path flag: nearest + asymmetric + floor (always true below opset 11).
  readonly useNearest2xOptimization: boolean;
  // Operator-input slot indices for roi/scales/sizes; 0 means "not an input".
  readonly roiInputIdx: number;
  readonly scalesInputIdx: number;
  readonly sizesInputIdx: number;
}
29

30
// Static program descriptor shared by every Upsample shader variant: one
// unpacked-texture input named 'X'. Per-node details (cacheHint, shaderSource,
// output) are filled in at run time by createUpsampleProgramInfo.
const upsampleProgramMetadata = {
  name: 'Upsample',
  inputNames: ['X'],
  inputTypes: [TextureType.unpacked],
};
35

36
export const upsample: OperatorImplementation<UpsampleAttributes> = (
37
  inferenceHandler: WebGLInferenceHandler,
38
  inputs: Tensor[],
39
  attributes: UpsampleAttributes,
40
): Tensor[] => {
41
  validateInputs(inputs, attributes);
42
  const output = inferenceHandler.run(
43
    {
44
      ...upsampleProgramMetadata,
45
      cacheHint: attributes.cacheKey,
46
      get: () => createUpsampleProgramInfo(inferenceHandler, inputs, attributes),
47
    },
48
    inputs,
49
  );
50
  return [output];
51
};
52

53
export const parseUpsampleAttributesV7: OperatorInitialization<UpsampleAttributes> = (
54
  node: Graph.Node,
55
): UpsampleAttributes => parseUpsampleAttributes(node, 7);
56

57
export const parseUpsampleAttributesV9: OperatorInitialization<UpsampleAttributes> = (
58
  node: Graph.Node,
59
): UpsampleAttributes => parseUpsampleAttributes(node, 9);
60

61
export const parseUpsampleAttributes = (node: Graph.Node, opset: number): UpsampleAttributes => {
62
  const isResize = opset >= 10;
63

64
  // processing node attributes
65
  const mode = node.attributes.getString('mode', 'nearest');
66
  if (mode !== 'nearest' && mode !== 'linear' && (opset < 11 || mode !== 'cubic')) {
67
    throw new Error(`unrecognized mode: ${mode}`);
68
  }
69

70
  let scales: number[] = [];
71
  if (opset < 9) {
72
    scales = node.attributes.getFloats('scales');
73
    scalesValidation(scales, mode, isResize);
74
  }
75

76
  const extrapolationValue = node.attributes.getFloat('extrapolation_value', 0.0);
77

78
  const coordinateTransformMode =
79
    opset > 10 ? node.attributes.getString('coordinate_transformation_mode', 'half_pixel') : 'asymmetric';
80
  if (
81
    [
82
      'asymmetric',
83
      'pytorch_half_pixel',
84
      'tf_half_pixel_for_nn',
85
      'align_corners',
86
      'tf_crop_and_resize',
87
      'half_pixel',
88
    ].indexOf(coordinateTransformMode) === -1
89
  ) {
90
    throw new Error(`coordinate_transform_mode '${coordinateTransformMode}' is not supported`);
91
  }
92
  const needRoiInput = coordinateTransformMode === 'tf_crop_and_resize';
93
  const useExtrapolation = needRoiInput;
94

95
  const nearestMode =
96
    mode === 'nearest' && opset >= 11 ? node.attributes.getString('nearest_mode', 'round_prefer_floor') : '';
97
  if (['round_prefer_floor', 'round_prefer_ceil', 'floor', 'ceil', ''].indexOf(nearestMode) === -1) {
98
    throw new Error(`nearest_mode '${nearestMode}' is not supported`);
99
  }
100

101
  const cubicCoefficientA = node.attributes.getFloat('cubic_coeff_a', -0.75);
102
  const excludeOutside = node.attributes.getInt('exclude_outside', 0) !== 0;
103
  if (excludeOutside && mode !== 'cubic') {
104
    throw new Error('exclude_outside can be set to 1 only when mode is CUBIC.');
105
  }
106

107
  const useNearest2xOptimization =
108
    opset < 11 ? true : mode === 'nearest' && coordinateTransformMode === 'asymmetric' && nearestMode === 'floor';
109

110
  let roiInputIdx = 0;
111
  let scalesInputIdx = 0;
112
  let sizesInputIdx = 0;
113

114
  if (opset > 10) {
115
    // handle when roiInput is not given
116
    if (node.inputs.length > 2) {
117
      roiInputIdx = 1;
118
      scalesInputIdx = 2;
119
      sizesInputIdx = 3;
120
    } else {
121
      scalesInputIdx = 1;
122
      sizesInputIdx = 2;
123
    }
124
  } else if (opset === 9) {
125
    scalesInputIdx = 1;
126
  }
127

128
  return createAttributeWithCacheKey({
129
    opset,
130
    isResize,
131
    mode,
132
    scales,
133
    extrapolationValue,
134
    coordinateTransformMode,
135
    useExtrapolation,
136
    needRoiInput,
137
    nearestMode,
138
    cubicCoefficientA,
139
    excludeOutside,
140
    useNearest2xOptimization,
141
    roiInputIdx,
142
    scalesInputIdx,
143
    sizesInputIdx,
144
  });
145
};
146

147
/**
 * Generates the ProgramInfo (GLSL shader source + output metadata) for one
 * Upsample/Resize node.
 *
 * Three shader variants are emitted:
 *   - 'nearest' mode, any rank: generic pitch-walking nearest-neighbor kernel;
 *   - other modes, rank 4: bilinear interpolation over the last two dims;
 *   - other modes, any other rank: bilinear interpolation over dims 0 and 1.
 * NOTE(review): 'cubic' mode would also fall into the bilinear branches here —
 * presumably cubic is rejected or handled before reaching this backend; confirm.
 */
const createUpsampleProgramInfo = (
  inferenceHandler: WebGLInferenceHandler,
  inputs: Tensor[],
  attributes: UpsampleAttributes,
): ProgramInfo => {
  const glsl = getGlsl(inferenceHandler.session.backend.glContext.version);
  // Texture layout of the unpacked input; needed to map flat offsets to texels.
  const [inputWidth, inputHeight] = inferenceHandler.calculateTextureWidthAndHeight(
    inputs[0].dims,
    TextureType.unpacked,
  );

  // Output shape = floor(input_dim * scale), per dimension.
  const outputShape = inputs[0].dims.map((dim, i) => Math.floor(dim * attributes.scales[i]));
  const [outputWidth, outputHeight] = inferenceHandler.calculateTextureWidthAndHeight(
    outputShape,
    TextureType.unpacked,
  );
  const dim = outputShape.length;

  // Row-major strides ("pitches") for input and output, baked into the shader
  // as compile-time constants (innermost dimension has pitch 1).
  const outputPitches = new Array<number>(dim);
  const inputPitches = new Array<number>(dim);
  let precalculatedPitches = `
      int output_pitches[${dim}];
      int input_pitches[${dim}];
      `;
  for (let d = dim - 1; d >= 0; d--) {
    outputPitches[d] = d === dim - 1 ? 1 : outputPitches[d + 1] * outputShape[d + 1];
    inputPitches[d] = d === dim - 1 ? 1 : inputPitches[d + 1] * inputs[0].dims[d + 1];

    precalculatedPitches += `
        output_pitches[${d}] = ${outputPitches[d]};
        input_pitches[${d}] = ${inputPitches[d]};
        `;
  }
  // GLSL helper: read one float from the input texture by flat element offset.
  const getInputFloatFunction = `
      float getInputFloat(int index) {
        vec2 coords = offsetToCoords(index, ${inputWidth}, ${inputHeight});
        float value = getColorAsFloat(${glsl.texture2D}(X, coords));
        return value;
      }
      `;

  // Pick the shader variant. Each 'process' decomposes the flat output offset
  // into per-dimension indices via the precomputed pitches, maps them back to
  // input indices using the integer 'scales' uniform, and samples the input.
  const shaderSource =
    attributes.mode === 'nearest'
      ? // nearest
        `
    ${getInputFloatFunction}
    float process(int indices[${dim}]) {
      int input_index = 0;
      int output_index = coordsToOffset(TexCoords, ${outputWidth}, ${outputHeight});

      ${precalculatedPitches}

      int d, m;
      for (int dim = 0; dim < ${dim}; ++dim) {
        d = output_index / output_pitches[dim];
        m = output_index - d * output_pitches[dim];
        output_index = m;

        if (scales[dim] != 1 && d > 0) {
          int d2 = d / scales[dim];
          m = d - d2 * scales[dim];
          d = d2;
        }
        input_index += input_pitches[dim] * d;
      }

      return getInputFloat(input_index);
    }`
      : dim === 4
        ? // bilinear 4D
          `
    ${getInputFloatFunction}
    float process(int indices[4]) {
      int input_index = 0;
      int output_index = coordsToOffset(TexCoords, ${outputWidth}, ${outputHeight});

      ${precalculatedPitches}

      int m;
      int index_of_dim0, index_of_dim1, index_of_dim2, index_of_dim3;
      index_of_dim0 = output_index / output_pitches[0];
      m = output_index - index_of_dim0 * output_pitches[0];
      index_of_dim1 = m / output_pitches[1];
      m = m - index_of_dim1 * output_pitches[1];
      index_of_dim2 = m / output_pitches[2];
      m = m - index_of_dim2 * output_pitches[2];
      index_of_dim3 = m;

      int index_of_input_dim2, index_of_input_dim3, x_offset, y_offset;
      index_of_input_dim2 = index_of_dim2 / scales[2];
      y_offset = index_of_dim2 - index_of_input_dim2 * scales[2];
      index_of_input_dim3 = index_of_dim3 / scales[3];
      x_offset = index_of_dim3 - index_of_input_dim3 * scales[3];

      input_index = index_of_dim0 * input_pitches[0] +
            index_of_dim1 * input_pitches[1] +
            index_of_input_dim2 * input_pitches[2] +
            index_of_input_dim3;

      float x00 = getInputFloat(input_index);
      float x10, x01, x11;

      bool end_of_dim2 = false;
      if (index_of_input_dim2 == (${inputs[0].dims[2]} - 1)) {
        // It's the end in dimension 2
        x01 = x00;
        end_of_dim2 = true;
      } else {
        x01 = getInputFloat(input_index + input_pitches[2]);
      }

      if (index_of_input_dim3 == (input_pitches[2] - 1)) {
        // It's the end in dimension 3
        x10 = x00;
        x11 = x01;
      }
      else {
        x10 = getInputFloat(input_index + 1);
        x11 = end_of_dim2 ? x10 : getInputFloat(input_index + input_pitches[2] + 1);
      }

      float y0 = x00 + float(y_offset) * (x01 - x00) / float(scales[2]);
      float y1 = x10 + float(y_offset) * (x11 - x10) / float(scales[2]);
      return y0 + float(x_offset) * (y1 - y0) / float(scales[3]);
    }`
        : // bilinear 2D
          `
    ${getInputFloatFunction}
    float process(int indices[2]) {
      int input_index = 0;
      int output_index = coordsToOffset(TexCoords, ${outputWidth}, ${outputHeight});

      ${precalculatedPitches}

      int m;
      int index_of_dim0, index_of_dim1;
      index_of_dim0 = output_index / output_pitches[0];
      m = output_index - index_of_dim0 * output_pitches[0];
      index_of_dim1 = m;

      int index_of_input_dim0, index_of_input_dim1, x_offset, y_offset;
      index_of_input_dim0 = index_of_dim0 / scales[0];
      y_offset = index_of_dim0 - index_of_input_dim0 * scales[0];
      index_of_input_dim1 = index_of_dim1 / scales[1];
      x_offset = index_of_dim1 - index_of_input_dim1 * scales[1];

      input_index = index_of_input_dim0 * input_pitches[0] + index_of_input_dim1;

      float x00 = getInputFloat(input_index);
      float x10, x01, x11;

      bool end_of_dim0 = false;
      if (index_of_input_dim0 == (${inputs[0].dims[0]} - 1)) {
        // It's the end in dimension 0
        x01 = x00;
        end_of_dim0 = true;
      } else {
        x01 = getInputFloat(input_index + input_pitches[0]);
      }

      if (index_of_input_dim1 == (input_pitches[0] - 1)) {
        // It's the end in dimension 1
        x10 = x00;
        x11 = x01;
      }
      else {
        x10 = getInputFloat(input_index + 1);
        x11 = end_of_dim0 ? x10 : getInputFloat(input_index + input_pitches[0] + 1);
      }

      float y0 = x00 + float(y_offset) * (x01 - x00) / float(scales[0]);
      float y1 = x10 + float(y_offset) * (x11 - x10) / float(scales[0]);
      return y0 + float(x_offset) * (y1 - y0) / float(scales[1]);
    }`;
  return {
    ...upsampleProgramMetadata,
    output: { dims: outputShape, type: inputs[0].type, textureType: TextureType.unpacked },
    shaderSource,
    // The shader receives integer scale factors; Math.ceil rounds fractional
    // scales up. NOTE(review): fractional scales therefore lose precision in
    // this uniform — confirm callers guarantee integer scales on this path.
    variables: [
      {
        name: 'scales',
        type: 'int',
        arrayLength: attributes.scales.length,
        data: attributes.scales.map((x) => Math.ceil(x)),
      },
    ],
  };
};
335

336
export const validateInputs = (inputs: Tensor[], attribute: UpsampleAttributes): void => {
337
  if (
338
    !inputs ||
339
    (attribute.opset < 9 && inputs.length !== 1) ||
340
    (attribute.opset >= 9 && attribute.opset < 11 && inputs.length !== 2) ||
341
    (attribute.opset >= 11 && inputs.length < 2)
342
  ) {
343
    throw new Error('invalid inputs.');
344
  }
345

346
  if (attribute.scales.length > 0 && inputs[0].dims.length !== attribute.scales.length) {
347
    throw new Error('Invalid input shape.');
348
  }
349

350
  if (inputs[0].type === 'string') {
351
    throw new Error('Invalid input tensor types.');
352
  }
353
};
354

355
/**
 * Validates scale factors for Upsample/Resize.
 *
 * Upsample (pre-Resize, `isResize === false`) only supports magnification, so
 * every scale must be >= 1; Resize accepts any strictly positive scale.
 * 'linear'/'cubic' interpolation is only implemented for 2-D data, or 4-D data
 * whose outermost two scales are 1 (i.e. effectively 2-D).
 *
 * @throws Error when any scale is out of range or the mode/rank combination
 *   is unsupported.
 */
export const scalesValidation = (scales: number[], mode: string, isResize: boolean): void => {
  for (const scale of scales) {
    if (isResize) {
      if (scale <= 0) {
        throw new Error('Scale value should be greater than 0.');
      }
    } else if (scale < 1) {
      throw new Error('Scale value should be greater than or equal to 1.');
    }
  }
  if (mode === 'linear' || mode === 'cubic') {
    if (scales.length !== 2 && (scales.length !== 4 || scales[0] !== 1 || scales[1] !== 1)) {
      // Fixed: message previously read "opeartor" and leaked source-indentation
      // whitespace from backslash line continuations into the runtime string.
      throw new Error(
        `'Linear' mode and 'Cubic' mode only support 2-D inputs ('Bilinear', 'Bicubic') ` +
          `or 4-D inputs with the corresponding outermost 2 scale values being 1 ` +
          `in the ${isResize ? 'Resize' : 'Upsample'} operator.`,
      );
    }
  }
};
377

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.