opencv

Форк
0
/
palette_neon_intrinsics.c 
150 строк · 4.6 Кб
1

2
/* palette_neon_intrinsics.c - NEON optimised palette expansion functions
3
 *
4
 * Copyright (c) 2018-2019 Cosmin Truta
5
 * Copyright (c) 2017-2018 Arm Holdings. All rights reserved.
6
 * Written by Richard Townsend <Richard.Townsend@arm.com>, February 2017.
7
 *
8
 * This code is released under the libpng license.
9
 * For conditions of distribution and use, see the disclaimer
10
 * and license in png.h
11
 */
12

13
#include "../pngpriv.h"
14

15
#if PNG_ARM_NEON_IMPLEMENTATION == 1
16

17
#if defined(_MSC_VER) && !defined(__clang__) && defined(_M_ARM64)
18
#  include <arm64_neon.h>
19
#else
20
#  include <arm_neon.h>
21
#endif
22

23
/* Build an RGBA8 palette from the separate RGB and alpha palettes. */
24
void
25
png_riffle_palette_neon(png_structrp png_ptr)
26
{
27
   png_const_colorp palette = png_ptr->palette;
28
   png_bytep riffled_palette = png_ptr->riffled_palette;
29
   png_const_bytep trans_alpha = png_ptr->trans_alpha;
30
   int num_trans = png_ptr->num_trans;
31
   int i;
32

33
   /* Initially black, opaque. */
34
   uint8x16x4_t w = {{
35
      vdupq_n_u8(0x00),
36
      vdupq_n_u8(0x00),
37
      vdupq_n_u8(0x00),
38
      vdupq_n_u8(0xff),
39
   }};
40

41
   png_debug(1, "in png_riffle_palette_neon");
42

43
   /* First, riffle the RGB colours into an RGBA8 palette.
44
    * The alpha component is set to opaque for now.
45
    */
46
   for (i = 0; i < 256; i += 16)
47
   {
48
      uint8x16x3_t v = vld3q_u8((png_const_bytep)(palette + i));
49
      w.val[0] = v.val[0];
50
      w.val[1] = v.val[1];
51
      w.val[2] = v.val[2];
52
      vst4q_u8(riffled_palette + (i << 2), w);
53
   }
54

55
   /* Fix up the missing transparency values. */
56
   for (i = 0; i < num_trans; i++)
57
      riffled_palette[(i << 2) + 3] = trans_alpha[i];
58
}
59

60
/* Expands a palettized row into RGBA8. */
61
int
62
png_do_expand_palette_rgba8_neon(png_structrp png_ptr, png_row_infop row_info,
63
    png_const_bytep row, png_bytepp ssp, png_bytepp ddp)
64
{
65
   png_uint_32 row_width = row_info->width;
66
   const png_uint_32 *riffled_palette =
67
      (const png_uint_32 *)png_ptr->riffled_palette;
68
   const png_uint_32 pixels_per_chunk = 4;
69
   png_uint_32 i;
70

71
   png_debug(1, "in png_do_expand_palette_rgba8_neon");
72

73
   PNG_UNUSED(row)
74
   if (row_width < pixels_per_chunk)
75
      return 0;
76

77
   /* This function originally gets the last byte of the output row.
78
    * The NEON part writes forward from a given position, so we have
79
    * to seek this back by 4 pixels x 4 bytes.
80
    */
81
   *ddp = *ddp - ((pixels_per_chunk * sizeof(png_uint_32)) - 1);
82

83
   for (i = 0; i < row_width; i += pixels_per_chunk)
84
   {
85
      uint32x4_t cur;
86
      png_bytep sp = *ssp - i, dp = *ddp - (i << 2);
87
      cur = vld1q_dup_u32 (riffled_palette + *(sp - 3));
88
      cur = vld1q_lane_u32(riffled_palette + *(sp - 2), cur, 1);
89
      cur = vld1q_lane_u32(riffled_palette + *(sp - 1), cur, 2);
90
      cur = vld1q_lane_u32(riffled_palette + *(sp - 0), cur, 3);
91
      vst1q_u32((void *)dp, cur);
92
   }
93
   if (i != row_width)
94
   {
95
      /* Remove the amount that wasn't processed. */
96
      i -= pixels_per_chunk;
97
   }
98

99
   /* Decrement output pointers. */
100
   *ssp = *ssp - i;
101
   *ddp = *ddp - (i << 2);
102
   return i;
103
}
104

105
/* Expands a palettized row into RGB8. */
106
int
107
png_do_expand_palette_rgb8_neon(png_structrp png_ptr, png_row_infop row_info,
108
    png_const_bytep row, png_bytepp ssp, png_bytepp ddp)
109
{
110
   png_uint_32 row_width = row_info->width;
111
   png_const_bytep palette = (png_const_bytep)png_ptr->palette;
112
   const png_uint_32 pixels_per_chunk = 8;
113
   png_uint_32 i;
114

115
   png_debug(1, "in png_do_expand_palette_rgb8_neon");
116

117
   PNG_UNUSED(row)
118
   if (row_width <= pixels_per_chunk)
119
      return 0;
120

121
   /* Seeking this back by 8 pixels x 3 bytes. */
122
   *ddp = *ddp - ((pixels_per_chunk * sizeof(png_color)) - 1);
123

124
   for (i = 0; i < row_width; i += pixels_per_chunk)
125
   {
126
      uint8x8x3_t cur;
127
      png_bytep sp = *ssp - i, dp = *ddp - ((i << 1) + i);
128
      cur = vld3_dup_u8(palette + sizeof(png_color) * (*(sp - 7)));
129
      cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 6)), cur, 1);
130
      cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 5)), cur, 2);
131
      cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 4)), cur, 3);
132
      cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 3)), cur, 4);
133
      cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 2)), cur, 5);
134
      cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 1)), cur, 6);
135
      cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 0)), cur, 7);
136
      vst3_u8((void *)dp, cur);
137
   }
138

139
   if (i != row_width)
140
   {
141
      /* Remove the amount that wasn't processed. */
142
      i -= pixels_per_chunk;
143
   }
144

145
   /* Decrement output pointers. */
146
   *ssp = *ssp - i;
147
   *ddp = *ddp - ((i << 1) + i);
148
   return i;
149
}
150

151
#endif /* PNG_ARM_NEON_IMPLEMENTATION */
152

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.