podman

Форк
0
278 строк · 8.4 Кб
1
// Copyright 2016 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4

5
//go:build amd64 && gc && !purego
6

7
#include "textflag.h"
8

9
// BLAKE2b initialization vector, stored as four read-only 16-byte symbols.
// Each symbol holds two consecutive 64-bit IV words so that one 128-bit
// load fetches a pair: ·iv0 = IV[0..1], ·iv1 = IV[2..3], ·iv2 = IV[4..5],
// ·iv3 = IV[6..7].
DATA ·iv0<>+0(SB)/8, $0x6a09e667f3bcc908 // IV[0]
DATA ·iv0<>+8(SB)/8, $0xbb67ae8584caa73b // IV[1]
GLOBL ·iv0<>(SB), RODATA|NOPTR, $16

DATA ·iv1<>+0(SB)/8, $0x3c6ef372fe94f82b // IV[2]
DATA ·iv1<>+8(SB)/8, $0xa54ff53a5f1d36f1 // IV[3]
GLOBL ·iv1<>(SB), RODATA|NOPTR, $16

DATA ·iv2<>+0(SB)/8, $0x510e527fade682d1 // IV[4]
DATA ·iv2<>+8(SB)/8, $0x9b05688c2b3e6c1f // IV[5]
GLOBL ·iv2<>(SB), RODATA|NOPTR, $16

DATA ·iv3<>+0(SB)/8, $0x1f83d9abfb41bd6b // IV[6]
DATA ·iv3<>+8(SB)/8, $0x5be0cd19137e2179 // IV[7]
GLOBL ·iv3<>(SB), RODATA|NOPTR, $16
24

25
// PSHUFB byte-permutation masks. Applied to a register holding two 64-bit
// words, each mask rotates both words independently:
//   ·c40: destination byte i takes source byte (i+3) mod 8 of the same word,
//         i.e. a rotate left by 40 bits (equivalently right by 24).
//   ·c48: destination byte i takes source byte (i+2) mod 8 of the same word,
//         i.e. a rotate left by 48 bits (equivalently right by 16).
DATA ·c40<>+0(SB)/8, $0x0201000706050403 // low word:  bytes 3,4,5,6,7,0,1,2
DATA ·c40<>+8(SB)/8, $0x0a09080f0e0d0c0b // high word: same pattern, offset by 8
GLOBL ·c40<>(SB), RODATA|NOPTR, $16

DATA ·c48<>+0(SB)/8, $0x0100070605040302 // low word:  bytes 2,3,4,5,6,7,0,1
DATA ·c48<>+8(SB)/8, $0x09080f0e0d0c0b0a // high word: same pattern, offset by 8
GLOBL ·c48<>(SB), RODATA|NOPTR, $16
32

33
// SHUFFLE re-arranges three 4-word rows so that a following column-wise
// HALF_ROUND operates on the diagonals of the state.
//
// Each row is a register pair (Lo = words 0,1; Hi = words 2,3):
//   (bLo, bHi): lane i receives word (i+1) mod 4
//   (cLo, cHi): lane i receives word (i+2) mod 4 (the two registers swap)
//   (dLo, dHi): lane i receives word (i+3) mod 4
//
// keep and mix are scratch registers and are clobbered. mix does not need
// to be initialised: only the half written by each PUNPCKLQDQ is ever
// consumed by the PUNPCKHQDQ that follows it.
#define SHUFFLE(bLo, bHi, cLo, cHi, dLo, dHi, keep, mix) \
	MOVO       cLo, keep; \
	MOVO       cHi, cLo; \
	MOVO       keep, cHi; \
	MOVO       dLo, keep; \
	PUNPCKLQDQ dLo, mix; \
	PUNPCKHQDQ dHi, dLo; \
	PUNPCKHQDQ mix, dLo; \
	PUNPCKLQDQ dHi, mix; \
	MOVO       keep, dHi; \
	MOVO       bLo, keep; \
	PUNPCKHQDQ mix, dHi; \
	PUNPCKLQDQ bHi, mix; \
	PUNPCKHQDQ mix, bLo; \
	PUNPCKLQDQ keep, mix; \
	PUNPCKHQDQ mix, bHi
49

50
// SHUFFLE_INV undoes SHUFFLE, moving the three rows back from the diagonal
// arrangement to the column arrangement.
//
// Each row is a register pair (Lo = words 0,1; Hi = words 2,3):
//   (bLo, bHi): lane i receives word (i+3) mod 4
//   (cLo, cHi): lane i receives word (i+2) mod 4 (the two registers swap)
//   (dLo, dHi): lane i receives word (i+1) mod 4
//
// keep and mix are scratch registers and are clobbered. mix does not need
// to be initialised: only the half written by each PUNPCKLQDQ is ever
// consumed by the PUNPCKHQDQ that follows it.
#define SHUFFLE_INV(bLo, bHi, cLo, cHi, dLo, dHi, keep, mix) \
	MOVO       cLo, keep; \
	MOVO       cHi, cLo; \
	MOVO       keep, cHi; \
	MOVO       bLo, keep; \
	PUNPCKLQDQ bLo, mix; \
	PUNPCKHQDQ bHi, bLo; \
	PUNPCKHQDQ mix, bLo; \
	PUNPCKLQDQ bHi, mix; \
	MOVO       keep, bHi; \
	MOVO       dLo, keep; \
	PUNPCKHQDQ mix, bHi; \
	PUNPCKLQDQ dHi, mix; \
	PUNPCKHQDQ mix, dLo; \
	PUNPCKLQDQ keep, mix; \
	PUNPCKHQDQ mix, dHi
66

67
// HALF_ROUND applies the BLAKE2b mixing step to four (a, b, c, d) word
// groups at once. Each row of the state is a register pair holding four
// 64-bit words: a = (a0, a1), b = (b0, b1), c = (c0, c1), d = (d0, d1).
//
// Per 64-bit lane it computes:
//   a += mx + b;  d = rotr(d ^ a, 32);  c += d;  b = rotr(b ^ c, 24)
//   a += my + b;  d = rotr(d ^ a, 16);  c += d;  b = rotr(b ^ c, 63)
//
// Operands:
//   mx0, mx1   message words added in the first half (register or memory)
//   my0, my1   message words added in the second half (register or memory)
//   tmp        scratch register, clobbered; it is first written only after
//              my1 has been consumed, so tmp may alias my1
//   r24, r16   registers holding the PSHUFB masks for the 24- and 16-bit
//              right rotations (the ·c40 and ·c48 tables in this file)
//
// The 32-bit rotation is PSHUFD $0xB1 (swap the dwords of each qword); the
// 63-bit rotation is built as (x + x) ^ (x >> 63).
#define HALF_ROUND(a0, a1, b0, b1, c0, c1, d0, d1, mx0, mx1, my0, my1, tmp, r24, r16) \
	PADDQ  mx0, a0;       \
	PADDQ  mx1, a1;       \
	PADDQ  b0, a0;        \
	PADDQ  b1, a1;        \
	PXOR   a0, d0;        \
	PXOR   a1, d1;        \
	PSHUFD $0xB1, d0, d0; \
	PSHUFD $0xB1, d1, d1; \
	PADDQ  d0, c0;        \
	PADDQ  d1, c1;        \
	PXOR   c0, b0;        \
	PXOR   c1, b1;        \
	PSHUFB r24, b0;       \
	PSHUFB r24, b1;       \
	PADDQ  my0, a0;       \
	PADDQ  my1, a1;       \
	PADDQ  b0, a0;        \
	PADDQ  b1, a1;        \
	PXOR   a0, d0;        \
	PXOR   a1, d1;        \
	PSHUFB r16, d0;       \
	PSHUFB r16, d1;       \
	PADDQ  d0, c0;        \
	PADDQ  d1, c1;        \
	PXOR   c0, b0;        \
	PXOR   c1, b1;        \
	MOVOU  b0, tmp;       \
	PADDQ  b0, tmp;       \
	PSRLQ  $63, b0;       \
	PXOR   tmp, b0;       \
	MOVOU  b1, tmp;       \
	PADDQ  b1, tmp;       \
	PSRLQ  $63, b1;       \
	PXOR   tmp, b1
102

103
// LOAD_MSG gathers eight 64-bit message words from the block at msg into
// four XMM registers, two words per register:
//   dst0 = (msg[w0], msg[w1])    dst1 = (msg[w2], msg[w3])
//   dst2 = (msg[w4], msg[w5])    dst3 = (msg[w6], msg[w7])
// The first word of each pair lands in the low qword (MOVQ), the second in
// the high qword (PINSRQ $1). w0..w7 are word indices; each is scaled by 8
// to form the byte offset from msg.
#define LOAD_MSG(dst0, dst1, dst2, dst3, msg, w0, w1, w2, w3, w4, w5, w6, w7) \
	MOVQ   w0*8(msg), dst0;     \
	PINSRQ $1, w1*8(msg), dst0; \
	MOVQ   w2*8(msg), dst1;     \
	PINSRQ $1, w3*8(msg), dst1; \
	MOVQ   w4*8(msg), dst2;     \
	PINSRQ $1, w5*8(msg), dst2; \
	MOVQ   w6*8(msg), dst3;     \
	PINSRQ $1, w7*8(msg), dst3
112

113
// func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
//
// hashBlocksSSE4 runs the 12-round BLAKE2b compression over blocks, 128
// bytes at a time, updating the chaining value h and the 128-bit byte
// counter c in place.
//
// Preconditions (not checked here): len(blocks) must be a non-zero multiple
// of 128. The loop processes a block before testing the remaining length
// and only stops when that length reaches exactly zero.
//
// Register roles:
//   AX       h                      BX       c
//   CX       flag                   SI       current block pointer
//   DI       bytes remaining        R10      16-byte aligned scratch base
//   R8, R9   counter low / high     X13/X14  ·c40 / ·c48 PSHUFB masks
//   X12, X15 h[0..1], h[2..3], kept in registers across blocks
//   X0..X7   working state rows a, b, c, d (two registers per row)
//   X8..X11  message words and scratch
//
// Scratch layout relative to R10:
//     0..15   ·iv3 ^ (flag || 0), constant for the whole call
//    16..143  message words in round-0 order (reused by round 10)
//   144..271  message words in round-1 order (reused by round 11)
TEXT ·hashBlocksSSE4(SB), NOSPLIT, $288-48 // frame size = 272 + 16 byte alignment
	MOVQ h+0(FP), AX
	MOVQ c+8(FP), BX
	MOVQ flag+16(FP), CX
	MOVQ blocks_base+24(FP), SI
	MOVQ blocks_len+32(FP), DI

	// Round SP up to a 16-byte boundary so MOVO can be used on the scratch area.
	MOVQ SP, R10
	ADDQ $15, R10
	ANDQ $~15, R10

	MOVOU ·iv3<>(SB), X0
	MOVO  X0, 0(R10)
	XORQ  CX, 0(R10)     // 0(R10) = ·iv3 ^ (CX || 0)

	MOVOU ·c40<>(SB), X13
	MOVOU ·c48<>(SB), X14

	MOVOU 0(AX), X12
	MOVOU 16(AX), X15

	MOVQ 0(BX), R8
	MOVQ 8(BX), R9

loop:
	// Advance the 128-bit byte counter by one block. The counter is unsigned:
	// the low word wrapped iff it is now below 128, so the test must be an
	// unsigned compare (JCC = jump if above-or-equal). A signed JGE would
	// wrongly carry into R9 on every block once R8 reaches 2^63.
	ADDQ $128, R8
	CMPQ R8, $128
	JCC  noinc
	INCQ R9

noinc:
	MOVQ R8, X8
	PINSRQ $1, R9, X8    // X8 = (counter low, counter high)

	// Initialise the working state: rows a, b from h; rows c, d from the IV.
	MOVO X12, X0
	MOVO X15, X1
	MOVOU 32(AX), X2
	MOVOU 48(AX), X3
	MOVOU ·iv0<>(SB), X4
	MOVOU ·iv1<>(SB), X5
	MOVOU ·iv2<>(SB), X6

	PXOR X8, X6          // fold the counter into IV[4..5]
	MOVO 0(R10), X7      // IV[6..7] with flag already folded in

	// Round 0. The gathered message words are also saved for round 10.
	LOAD_MSG(X8, X9, X10, X11, SI, 0, 2, 4, 6, 1, 3, 5, 7)
	MOVO X8, 16(R10)
	MOVO X9, 32(R10)
	MOVO X10, 48(R10)
	MOVO X11, 64(R10)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 8, 10, 12, 14, 9, 11, 13, 15)
	MOVO X8, 80(R10)
	MOVO X9, 96(R10)
	MOVO X10, 112(R10)
	MOVO X11, 128(R10)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	// Round 1. The gathered message words are also saved for round 11.
	LOAD_MSG(X8, X9, X10, X11, SI, 14, 4, 9, 13, 10, 8, 15, 6)
	MOVO X8, 144(R10)
	MOVO X9, 160(R10)
	MOVO X10, 176(R10)
	MOVO X11, 192(R10)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 1, 0, 11, 5, 12, 2, 7, 3)
	MOVO X8, 208(R10)
	MOVO X9, 224(R10)
	MOVO X10, 240(R10)
	MOVO X11, 256(R10)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	// Round 2.
	LOAD_MSG(X8, X9, X10, X11, SI, 11, 12, 5, 15, 8, 0, 2, 13)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 10, 3, 7, 9, 14, 6, 1, 4)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	// Round 3.
	LOAD_MSG(X8, X9, X10, X11, SI, 7, 3, 13, 11, 9, 1, 12, 14)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 2, 5, 4, 15, 6, 10, 0, 8)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	// Round 4.
	LOAD_MSG(X8, X9, X10, X11, SI, 9, 5, 2, 10, 0, 7, 4, 15)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 14, 11, 6, 3, 1, 12, 8, 13)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	// Round 5.
	LOAD_MSG(X8, X9, X10, X11, SI, 2, 6, 0, 8, 12, 10, 11, 3)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 4, 7, 15, 1, 13, 5, 14, 9)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	// Round 6.
	LOAD_MSG(X8, X9, X10, X11, SI, 12, 1, 14, 4, 5, 15, 13, 10)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 0, 6, 9, 8, 7, 3, 2, 11)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	// Round 7.
	LOAD_MSG(X8, X9, X10, X11, SI, 13, 7, 12, 3, 11, 14, 1, 9)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 5, 15, 8, 2, 0, 4, 6, 10)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	// Round 8.
	LOAD_MSG(X8, X9, X10, X11, SI, 6, 14, 11, 0, 15, 9, 3, 8)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 12, 13, 1, 10, 2, 7, 4, 5)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	// Round 9.
	LOAD_MSG(X8, X9, X10, X11, SI, 10, 8, 7, 1, 2, 4, 6, 5)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 15, 9, 3, 13, 11, 14, 12, 0)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	// Round 10: same message order as round 0, read back from the scratch area.
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 16(R10), 32(R10), 48(R10), 64(R10), X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 80(R10), 96(R10), 112(R10), 128(R10), X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	// Round 11: same message order as round 1, read back from the scratch area.
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 144(R10), 160(R10), 176(R10), 192(R10), X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 208(R10), 224(R10), 240(R10), 256(R10), X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	// Feed-forward: h[i] ^= v[i] ^ v[i+8]. h[0..3] stay in X12/X15 for the
	// next block; h[4..7] are written back to memory every iteration.
	MOVOU 32(AX), X10
	MOVOU 48(AX), X11
	PXOR  X0, X12
	PXOR  X1, X15
	PXOR  X2, X10
	PXOR  X3, X11
	PXOR  X4, X12
	PXOR  X5, X15
	PXOR  X6, X10
	PXOR  X7, X11
	MOVOU X10, 32(AX)
	MOVOU X11, 48(AX)

	// Step to the next block; exit only when the remaining length is exactly 0.
	LEAQ 128(SI), SI
	SUBQ $128, DI
	JNE  loop

	// Write back h[0..3] and the updated counter.
	MOVOU X12, 0(AX)
	MOVOU X15, 16(AX)

	MOVQ R8, 0(BX)
	MOVQ R9, 8(BX)

	RET
279

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.