podman

Форк
0
243 строки · 6.2 Кб
1
// Copyright 2017 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4

5
//go:build amd64 && gc && !purego
6

7
#include "textflag.h"
8

9
// c40 is a 16-byte PSHUFB control mask. Within each 64-bit lane, destination
// byte i takes source byte (i+3) mod 8, which rotates the lane right by
// 24 bits (equivalently, left by 40 bits). blamkaSSE4 loads it into X10 and
// hands it to HALF_ROUND as the c40 operand.
DATA ·c40<>+0x00(SB)/8, $0x0201000706050403
DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
GLOBL ·c40<>(SB), (NOPTR+RODATA), $16
12

13
// c48 is a 16-byte PSHUFB control mask. Within each 64-bit lane, destination
// byte i takes source byte (i+2) mod 8, which rotates the lane right by
// 16 bits (equivalently, left by 48 bits). blamkaSSE4 loads it into X11 and
// hands it to HALF_ROUND as the c48 operand.
DATA ·c48<>+0x00(SB)/8, $0x0100070605040302
DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
GLOBL ·c48<>(SB), (NOPTR+RODATA), $16
16

17
// SHUFFLE permutes the 64-bit lanes of six XMM registers in place.
// Writing every register as (lo, hi) and using the values held on entry:
//
//	v2 = (v2.hi, v3.lo)    v3 = (v3.hi, v2.lo)
//	v4 = v5                v5 = v4
//	v6 = (v7.hi, v6.lo)    v7 = (v6.hi, v7.lo)
//
// Seen as three four-lane rows, (v2,v3) is rotated by one lane, (v4,v5) by
// two lanes and (v6,v7) by three lanes. SHUFFLE_INV undoes this permutation.
//
// t1 and t2 are scratch registers and are clobbered. The incoming value of
// t2 does not influence the result: only its high lane is ever read, and
// always after PUNPCKLQDQ has overwritten it.
#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \
	MOVO       v4, t1; \
	MOVO       v5, v4; \
	MOVO       t1, v5; \
	MOVO       v6, t1; \
	PUNPCKLQDQ v6, t2; \
	PUNPCKHQDQ v7, v6; \
	PUNPCKHQDQ t2, v6; \
	PUNPCKLQDQ v7, t2; \
	MOVO       t1, v7; \
	MOVO       v2, t1; \
	PUNPCKHQDQ t2, v7; \
	PUNPCKLQDQ v3, t2; \
	PUNPCKHQDQ t2, v2; \
	PUNPCKLQDQ t1, t2; \
	PUNPCKHQDQ t2, v3
33

34
// SHUFFLE_INV is the inverse lane permutation of SHUFFLE.
// Writing every register as (lo, hi) and using the values held on entry:
//
//	v2 = (v3.hi, v2.lo)    v3 = (v2.hi, v3.lo)
//	v4 = v5                v5 = v4
//	v6 = (v6.hi, v7.lo)    v7 = (v7.hi, v6.lo)
//
// Applying it to the output of SHUFFLE restores the original lane order.
//
// t1 and t2 are scratch registers and are clobbered. The incoming value of
// t2 does not influence the result: only its high lane is ever read, and
// always after PUNPCKLQDQ has overwritten it.
#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \
	MOVO       v4, t1; \
	MOVO       v5, v4; \
	MOVO       t1, v5; \
	MOVO       v2, t1; \
	PUNPCKLQDQ v2, t2; \
	PUNPCKHQDQ v3, v2; \
	PUNPCKHQDQ t2, v2; \
	PUNPCKLQDQ v3, t2; \
	MOVO       t1, v3; \
	MOVO       v6, t1; \
	PUNPCKHQDQ t2, v3; \
	PUNPCKLQDQ v7, t2; \
	PUNPCKHQDQ t2, v6; \
	PUNPCKLQDQ t1, t2; \
	PUNPCKHQDQ t2, v7
50

51
// HALF_ROUND runs the same four-step mixing sequence twice: first on the
// register group (v0, v2, v4, v6), then on the group (v1, v3, v5, v7).
// Every step acts independently on the two 64-bit lanes of each register;
// additions are PADDQ, i.e. modulo 2^64.
//
// With lo32(x) meaning the low 32 bits of a lane (the inputs PMULULQ
// multiplies), the sequence for a group (a, b, c, d) is:
//
//	a = a + b + 2*lo32(a)*lo32(b);  d = d ^ a, 32-bit halves swapped (PSHUFD $0xB1)
//	c = c + d + 2*lo32(c)*lo32(d);  b = b ^ c, bytes permuted by PSHUFB with c40
//	a = a + b + 2*lo32(a)*lo32(b);  d = d ^ a, bytes permuted by PSHUFB with c48
//	c = c + d + 2*lo32(c)*lo32(d);  b = b ^ c, rotated left by one bit
//
// The final one-bit rotation is built from (b + b) ^ (b >> 63).
//
// t0 is a scratch register and is clobbered. c40 and c48 are the PSHUFB
// control operands and are only read.
#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, t0, c40, c48) \
	MOVO    v0, t0;        \
	PMULULQ v2, t0;        \
	PADDQ   v2, v0;        \
	PADDQ   t0, v0;        \
	PADDQ   t0, v0;        \
	PXOR    v0, v6;        \
	PSHUFD  $0xB1, v6, v6; \
	MOVO    v4, t0;        \
	PMULULQ v6, t0;        \
	PADDQ   v6, v4;        \
	PADDQ   t0, v4;        \
	PADDQ   t0, v4;        \
	PXOR    v4, v2;        \
	PSHUFB  c40, v2;       \
	MOVO    v0, t0;        \
	PMULULQ v2, t0;        \
	PADDQ   v2, v0;        \
	PADDQ   t0, v0;        \
	PADDQ   t0, v0;        \
	PXOR    v0, v6;        \
	PSHUFB  c48, v6;       \
	MOVO    v4, t0;        \
	PMULULQ v6, t0;        \
	PADDQ   v6, v4;        \
	PADDQ   t0, v4;        \
	PADDQ   t0, v4;        \
	PXOR    v4, v2;        \
	MOVO    v2, t0;        \
	PADDQ   v2, t0;        \
	PSRLQ   $63, v2;       \
	PXOR    t0, v2;        \
	MOVO    v1, t0;        \
	PMULULQ v3, t0;        \
	PADDQ   v3, v1;        \
	PADDQ   t0, v1;        \
	PADDQ   t0, v1;        \
	PXOR    v1, v7;        \
	PSHUFD  $0xB1, v7, v7; \
	MOVO    v5, t0;        \
	PMULULQ v7, t0;        \
	PADDQ   v7, v5;        \
	PADDQ   t0, v5;        \
	PADDQ   t0, v5;        \
	PXOR    v5, v3;        \
	PSHUFB  c40, v3;       \
	MOVO    v1, t0;        \
	PMULULQ v3, t0;        \
	PADDQ   v3, v1;        \
	PADDQ   t0, v1;        \
	PADDQ   t0, v1;        \
	PXOR    v1, v7;        \
	PSHUFB  c48, v7;       \
	MOVO    v5, t0;        \
	PMULULQ v7, t0;        \
	PADDQ   v7, v5;        \
	PADDQ   t0, v5;        \
	PADDQ   t0, v5;        \
	PXOR    v5, v3;        \
	MOVO    v3, t0;        \
	PADDQ   v3, t0;        \
	PSRLQ   $63, v3;       \
	PXOR    t0, v3
114

115
// LOAD_MSG_0 loads 128 contiguous bytes into X0..X7 with unaligned moves.
// block is the base register and off is an index counted in 8-byte words,
// so the first load reads from byte offset 8*off and each following
// register takes the next 16 bytes.
#define LOAD_MSG_0(block, off) \
	MOVOU 8*(off+0)(block), X0;  \
	MOVOU 8*(off+2)(block), X1;  \
	MOVOU 8*(off+4)(block), X2;  \
	MOVOU 8*(off+6)(block), X3;  \
	MOVOU 8*(off+8)(block), X4;  \
	MOVOU 8*(off+10)(block), X5; \
	MOVOU 8*(off+12)(block), X6; \
	MOVOU 8*(off+14)(block), X7
124

125
// STORE_MSG_0 writes X0..X7 back to 128 contiguous bytes with unaligned
// moves, using the same addresses LOAD_MSG_0 reads: block is the base
// register, off is an index in 8-byte words, and register Xk goes to byte
// offset 8*off + 16*k.
#define STORE_MSG_0(block, off) \
	MOVOU X0, 8*(off+0)(block);  \
	MOVOU X1, 8*(off+2)(block);  \
	MOVOU X2, 8*(off+4)(block);  \
	MOVOU X3, 8*(off+6)(block);  \
	MOVOU X4, 8*(off+8)(block);  \
	MOVOU X5, 8*(off+10)(block); \
	MOVOU X6, 8*(off+12)(block); \
	MOVOU X7, 8*(off+14)(block)
134

135
// LOAD_MSG_1 loads eight 16-byte chunks into X0..X7 with unaligned moves,
// stepping 128 bytes (sixteen 8-byte words) between chunks. block is the
// base register and off is an index in 8-byte words: register Xk is read
// from byte offset 8*off + 128*k.
#define LOAD_MSG_1(block, off) \
	MOVOU 8*off+0*8(block), X0;  \
	MOVOU 8*off+16*8(block), X1; \
	MOVOU 8*off+32*8(block), X2; \
	MOVOU 8*off+48*8(block), X3; \
	MOVOU 8*off+64*8(block), X4; \
	MOVOU 8*off+80*8(block), X5; \
	MOVOU 8*off+96*8(block), X6; \
	MOVOU 8*off+112*8(block), X7
144

145
// STORE_MSG_1 writes X0..X7 back with unaligned moves to the same strided
// addresses LOAD_MSG_1 reads: block is the base register, off is an index
// in 8-byte words, and register Xk goes to byte offset 8*off + 128*k.
#define STORE_MSG_1(block, off) \
	MOVOU X0, 8*off+0*8(block);  \
	MOVOU X1, 8*off+16*8(block); \
	MOVOU X2, 8*off+32*8(block); \
	MOVOU X3, 8*off+48*8(block); \
	MOVOU X4, 8*off+64*8(block); \
	MOVOU X5, 8*off+80*8(block); \
	MOVOU X6, 8*off+96*8(block); \
	MOVOU X7, 8*off+112*8(block)
154

155
// BLAMKA_ROUND_0 transforms, in place, the 128 contiguous bytes that start
// at word index off of block. It loads them into X0..X7, runs HALF_ROUND,
// permutes lanes with SHUFFLE, runs HALF_ROUND again, restores the lane
// order with SHUFFLE_INV and stores X0..X7 back.
//
// t0 and t1 are scratch registers; c40 and c48 are the PSHUFB control
// operands forwarded to HALF_ROUND. X0..X7, t0 and t1 are clobbered.
#define BLAMKA_ROUND_0(block, off, t0, t1, c40, c48) \
	LOAD_MSG_0(block, off);                                   \
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \
	SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1);                  \
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1);              \
	STORE_MSG_0(block, off)
162

163
// BLAMKA_ROUND_1 performs the same load / HALF_ROUND / SHUFFLE /
// HALF_ROUND / SHUFFLE_INV / store sequence as BLAMKA_ROUND_0, but on the
// strided layout of LOAD_MSG_1 and STORE_MSG_1: eight 16-byte chunks that
// lie 128 bytes apart, the first at word index off of block.
//
// t0 and t1 are scratch registers; c40 and c48 are the PSHUFB control
// operands forwarded to HALF_ROUND. X0..X7, t0 and t1 are clobbered.
#define BLAMKA_ROUND_1(block, off, t0, t1, c40, c48) \
	LOAD_MSG_1(block, off);                                   \
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \
	SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1);                  \
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1);              \
	STORE_MSG_1(block, off)
170

171
// func blamkaSSE4(b *block)
//
// blamkaSSE4 transforms the 1024 bytes addressed by b in place.
//
// Register use:
//	AX       pointer b
//	X0..X7   working state of the current round
//	X8, X9   scratch registers for the round macros
//	X10, X11 PSHUFB masks c40 and c48
//
// The first eight rounds each cover 16 consecutive 8-byte words (word
// offsets 0, 16, ..., 112). The last eight rounds each cover one pair of
// adjacent words taken from every one of those 16-word groups (word
// offsets 0, 2, ..., 14 with a 128-byte stride).
//
// NOSPLIT: the function has no stack frame ($0) and makes no calls.
TEXT ·blamkaSSE4(SB), NOSPLIT, $0-8
	MOVQ b+0(FP), AX

	MOVOU ·c40<>(SB), X10
	MOVOU ·c48<>(SB), X11

	BLAMKA_ROUND_0(AX, 0, X8, X9, X10, X11)
	BLAMKA_ROUND_0(AX, 16, X8, X9, X10, X11)
	BLAMKA_ROUND_0(AX, 32, X8, X9, X10, X11)
	BLAMKA_ROUND_0(AX, 48, X8, X9, X10, X11)
	BLAMKA_ROUND_0(AX, 64, X8, X9, X10, X11)
	BLAMKA_ROUND_0(AX, 80, X8, X9, X10, X11)
	BLAMKA_ROUND_0(AX, 96, X8, X9, X10, X11)
	BLAMKA_ROUND_0(AX, 112, X8, X9, X10, X11)

	BLAMKA_ROUND_1(AX, 0, X8, X9, X10, X11)
	BLAMKA_ROUND_1(AX, 2, X8, X9, X10, X11)
	BLAMKA_ROUND_1(AX, 4, X8, X9, X10, X11)
	BLAMKA_ROUND_1(AX, 6, X8, X9, X10, X11)
	BLAMKA_ROUND_1(AX, 8, X8, X9, X10, X11)
	BLAMKA_ROUND_1(AX, 10, X8, X9, X10, X11)
	BLAMKA_ROUND_1(AX, 12, X8, X9, X10, X11)
	BLAMKA_ROUND_1(AX, 14, X8, X9, X10, X11)
	RET
196

197
// func mixBlocksSSE2(out, a, b, c *block)
//
// mixBlocksSSE2 writes a XOR b XOR c to out, 16 bytes per iteration.
// The previous contents of out are not read.
//
// Register use:
//	DX          out (destination cursor)
//	AX, BX, CX  a, b, c (source cursors)
//	DI          8-byte words still to process; starts at 128 and drops by 2
//	            per iteration, so the loop runs 64 times over 1024 bytes
//	X0..X2      working data
//
// NOSPLIT: the function has no stack frame ($0) and makes no calls.
TEXT ·mixBlocksSSE2(SB), NOSPLIT, $0-32
	MOVQ out+0(FP), DX
	MOVQ a+8(FP), AX
	MOVQ b+16(FP), BX
	MOVQ c+24(FP), CX
	MOVQ $128, DI

loop:
	MOVOU 0(AX), X0
	MOVOU 0(BX), X1
	MOVOU 0(CX), X2
	PXOR  X1, X0
	PXOR  X2, X0
	MOVOU X0, 0(DX)
	ADDQ  $16, AX
	ADDQ  $16, BX
	ADDQ  $16, CX
	ADDQ  $16, DX
	SUBQ  $2, DI
	JA    loop    // unsigned: continue while the remaining count is above zero
	RET
219

220
// func xorBlocksSSE2(out, a, b, c *block)
//
// xorBlocksSSE2 XORs a XOR b XOR c into out, 16 bytes per iteration:
// out = out XOR a XOR b XOR c. Unlike mixBlocksSSE2, the existing contents
// of out take part in the result.
//
// Register use:
//	DX          out (read and written in place)
//	AX, BX, CX  a, b, c (source cursors)
//	DI          8-byte words still to process; starts at 128 and drops by 2
//	            per iteration, so the loop runs 64 times over 1024 bytes
//	X0..X3      working data
//
// NOSPLIT: the function has no stack frame ($0) and makes no calls.
TEXT ·xorBlocksSSE2(SB), NOSPLIT, $0-32
	MOVQ out+0(FP), DX
	MOVQ a+8(FP), AX
	MOVQ b+16(FP), BX
	MOVQ c+24(FP), CX
	MOVQ $128, DI

loop:
	MOVOU 0(AX), X0
	MOVOU 0(BX), X1
	MOVOU 0(CX), X2
	MOVOU 0(DX), X3
	PXOR  X1, X0
	PXOR  X2, X0
	PXOR  X3, X0
	MOVOU X0, 0(DX)
	ADDQ  $16, AX
	ADDQ  $16, BX
	ADDQ  $16, CX
	ADDQ  $16, DX
	SUBQ  $2, DI
	JA    loop    // unsigned: continue while the remaining count is above zero
	RET
244

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.