cubefs
1// +build gc,!purego,!noasm
2
3#include "textflag.h"
4
// Register roles shared by every routine in this file.

// Arguments, result, and loop bookkeeping.
#define digest	R1 // Digest pointer (used by writeBlocks).
#define h	R2 // Running hash; Sum64 returns it.
#define p	R3 // Cursor into the input bytes.
#define len	R4 // Input length in bytes.
#define nblocks	R5 // Number of 32-byte blocks: len / 32.

// Multiplicative constants, loaded from primes<>.
#define prime1	R7
#define prime2	R8
#define prime3	R9
#define prime4	R10
#define prime5	R11

// Accumulators updated by the block loop.
#define v1	R12
#define v2	R13
#define v3	R14
#define v4	R15

// Scratch: loaded input words and rotated intermediates.
#define x1	R20
#define x2	R21
#define x3	R22
#define x4	R23
24
// round mixes one 8-byte input word x into accumulator acc:
//
//	acc = rotl(acc + x*prime2, 31) * prime1
//
// x is left unchanged. prime1 and prime2 must already be loaded.
#define round(acc, x) \
	MADD prime2, acc, x, acc \
	ROR  $64-31, acc, acc    \
	MUL  prime1, acc, acc
29
// round0 is round with a zero accumulator, computed in place:
//
//	x = rotl(x*prime2, 31) * prime1
#define round0(x) \
	MUL prime2, x, x \
	ROR $64-31, x, x \
	MUL prime1, x, x
35
// mergeRound folds accumulator x into the hash register h:
//
//	x = rotl(x*prime2, 31) * prime1
//	h = (h ^ x)*prime1 + prime4
//
// x is clobbered.
#define mergeRound(x) \
	MUL  prime2, x, x         \
	ROR  $64-31, x, x         \
	MUL  prime1, x, x         \
	EOR  x, h, h              \
	MADD h, prime4, prime1, h
40
// blocksLoop consumes every complete 32-byte block of the input, mixing
// the four 8-byte words of each block into v1, v2, v3 and v4 in turn.
//
// On exit p has advanced past the last complete block and nblocks is
// zero; len is not modified. x1..x4 are clobbered. The loop body runs
// before the counter is tested, so the caller must guarantee len >= 32.
#define blocksLoop() \
	LSR     $5, len, nblocks \
	PCALIGN $16              \
	loop:                    \
	LDP.P   16(p), (x1, x2)  \
	LDP.P   16(p), (x3, x4)  \
	round(v1, x1)            \
	round(v2, x2)            \
	round(v3, x3)            \
	round(v4, x4)            \
	SUB     $1, nblocks      \
	CBNZ    nblocks, loop
54
// The primes are duplicated in this file so that they occupy a single
// contiguous read-only array, which lets the code fetch them in pairs
// with LDP. They are laid out in order: prime1 at offset 0 through
// prime5 at offset 32.
DATA primes<>+0(SB)/8, $0x9E3779B185EBCA87  // 11400714785074694791
DATA primes<>+8(SB)/8, $0xC2B2AE3D27D4EB4F  // 14029467366897019727
DATA primes<>+16(SB)/8, $0x165667B19E3779F9 // 1609587929392839161
DATA primes<>+24(SB)/8, $0x85EBCA77C2B2AE63 // 9650029242287828579
DATA primes<>+32(SB)/8, $0x27D4EB2F165667C5 // 2870177450012600261
GLOBL primes<>(SB), NOPTR|RODATA, $40
63
// func Sum64(b []byte) uint64
//
// Sum64 hashes b in one pass and returns the 64-bit result. Inputs of
// 32 bytes or more first go through the block loop. The remaining bytes
// (selected by the low five bits of len) are then absorbed in 16-, 8-,
// 4-, 2- and 1-byte steps, and a final shift/multiply sequence mixes h.
TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32
	LDP b_base+0(FP), (p, len)

	LDP  primes<>+0(SB), (prime1, prime2)
	LDP  primes<>+16(SB), (prime3, prime4)
	MOVD primes<>+32(SB), prime5

	// Short inputs skip the block loop with h seeded to prime5.
	// Otherwise h is zeroed here and overwritten after the loop.
	CMP  $32, len
	CSEL LO, prime5, ZR, h
	BLO  tail

	// Initial accumulator values.
	ADD  prime1, prime2, v1 // v1 = prime1 + prime2
	MOVD prime2, v2         // v2 = prime2
	MOVD ZR, v3             // v3 = 0
	NEG  prime1, v4         // v4 = -prime1

	blocksLoop()

	// h = rotl(v1, 1) + rotl(v2, 7) + rotl(v3, 12) + rotl(v4, 18)
	ROR $64-1, v1, x1
	ROR $64-7, v2, x2
	ROR $64-12, v3, x3
	ROR $64-18, v4, x4
	ADD x1, x2, x2
	ADD x3, x4, x4
	ADD x2, x4, h

	mergeRound(v1)
	mergeRound(v2)
	mergeRound(v3)
	mergeRound(v4)

tail:
	ADD len, h, h

	// Bit 4 of len set: absorb two 8-byte words. For each word w:
	//	h = rotl(h ^ round0(w), 27)*prime1 + prime4
	// The XOR is applied after rotating both sides, using
	// rotl(h, 27) ^ rotl(w, 27) == rotl(h ^ w, 27).
	TBZ   $4, len, tail8
	LDP.P 16(p), (x1, x2)

	round0(x1)
	ROR  $64-27, h, h
	EOR  x1 @> 64-27, h, h
	MADD h, prime4, prime1, h

	round0(x2)
	ROR  $64-27, h, h
	EOR  x2 @> 64-27, h, h
	MADD h, prime4, prime1, h

tail8:
	// Bit 3 of len set: absorb one 8-byte word, same step as above.
	TBZ    $3, len, tail4
	MOVD.P 8(p), x1

	round0(x1)
	ROR  $64-27, h, h
	EOR  x1 @> 64-27, h, h
	MADD h, prime4, prime1, h

tail4:
	// Bit 2 of len set: absorb one zero-extended 4-byte word w:
	//	h = rotl(h ^ w*prime1, 23)*prime2 + prime3
	TBZ     $2, len, tail2
	MOVWU.P 4(p), x2

	MUL  prime1, x2, x2
	ROR  $64-23, h, h
	EOR  x2 @> 64-23, h, h
	MADD h, prime3, prime2, h

tail2:
	// Bit 1 of len set: load a halfword and absorb its low byte, then
	// its high byte. For each byte c:
	//	h = rotl(h ^ c*prime5, 11) * prime1
	TBZ     $1, len, tail1
	MOVHU.P 2(p), x3
	AND     $255, x3, x1
	LSR     $8, x3, x2

	MUL prime5, x1, x1
	ROR $64-11, h, h
	EOR x1 @> 64-11, h, h
	MUL prime1, h, h

	MUL prime5, x2, x2
	ROR $64-11, h, h
	EOR x2 @> 64-11, h, h
	MUL prime1, h, h

tail1:
	// Bit 0 of len set: absorb the final byte with the same per-byte step.
	TBZ   $0, len, avalanche
	MOVBU (p), x4

	MUL prime5, x4, x4
	ROR $64-11, h, h
	EOR x4 @> 64-11, h, h
	MUL prime1, h, h

avalanche:
	// Final mixing: alternate xor-with-right-shift and multiply.
	EOR h >> 33, h, h
	MUL prime2, h, h
	EOR h >> 29, h, h
	MUL prime3, h, h
	EOR h >> 32, h, h

	MOVD h, ret+24(FP)
	RET
164
// func writeBlocks(d *Digest, b []byte) int
//
// writeBlocks runs the block loop over b, updating the four accumulators
// held in the first 32 bytes of *d. It returns len(b) rounded down to a
// multiple of 32.
//
// The caller must ensure len(b) >= 32.
TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40
	MOVD d+0(FP), digest
	LDP  b_base+8(FP), (p, len)

	// Only prime1 and prime2 are needed by the block loop.
	LDP primes<>+0(SB), (prime1, prime2)

	// Load the accumulators; v1..v4 are assumed to be stored
	// contiguously at the start of the Digest.
	LDP 0(digest), (v1, v2)
	LDP 16(digest), (v3, v4)

	blocksLoop()

	// Write the updated accumulators back.
	STP (v1, v2), 0(digest)
	STP (v3, v4), 16(digest)

	// Return value: len with its low five bits cleared.
	BIC  $31, len, len
	MOVD len, ret+32(FP)
	RET
187