podman

sum_generic.go
312 строк · 9.6 Кб
Перенос по словам
1
// Copyright 2018 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4

5
// This file provides the generic implementation of Sum and MAC. Other files
6
// might provide optimized assembly implementations of some of this code.
7

8
package poly1305
9

10
import (
11
	"encoding/binary"
12
	"math/bits"
13
)
14

15
// Poly1305 [RFC 7539] is a relatively simple algorithm: the authentication tag
16
// for a 64 bytes message is approximately
17
//
18
//     s + m[0:16] * r⁴ + m[16:32] * r³ + m[32:48] * r² + m[48:64] * r  mod  2¹³⁰ - 5
19
//
20
// for some secret r and s. It can be computed sequentially like
21
//
22
//     for len(msg) > 0:
23
//         h += read(msg, 16)
24
//         h *= r
25
//         h %= 2¹³⁰ - 5
26
//     return h + s
27
//
28
// All the complexity is about doing performant constant-time math on numbers
29
// larger than any available numeric type.
30

31
func sumGeneric(out *[TagSize]byte, msg []byte, key *[32]byte) {
32
	h := newMACGeneric(key)
33
	h.Write(msg)
34
	h.Sum(out)
35
}
36

37
func newMACGeneric(key *[32]byte) macGeneric {
38
	m := macGeneric{}
39
	initialize(key, &m.macState)
40
	return m
41
}
42

43
// macState holds numbers in saturated 64-bit little-endian limbs. That is,
44
// the value of [x0, x1, x2] is x[0] + x[1] * 2⁶⁴ + x[2] * 2¹²⁸.
45
type macState struct {
46
	// h is the main accumulator. It is to be interpreted modulo 2¹³⁰ - 5, but
47
	// can grow larger during and after rounds. It must, however, remain below
48
	// 2 * (2¹³⁰ - 5).
49
	h [3]uint64
50
	// r and s are the private key components.
51
	r [2]uint64
52
	s [2]uint64
53
}
54

55
type macGeneric struct {
56
	macState
57

58
	buffer [TagSize]byte
59
	offset int
60
}
61

62
// Write splits the incoming message into TagSize chunks, and passes them to
63
// update. It buffers incomplete chunks.
64
func (h *macGeneric) Write(p []byte) (int, error) {
65
	nn := len(p)
66
	if h.offset > 0 {
67
		n := copy(h.buffer[h.offset:], p)
68
		if h.offset+n < TagSize {
69
			h.offset += n
70
			return nn, nil
71
		}
72
		p = p[n:]
73
		h.offset = 0
74
		updateGeneric(&h.macState, h.buffer[:])
75
	}
76
	if n := len(p) - (len(p) % TagSize); n > 0 {
77
		updateGeneric(&h.macState, p[:n])
78
		p = p[n:]
79
	}
80
	if len(p) > 0 {
81
		h.offset += copy(h.buffer[h.offset:], p)
82
	}
83
	return nn, nil
84
}
85

86
// Sum flushes the last incomplete chunk from the buffer, if any, and generates
87
// the MAC output. It does not modify its state, in order to allow for multiple
88
// calls to Sum, even if no Write is allowed after Sum.
89
func (h *macGeneric) Sum(out *[TagSize]byte) {
90
	state := h.macState
91
	if h.offset > 0 {
92
		updateGeneric(&state, h.buffer[:h.offset])
93
	}
94
	finalize(out, &state.h, &state.s)
95
}
96

97
// [rMask0, rMask1] is the specified Poly1305 clamping mask in little-endian. It
98
// clears some bits of the secret coefficient to make it possible to implement
99
// multiplication more efficiently.
100
const (
101
	rMask0 = 0x0FFFFFFC0FFFFFFF
102
	rMask1 = 0x0FFFFFFC0FFFFFFC
103
)
104

105
// initialize loads the 256-bit key into the two 128-bit secret values r and s.
106
func initialize(key *[32]byte, m *macState) {
107
	m.r[0] = binary.LittleEndian.Uint64(key[0:8]) & rMask0
108
	m.r[1] = binary.LittleEndian.Uint64(key[8:16]) & rMask1
109
	m.s[0] = binary.LittleEndian.Uint64(key[16:24])
110
	m.s[1] = binary.LittleEndian.Uint64(key[24:32])
111
}
112

113
// uint128 holds a 128-bit number as two 64-bit limbs, for use with the
114
// bits.Mul64 and bits.Add64 intrinsics.
115
type uint128 struct {
116
	lo, hi uint64
117
}
118

119
func mul64(a, b uint64) uint128 {
120
	hi, lo := bits.Mul64(a, b)
121
	return uint128{lo, hi}
122
}
123

124
func add128(a, b uint128) uint128 {
125
	lo, c := bits.Add64(a.lo, b.lo, 0)
126
	hi, c := bits.Add64(a.hi, b.hi, c)
127
	if c != 0 {
128
		panic("poly1305: unexpected overflow")
129
	}
130
	return uint128{lo, hi}
131
}
132

133
func shiftRightBy2(a uint128) uint128 {
134
	a.lo = a.lo>>2 | (a.hi&3)<<62
135
	a.hi = a.hi >> 2
136
	return a
137
}
138

139
// updateGeneric absorbs msg into the state.h accumulator. For each chunk m of
140
// 128 bits of message, it computes
141
//
142
//	h₊ = (h + m) * r  mod  2¹³⁰ - 5
143
//
144
// If the msg length is not a multiple of TagSize, it assumes the last
145
// incomplete chunk is the final one.
146
func updateGeneric(state *macState, msg []byte) {
147
	h0, h1, h2 := state.h[0], state.h[1], state.h[2]
148
	r0, r1 := state.r[0], state.r[1]
149

150
	for len(msg) > 0 {
151
		var c uint64
152

153
		// For the first step, h + m, we use a chain of bits.Add64 intrinsics.
154
		// The resulting value of h might exceed 2¹³⁰ - 5, but will be partially
155
		// reduced at the end of the multiplication below.
156
		//
157
		// The spec requires us to set a bit just above the message size, not to
158
		// hide leading zeroes. For full chunks, that's 1 << 128, so we can just
159
		// add 1 to the most significant (2¹²⁸) limb, h2.
160
		if len(msg) >= TagSize {
161
			h0, c = bits.Add64(h0, binary.LittleEndian.Uint64(msg[0:8]), 0)
162
			h1, c = bits.Add64(h1, binary.LittleEndian.Uint64(msg[8:16]), c)
163
			h2 += c + 1
164

165
			msg = msg[TagSize:]
166
		} else {
167
			var buf [TagSize]byte
168
			copy(buf[:], msg)
169
			buf[len(msg)] = 1
170

171
			h0, c = bits.Add64(h0, binary.LittleEndian.Uint64(buf[0:8]), 0)
172
			h1, c = bits.Add64(h1, binary.LittleEndian.Uint64(buf[8:16]), c)
173
			h2 += c
174

175
			msg = nil
176
		}
177

178
		// Multiplication of big number limbs is similar to elementary school
179
		// columnar multiplication. Instead of digits, there are 64-bit limbs.
180
		//
181
		// We are multiplying a 3 limbs number, h, by a 2 limbs number, r.
182
		//
183
		//                        h2    h1    h0  x
184
		//                              r1    r0  =
185
		//                       ----------------
186
		//                      h2r0  h1r0  h0r0     <-- individual 128-bit products
187
		//            +   h2r1  h1r1  h0r1
188
		//               ------------------------
189
		//                 m3    m2    m1    m0      <-- result in 128-bit overlapping limbs
190
		//               ------------------------
191
		//         m3.hi m2.hi m1.hi m0.hi           <-- carry propagation
192
		//     +         m3.lo m2.lo m1.lo m0.lo
193
		//        -------------------------------
194
		//           t4    t3    t2    t1    t0      <-- final result in 64-bit limbs
195
		//
196
		// The main difference from pen-and-paper multiplication is that we do
197
		// carry propagation in a separate step, as if we wrote two digit sums
198
		// at first (the 128-bit limbs), and then carried the tens all at once.
199

200
		h0r0 := mul64(h0, r0)
201
		h1r0 := mul64(h1, r0)
202
		h2r0 := mul64(h2, r0)
203
		h0r1 := mul64(h0, r1)
204
		h1r1 := mul64(h1, r1)
205
		h2r1 := mul64(h2, r1)
206

207
		// Since h2 is known to be at most 7 (5 + 1 + 1), and r0 and r1 have their
208
		// top 4 bits cleared by rMask{0,1}, we know that their product is not going
209
		// to overflow 64 bits, so we can ignore the high part of the products.
210
		//
211
		// This also means that the product doesn't have a fifth limb (t4).
212
		if h2r0.hi != 0 {
213
			panic("poly1305: unexpected overflow")
214
		}
215
		if h2r1.hi != 0 {
216
			panic("poly1305: unexpected overflow")
217
		}
218

219
		m0 := h0r0
220
		m1 := add128(h1r0, h0r1) // These two additions don't overflow thanks again
221
		m2 := add128(h2r0, h1r1) // to the 4 masked bits at the top of r0 and r1.
222
		m3 := h2r1
223

224
		t0 := m0.lo
225
		t1, c := bits.Add64(m1.lo, m0.hi, 0)
226
		t2, c := bits.Add64(m2.lo, m1.hi, c)
227
		t3, _ := bits.Add64(m3.lo, m2.hi, c)
228

229
		// Now we have the result as 4 64-bit limbs, and we need to reduce it
230
		// modulo 2¹³⁰ - 5. The special shape of this Crandall prime lets us do
231
		// a cheap partial reduction according to the reduction identity
232
		//
233
		//     c * 2¹³⁰ + n  =  c * 5 + n  mod  2¹³⁰ - 5
234
		//
235
		// because 2¹³⁰ = 5 mod 2¹³⁰ - 5. Partial reduction since the result is
236
		// likely to be larger than 2¹³⁰ - 5, but still small enough to fit the
237
		// assumptions we make about h in the rest of the code.
238
		//
239
		// See also https://speakerdeck.com/gtank/engineering-prime-numbers?slide=23
240

241
		// We split the final result at the 2¹³⁰ mark into h and cc, the carry.
242
		// Note that the carry bits are effectively shifted left by 2, in other
243
		// words, cc = c * 4 for the c in the reduction identity.
244
		h0, h1, h2 = t0, t1, t2&maskLow2Bits
245
		cc := uint128{t2 & maskNotLow2Bits, t3}
246

247
		// To add c * 5 to h, we first add cc = c * 4, and then add (cc >> 2) = c.
248

249
		h0, c = bits.Add64(h0, cc.lo, 0)
250
		h1, c = bits.Add64(h1, cc.hi, c)
251
		h2 += c
252

253
		cc = shiftRightBy2(cc)
254

255
		h0, c = bits.Add64(h0, cc.lo, 0)
256
		h1, c = bits.Add64(h1, cc.hi, c)
257
		h2 += c
258

259
		// h2 is at most 3 + 1 + 1 = 5, making the whole of h at most
260
		//
261
		//     5 * 2¹²⁸ + (2¹²⁸ - 1) = 6 * 2¹²⁸ - 1
262
	}
263

264
	state.h[0], state.h[1], state.h[2] = h0, h1, h2
265
}
266

267
const (
268
	maskLow2Bits    uint64 = 0x0000000000000003
269
	maskNotLow2Bits uint64 = ^maskLow2Bits
270
)
271

272
// select64 returns x if v == 1 and y if v == 0, in constant time.
273
func select64(v, x, y uint64) uint64 { return ^(v-1)&x | (v-1)&y }
274

275
// [p0, p1, p2] is 2¹³⁰ - 5 in little endian order.
276
const (
277
	p0 = 0xFFFFFFFFFFFFFFFB
278
	p1 = 0xFFFFFFFFFFFFFFFF
279
	p2 = 0x0000000000000003
280
)
281

282
// finalize completes the modular reduction of h and computes
283
//
284
//	out = h + s  mod  2¹²⁸
285
func finalize(out *[TagSize]byte, h *[3]uint64, s *[2]uint64) {
286
	h0, h1, h2 := h[0], h[1], h[2]
287

288
	// After the partial reduction in updateGeneric, h might be more than
289
	// 2¹³⁰ - 5, but will be less than 2 * (2¹³⁰ - 5). To complete the reduction
290
	// in constant time, we compute t = h - (2¹³⁰ - 5), and select h as the
291
	// result if the subtraction underflows, and t otherwise.
292

293
	hMinusP0, b := bits.Sub64(h0, p0, 0)
294
	hMinusP1, b := bits.Sub64(h1, p1, b)
295
	_, b = bits.Sub64(h2, p2, b)
296

297
	// h = h if h < p else h - p
298
	h0 = select64(b, h0, hMinusP0)
299
	h1 = select64(b, h1, hMinusP1)
300

301
	// Finally, we compute the last Poly1305 step
302
	//
303
	//     tag = h + s  mod  2¹²⁸
304
	//
305
	// by just doing a wide addition with the 128 low bits of h and discarding
306
	// the overflow.
307
	h0, c := bits.Add64(h0, s[0], 0)
308
	h1, _ = bits.Add64(h1, s[1], c)
309

310
	binary.LittleEndian.PutUint64(out[0:8], h0)
311
	binary.LittleEndian.PutUint64(out[8:16], h1)
312
}
313
podman

Использование cookies