cubefs

decode.go
1726 строк · 45.2 Кб
Перенос по словам
1
// Copyright 2014 The Go Authors.  All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4

5
// Table-driven decoding of x86 instructions.
6

7
package x86asm
8

9
import (
10
	"encoding/binary"
11
	"errors"
12
	"fmt"
13
	"runtime"
14
)
15

16
// trace, when set to true, causes the decoder to print the PC sequence
// of the executed decoder instruction codes, and causes instPrefix to
// print the file and line of its caller. This is typically only useful
// when you are running a test of a single input case.
const trace = false
20

21
// A decodeOp is a single instruction in the decoder bytecode program.
//
// The decodeOps correspond to consuming and conditionally branching
// on input bytes, consuming additional fields, and then interpreting
// consumed data as instruction arguments. The names of the xRead and xArg
// operations are taken from the Intel manual conventions, for example
// Volume 2, Section 3.1.1, page 487 of
// http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf
//
// The actual decoding program is generated by ../x86map.
//
// TODO(rsc): We may be able to merge various of the memory operands
// since we don't care about, say, the distinction between m80dec and m80bcd.
// Similarly, mm and mm1 have identical meaning, as do xmm and xmm1.
type decodeOp uint16

// The decoder operations. Each op's numeric value is its position in this
// list (assigned by iota).
// NOTE(review): the generated decoder program presumably stores these
// numeric values, so entries must not be reordered or inserted without
// regenerating it; confirm against ../x86map.
const (
	xFail  decodeOp = iota // invalid instruction (return)
	xMatch                 // completed match
	xJump                  // jump to pc

	xCondByte     // switch on instruction byte value
	xCondSlashR   // read and switch on instruction /r value
	xCondPrefix   // switch on presence of instruction prefix
	xCondIs64     // switch on 64-bit processor mode
	xCondDataSize // switch on operand size
	xCondAddrSize // switch on address size
	xCondIsMem    // switch on memory vs register argument

	xSetOp // set instruction opcode

	xReadSlashR // read /r
	xReadIb     // read ib
	xReadIw     // read iw
	xReadId     // read id
	xReadIo     // read io
	xReadCb     // read cb
	xReadCw     // read cw
	xReadCd     // read cd
	xReadCp     // read cp
	xReadCm     // read cm

	xArg1            // arg 1
	xArg3            // arg 3
	xArgAL           // arg AL
	xArgAX           // arg AX
	xArgCL           // arg CL
	xArgCR0dashCR7   // arg CR0-CR7
	xArgCS           // arg CS
	xArgDR0dashDR7   // arg DR0-DR7
	xArgDS           // arg DS
	xArgDX           // arg DX
	xArgEAX          // arg EAX
	xArgEDX          // arg EDX
	xArgES           // arg ES
	xArgFS           // arg FS
	xArgGS           // arg GS
	xArgImm16        // arg imm16
	xArgImm32        // arg imm32
	xArgImm64        // arg imm64
	xArgImm8         // arg imm8
	xArgImm8u        // arg imm8 but record as unsigned
	xArgImm16u       // arg imm16 but record as unsigned
	xArgM            // arg m
	xArgM128         // arg m128
	xArgM256         // arg m256
	xArgM1428byte    // arg m14/28byte
	xArgM16          // arg m16
	xArgM16and16     // arg m16&16
	xArgM16and32     // arg m16&32
	xArgM16and64     // arg m16&64
	xArgM16colon16   // arg m16:16
	xArgM16colon32   // arg m16:32
	xArgM16colon64   // arg m16:64
	xArgM16int       // arg m16int
	xArgM2byte       // arg m2byte
	xArgM32          // arg m32
	xArgM32and32     // arg m32&32
	xArgM32fp        // arg m32fp
	xArgM32int       // arg m32int
	xArgM512byte     // arg m512byte
	xArgM64          // arg m64
	xArgM64fp        // arg m64fp
	xArgM64int       // arg m64int
	xArgM8           // arg m8
	xArgM80bcd       // arg m80bcd
	xArgM80dec       // arg m80dec
	xArgM80fp        // arg m80fp
	xArgM94108byte   // arg m94/108byte
	xArgMm           // arg mm
	xArgMm1          // arg mm1
	xArgMm2          // arg mm2
	xArgMm2M64       // arg mm2/m64
	xArgMmM32        // arg mm/m32
	xArgMmM64        // arg mm/m64
	xArgMem          // arg mem
	xArgMoffs16      // arg moffs16
	xArgMoffs32      // arg moffs32
	xArgMoffs64      // arg moffs64
	xArgMoffs8       // arg moffs8
	xArgPtr16colon16 // arg ptr16:16
	xArgPtr16colon32 // arg ptr16:32
	xArgR16          // arg r16
	xArgR16op        // arg r16 with +rw in opcode
	xArgR32          // arg r32
	xArgR32M16       // arg r32/m16
	xArgR32M8        // arg r32/m8
	xArgR32op        // arg r32 with +rd in opcode
	xArgR64          // arg r64
	xArgR64M16       // arg r64/m16
	xArgR64op        // arg r64 with +rd in opcode
	xArgR8           // arg r8
	xArgR8op         // arg r8 with +rb in opcode
	xArgRAX          // arg RAX
	xArgRDX          // arg RDX
	xArgRM           // arg r/m
	xArgRM16         // arg r/m16
	xArgRM32         // arg r/m32
	xArgRM64         // arg r/m64
	xArgRM8          // arg r/m8
	xArgReg          // arg reg
	xArgRegM16       // arg reg/m16
	xArgRegM32       // arg reg/m32
	xArgRegM8        // arg reg/m8
	xArgRel16        // arg rel16
	xArgRel32        // arg rel32
	xArgRel8         // arg rel8
	xArgSS           // arg SS
	xArgST           // arg ST, aka ST(0)
	xArgSTi          // arg ST(i) with +i in opcode
	xArgSreg         // arg Sreg
	xArgTR0dashTR7   // arg TR0-TR7
	xArgXmm          // arg xmm
	xArgXMM0         // arg <XMM0>
	xArgXmm1         // arg xmm1
	xArgXmm2         // arg xmm2
	xArgXmm2M128     // arg xmm2/m128
	xArgYmm2M256     // arg ymm2/m256
	xArgXmm2M16      // arg xmm2/m16
	xArgXmm2M32      // arg xmm2/m32
	xArgXmm2M64      // arg xmm2/m64
	xArgXmmM128      // arg xmm/m128
	xArgXmmM32       // arg xmm/m32
	xArgXmmM64       // arg xmm/m64
	xArgYmm1         // arg ymm1
	xArgRmf16        // arg r/m16 but force mod=3
	xArgRmf32        // arg r/m32 but force mod=3
	xArgRmf64        // arg r/m64 but force mod=3
)
171

172
// instPrefix returns an Inst describing just one prefix byte.
173
// It is only used if there is a prefix followed by an unintelligible
174
// or invalid instruction byte sequence.
175
func instPrefix(b byte, mode int) (Inst, error) {
176
	// When tracing it is useful to see what called instPrefix to report an error.
177
	if trace {
178
		_, file, line, _ := runtime.Caller(1)
179
		fmt.Printf("%s:%d\n", file, line)
180
	}
181
	p := Prefix(b)
182
	switch p {
183
	case PrefixDataSize:
184
		if mode == 16 {
185
			p = PrefixData32
186
		} else {
187
			p = PrefixData16
188
		}
189
	case PrefixAddrSize:
190
		if mode == 32 {
191
			p = PrefixAddr16
192
		} else {
193
			p = PrefixAddr32
194
		}
195
	}
196
	// Note: using composite literal with Prefix key confuses 'bundle' tool.
197
	inst := Inst{Len: 1}
198
	inst.Prefix = Prefixes{p}
199
	return inst, nil
200
}
201

202
// truncated reports a truncated instruction.
203
// For now we use instPrefix but perhaps later we will return
204
// a specific error here.
205
func truncated(src []byte, mode int) (Inst, error) {
206
	if len(src) == 0 {
207
		return Inst{}, ErrTruncated
208
	}
209
	return instPrefix(src[0], mode) // too long
210
}
211

212
// These are the errors returned by Decode.
var (
	// ErrInvalidMode is returned when the mode argument is not 16, 32, or 64.
	ErrInvalidMode = errors.New("invalid x86 mode in Decode")

	// ErrTruncated is returned when there are no input bytes to decode.
	ErrTruncated = errors.New("truncated instruction")

	// ErrUnrecognized reports an instruction the decoder does not recognize.
	ErrUnrecognized = errors.New("unrecognized instruction")
)
218

219
// decoderCover records coverage information for which parts
// of the byte code have been executed. It is nil until the first
// decode, which allocates one entry per element of the decoder program;
// the entry for each executed program counter is then set to true.
// NOTE(review): it is written without synchronization on every decode;
// confirm whether concurrent Decode calls are expected.
// TODO(rsc): This is for testing. Only use this if a flag is given.
var decoderCover []bool
223

224
// Decode decodes the leading bytes in src as a single instruction.
225
// The mode arguments specifies the assumed processor mode:
226
// 16, 32, or 64 for 16-, 32-, and 64-bit execution modes.
227
func Decode(src []byte, mode int) (inst Inst, err error) {
228
	return decode1(src, mode, false)
229
}
230

231
// decode1 is the implementation of Decode but takes an extra
232
// gnuCompat flag to cause it to change its behavior to mimic
233
// bugs (or at least unique features) of GNU libopcodes as used
234
// by objdump. We don't believe that logic is the right thing to do
235
// in general, but when testing against libopcodes it simplifies the
236
// comparison if we adjust a few small pieces of logic.
237
// The affected logic is in the conditional branch for "mandatory" prefixes,
238
// case xCondPrefix.
239
func decode1(src []byte, mode int, gnuCompat bool) (Inst, error) {
240
	switch mode {
241
	case 16, 32, 64:
242
		// ok
243
		// TODO(rsc): 64-bit mode not tested, probably not working.
244
	default:
245
		return Inst{}, ErrInvalidMode
246
	}
247

248
	// Maximum instruction size is 15 bytes.
249
	// If we need to read more, return 'truncated instruction'.
250
	if len(src) > 15 {
251
		src = src[:15]
252
	}
253

254
	var (
255
		// prefix decoding information
256
		pos           = 0    // position reading src
257
		nprefix       = 0    // number of prefixes
258
		lockIndex     = -1   // index of LOCK prefix in src and inst.Prefix
259
		repIndex      = -1   // index of REP/REPN prefix in src and inst.Prefix
260
		segIndex      = -1   // index of Group 2 prefix in src and inst.Prefix
261
		dataSizeIndex = -1   // index of Group 3 prefix in src and inst.Prefix
262
		addrSizeIndex = -1   // index of Group 4 prefix in src and inst.Prefix
263
		rex           Prefix // rex byte if present (or 0)
264
		rexUsed       Prefix // bits used in rex byte
265
		rexIndex      = -1   // index of rex byte
266
		vex           Prefix // use vex encoding
267
		vexIndex      = -1   // index of vex prefix
268

269
		addrMode = mode // address mode (width in bits)
270
		dataMode = mode // operand mode (width in bits)
271

272
		// decoded ModR/M fields
273
		haveModrm bool
274
		modrm     int
275
		mod       int
276
		regop     int
277
		rm        int
278

279
		// if ModR/M is memory reference, Mem form
280
		mem     Mem
281
		haveMem bool
282

283
		// decoded SIB fields
284
		haveSIB bool
285
		sib     int
286
		scale   int
287
		index   int
288
		base    int
289
		displen int
290
		dispoff int
291

292
		// decoded immediate values
293
		imm     int64
294
		imm8    int8
295
		immc    int64
296
		immcpos int
297

298
		// output
299
		opshift int
300
		inst    Inst
301
		narg    int // number of arguments written to inst
302
	)
303

304
	if mode == 64 {
305
		dataMode = 32
306
	}
307

308
	// Prefixes are certainly the most complex and underspecified part of
309
	// decoding x86 instructions. Although the manuals say things like
310
	// up to four prefixes, one from each group, nearly everyone seems to
311
	// agree that in practice as many prefixes as possible, including multiple
312
	// from a particular group or repetitions of a given prefix, can be used on
313
	// an instruction, provided the total instruction length including prefixes
314
	// does not exceed the agreed-upon maximum of 15 bytes.
315
	// Everyone also agrees that if one of these prefixes is the LOCK prefix
316
	// and the instruction is not one of the instructions that can be used with
317
	// the LOCK prefix or if the destination is not a memory operand,
318
	// then the instruction is invalid and produces the #UD exception.
319
	// However, that is the end of any semblance of agreement.
320
	//
321
	// What happens if prefixes are given that conflict with other prefixes?
322
	// For example, the memory segment overrides CS, DS, ES, FS, GS, SS
323
	// conflict with each other: only one segment can be in effect.
324
	// Disassemblers seem to agree that later prefixes take priority over
325
	// earlier ones. I have not taken the time to write assembly programs
326
	// to check to see if the hardware agrees.
327
	//
328
	// What happens if prefixes are given that have no meaning for the
329
	// specific instruction to which they are attached? It depends.
330
	// If they really have no meaning, they are ignored. However, a future
331
	// processor may assign a different meaning. As a disassembler, we
332
	// don't really know whether we're seeing a meaningless prefix or one
333
	// whose meaning we simply haven't been told yet.
334
	//
335
	// Combining the two questions, what happens when conflicting
336
	// extension prefixes are given? No one seems to know for sure.
337
	// For example, MOVQ is 66 0F D6 /r, MOVDQ2Q is F2 0F D6 /r,
338
	// and MOVQ2DQ is F3 0F D6 /r. What is '66 F2 F3 0F D6 /r'?
339
	// Which prefix wins? See the xCondPrefix prefix for more.
340
	//
341
	// Writing assembly test cases to divine which interpretation the
342
	// CPU uses might clarify the situation, but more likely it would
343
	// make the situation even less clear.
344

345
	// Read non-REX prefixes.
346
ReadPrefixes:
347
	for ; pos < len(src); pos++ {
348
		p := Prefix(src[pos])
349
		switch p {
350
		default:
351
			nprefix = pos
352
			break ReadPrefixes
353

354
		// Group 1 - lock and repeat prefixes
355
		// According to Intel, there should only be one from this set,
356
		// but according to AMD both can be present.
357
		case 0xF0:
358
			if lockIndex >= 0 {
359
				inst.Prefix[lockIndex] |= PrefixIgnored
360
			}
361
			lockIndex = pos
362
		case 0xF2, 0xF3:
363
			if repIndex >= 0 {
364
				inst.Prefix[repIndex] |= PrefixIgnored
365
			}
366
			repIndex = pos
367

368
		// Group 2 - segment override / branch hints
369
		case 0x26, 0x2E, 0x36, 0x3E:
370
			if mode == 64 {
371
				p |= PrefixIgnored
372
				break
373
			}
374
			fallthrough
375
		case 0x64, 0x65:
376
			if segIndex >= 0 {
377
				inst.Prefix[segIndex] |= PrefixIgnored
378
			}
379
			segIndex = pos
380

381
		// Group 3 - operand size override
382
		case 0x66:
383
			if mode == 16 {
384
				dataMode = 32
385
				p = PrefixData32
386
			} else {
387
				dataMode = 16
388
				p = PrefixData16
389
			}
390
			if dataSizeIndex >= 0 {
391
				inst.Prefix[dataSizeIndex] |= PrefixIgnored
392
			}
393
			dataSizeIndex = pos
394

395
		// Group 4 - address size override
396
		case 0x67:
397
			if mode == 32 {
398
				addrMode = 16
399
				p = PrefixAddr16
400
			} else {
401
				addrMode = 32
402
				p = PrefixAddr32
403
			}
404
			if addrSizeIndex >= 0 {
405
				inst.Prefix[addrSizeIndex] |= PrefixIgnored
406
			}
407
			addrSizeIndex = pos
408

409
		//Group 5 - Vex encoding
410
		case 0xC5:
411
			if pos == 0 && pos+1 < len(src) && (mode == 64 || (mode == 32 && src[pos+1]&0xc0 == 0xc0)) {
412
				vex = p
413
				vexIndex = pos
414
				inst.Prefix[pos] = p
415
				inst.Prefix[pos+1] = Prefix(src[pos+1])
416
				pos += 1
417
				continue
418
			} else {
419
				nprefix = pos
420
				break ReadPrefixes
421
			}
422
		case 0xC4:
423
			if pos == 0 && pos+2 < len(src) && (mode == 64 || (mode == 32 && src[pos+1]&0xc0 == 0xc0)) {
424
				vex = p
425
				vexIndex = pos
426
				inst.Prefix[pos] = p
427
				inst.Prefix[pos+1] = Prefix(src[pos+1])
428
				inst.Prefix[pos+2] = Prefix(src[pos+2])
429
				pos += 2
430
				continue
431
			} else {
432
				nprefix = pos
433
				break ReadPrefixes
434
			}
435
		}
436

437
		if pos >= len(inst.Prefix) {
438
			return instPrefix(src[0], mode) // too long
439
		}
440

441
		inst.Prefix[pos] = p
442
	}
443

444
	// Read REX prefix.
445
	if pos < len(src) && mode == 64 && Prefix(src[pos]).IsREX() && vex == 0 {
446
		rex = Prefix(src[pos])
447
		rexIndex = pos
448
		if pos >= len(inst.Prefix) {
449
			return instPrefix(src[0], mode) // too long
450
		}
451
		inst.Prefix[pos] = rex
452
		pos++
453
		if rex&PrefixREXW != 0 {
454
			dataMode = 64
455
			if dataSizeIndex >= 0 {
456
				inst.Prefix[dataSizeIndex] |= PrefixIgnored
457
			}
458
		}
459
	}
460

461
	// Decode instruction stream, interpreting decoding instructions.
462
	// opshift gives the shift to use when saving the next
463
	// opcode byte into inst.Opcode.
464
	opshift = 24
465
	if decoderCover == nil {
466
		decoderCover = make([]bool, len(decoder))
467
	}
468

469
	// Decode loop, executing decoder program.
470
	var oldPC, prevPC int
471
Decode:
472
	for pc := 1; ; { // TODO uint
473
		oldPC = prevPC
474
		prevPC = pc
475
		if trace {
476
			println("run", pc)
477
		}
478
		x := decoder[pc]
479
		decoderCover[pc] = true
480
		pc++
481

482
		// Read and decode ModR/M if needed by opcode.
483
		switch decodeOp(x) {
484
		case xCondSlashR, xReadSlashR:
485
			if haveModrm {
486
				return Inst{Len: pos}, errInternal
487
			}
488
			haveModrm = true
489
			if pos >= len(src) {
490
				return truncated(src, mode)
491
			}
492
			modrm = int(src[pos])
493
			pos++
494
			if opshift >= 0 {
495
				inst.Opcode |= uint32(modrm) << uint(opshift)
496
				opshift -= 8
497
			}
498
			mod = modrm >> 6
499
			regop = (modrm >> 3) & 07
500
			rm = modrm & 07
501
			if rex&PrefixREXR != 0 {
502
				rexUsed |= PrefixREXR
503
				regop |= 8
504
			}
505
			if addrMode == 16 {
506
				// 16-bit modrm form
507
				if mod != 3 {
508
					haveMem = true
509
					mem = addr16[rm]
510
					if rm == 6 && mod == 0 {
511
						mem.Base = 0
512
					}
513

514
					// Consume disp16 if present.
515
					if mod == 0 && rm == 6 || mod == 2 {
516
						if pos+2 > len(src) {
517
							return truncated(src, mode)
518
						}
519
						mem.Disp = int64(binary.LittleEndian.Uint16(src[pos:]))
520
						pos += 2
521
					}
522

523
					// Consume disp8 if present.
524
					if mod == 1 {
525
						if pos >= len(src) {
526
							return truncated(src, mode)
527
						}
528
						mem.Disp = int64(int8(src[pos]))
529
						pos++
530
					}
531
				}
532
			} else {
533
				haveMem = mod != 3
534

535
				// 32-bit or 64-bit form
536
				// Consume SIB encoding if present.
537
				if rm == 4 && mod != 3 {
538
					haveSIB = true
539
					if pos >= len(src) {
540
						return truncated(src, mode)
541
					}
542
					sib = int(src[pos])
543
					pos++
544
					if opshift >= 0 {
545
						inst.Opcode |= uint32(sib) << uint(opshift)
546
						opshift -= 8
547
					}
548
					scale = sib >> 6
549
					index = (sib >> 3) & 07
550
					base = sib & 07
551
					if rex&PrefixREXB != 0 || vex == 0xC4 && inst.Prefix[vexIndex+1]&0x20 == 0 {
552
						rexUsed |= PrefixREXB
553
						base |= 8
554
					}
555
					if rex&PrefixREXX != 0 || vex == 0xC4 && inst.Prefix[vexIndex+1]&0x40 == 0 {
556
						rexUsed |= PrefixREXX
557
						index |= 8
558
					}
559

560
					mem.Scale = 1 << uint(scale)
561
					if index == 4 {
562
						// no mem.Index
563
					} else {
564
						mem.Index = baseRegForBits(addrMode) + Reg(index)
565
					}
566
					if base&7 == 5 && mod == 0 {
567
						// no mem.Base
568
					} else {
569
						mem.Base = baseRegForBits(addrMode) + Reg(base)
570
					}
571
				} else {
572
					if rex&PrefixREXB != 0 {
573
						rexUsed |= PrefixREXB
574
						rm |= 8
575
					}
576
					if mod == 0 && rm&7 == 5 || rm&7 == 4 {
577
						// base omitted
578
					} else if mod != 3 {
579
						mem.Base = baseRegForBits(addrMode) + Reg(rm)
580
					}
581
				}
582

583
				// Consume disp32 if present.
584
				if mod == 0 && (rm&7 == 5 || haveSIB && base&7 == 5) || mod == 2 {
585
					if pos+4 > len(src) {
586
						return truncated(src, mode)
587
					}
588
					dispoff = pos
589
					displen = 4
590
					mem.Disp = int64(binary.LittleEndian.Uint32(src[pos:]))
591
					pos += 4
592
				}
593

594
				// Consume disp8 if present.
595
				if mod == 1 {
596
					if pos >= len(src) {
597
						return truncated(src, mode)
598
					}
599
					dispoff = pos
600
					displen = 1
601
					mem.Disp = int64(int8(src[pos]))
602
					pos++
603
				}
604

605
				// In 64-bit, mod=0 rm=5 is PC-relative instead of just disp.
606
				// See Vol 2A. Table 2-7.
607
				if mode == 64 && mod == 0 && rm&7 == 5 {
608
					if addrMode == 32 {
609
						mem.Base = EIP
610
					} else {
611
						mem.Base = RIP
612
					}
613
				}
614
			}
615

616
			if segIndex >= 0 {
617
				mem.Segment = prefixToSegment(inst.Prefix[segIndex])
618
			}
619
		}
620

621
		// Execute single opcode.
622
		switch decodeOp(x) {
623
		default:
624
			println("bad op", x, "at", pc-1, "from", oldPC)
625
			return Inst{Len: pos}, errInternal
626

627
		case xFail:
628
			inst.Op = 0
629
			break Decode
630

631
		case xMatch:
632
			break Decode
633

634
		case xJump:
635
			pc = int(decoder[pc])
636

637
		// Conditional branches.
638

639
		case xCondByte:
640
			if pos >= len(src) {
641
				return truncated(src, mode)
642
			}
643
			b := src[pos]
644
			n := int(decoder[pc])
645
			pc++
646
			for i := 0; i < n; i++ {
647
				xb, xpc := decoder[pc], int(decoder[pc+1])
648
				pc += 2
649
				if b == byte(xb) {
650
					pc = xpc
651
					pos++
652
					if opshift >= 0 {
653
						inst.Opcode |= uint32(b) << uint(opshift)
654
						opshift -= 8
655
					}
656
					continue Decode
657
				}
658
			}
659
			// xCondByte is the only conditional with a fall through,
660
			// so that it can be used to pick off special cases before
661
			// an xCondSlash. If the fallthrough instruction is xFail,
662
			// advance the position so that the decoded instruction
663
			// size includes the byte we just compared against.
664
			if decodeOp(decoder[pc]) == xJump {
665
				pc = int(decoder[pc+1])
666
			}
667
			if decodeOp(decoder[pc]) == xFail {
668
				pos++
669
			}
670

671
		case xCondIs64:
672
			if mode == 64 {
673
				pc = int(decoder[pc+1])
674
			} else {
675
				pc = int(decoder[pc])
676
			}
677

678
		case xCondIsMem:
679
			mem := haveMem
680
			if !haveModrm {
681
				if pos >= len(src) {
682
					return instPrefix(src[0], mode) // too long
683
				}
684
				mem = src[pos]>>6 != 3
685
			}
686
			if mem {
687
				pc = int(decoder[pc+1])
688
			} else {
689
				pc = int(decoder[pc])
690
			}
691

692
		case xCondDataSize:
693
			switch dataMode {
694
			case 16:
695
				if dataSizeIndex >= 0 {
696
					inst.Prefix[dataSizeIndex] |= PrefixImplicit
697
				}
698
				pc = int(decoder[pc])
699
			case 32:
700
				if dataSizeIndex >= 0 {
701
					inst.Prefix[dataSizeIndex] |= PrefixImplicit
702
				}
703
				pc = int(decoder[pc+1])
704
			case 64:
705
				rexUsed |= PrefixREXW
706
				pc = int(decoder[pc+2])
707
			}
708

709
		case xCondAddrSize:
710
			switch addrMode {
711
			case 16:
712
				if addrSizeIndex >= 0 {
713
					inst.Prefix[addrSizeIndex] |= PrefixImplicit
714
				}
715
				pc = int(decoder[pc])
716
			case 32:
717
				if addrSizeIndex >= 0 {
718
					inst.Prefix[addrSizeIndex] |= PrefixImplicit
719
				}
720
				pc = int(decoder[pc+1])
721
			case 64:
722
				pc = int(decoder[pc+2])
723
			}
724

725
		case xCondPrefix:
726
			// Conditional branch based on presence or absence of prefixes.
727
			// The conflict cases here are completely undocumented and
728
			// differ significantly between GNU libopcodes and Intel xed.
729
			// I have not written assembly code to divine what various CPUs
730
			// do, but it wouldn't surprise me if they are not consistent either.
731
			//
732
			// The basic idea is to switch on the presence of a prefix, so that
733
			// for example:
734
			//
735
			//	xCondPrefix, 4
736
			//	0xF3, 123,
737
			//	0xF2, 234,
738
			//	0x66, 345,
739
			//	0, 456
740
			//
741
			// branch to 123 if the F3 prefix is present, 234 if the F2 prefix
742
			// is present, 345 if the 66 prefix is present, and 456 otherwise.
743
			// The prefixes are given in descending order so that the 0 will be last.
744
			//
745
			// It is unclear what should happen if multiple conditions are
746
			// satisfied: what if F2 and F3 are both present, or if 66 and F2
747
			// are present, or if all three are present? The one chosen becomes
748
			// part of the opcode and the others do not. Perhaps the answer
749
			// depends on the specific opcodes in question.
750
			//
751
			// The only clear example is that CRC32 is F2 0F 38 F1 /r, and
752
			// it comes in 16-bit and 32-bit forms based on the 66 prefix,
753
			// so 66 F2 0F 38 F1 /r should be treated as F2 taking priority,
754
			// with the 66 being only an operand size override, and probably
755
			// F2 66 0F 38 F1 /r should be treated the same.
756
			// Perhaps that rule is specific to the case of CRC32, since no
757
			// 66 0F 38 F1 instruction is defined (today) (that we know of).
758
			// However, both libopcodes and xed seem to generalize this
759
			// example and choose F2/F3 in preference to 66, and we
760
			// do the same.
761
			//
762
			// Next, what if both F2 and F3 are present? Which wins?
763
			// The Intel xed rule, and ours, is that the one that occurs last wins.
764
			// The GNU libopcodes rule, which we implement only in gnuCompat mode,
765
			// is that F3 beats F2 unless F3 has no special meaning, in which
766
			// case F3 can be a modifier on an F2 special meaning.
767
			//
768
			// Concretely,
769
			//	66 0F D6 /r is MOVQ
770
			//	F2 0F D6 /r is MOVDQ2Q
771
			//	F3 0F D6 /r is MOVQ2DQ.
772
			//
773
			//	F2 66 0F D6 /r is 66 + MOVDQ2Q always.
774
			//	66 F2 0F D6 /r is 66 + MOVDQ2Q always.
775
			//	F3 66 0F D6 /r is 66 + MOVQ2DQ always.
776
			//	66 F3 0F D6 /r is 66 + MOVQ2DQ always.
777
			//	F2 F3 0F D6 /r is F2 + MOVQ2DQ always.
778
			//	F3 F2 0F D6 /r is F3 + MOVQ2DQ in Intel xed, but F2 + MOVQ2DQ in GNU libopcodes.
779
			//	Adding 66 anywhere in the prefix section of the
780
			//	last two cases does not change the outcome.
781
			//
782
			// Finally, what if there is a variant in which 66 is a mandatory
783
			// prefix rather than an operand size override, but we know of
784
			// no corresponding F2/F3 form, and we see both F2/F3 and 66.
785
			// Does F2/F3 still take priority, so that the result is an unknown
786
			// instruction, or does the 66 take priority, so that the extended
787
			// 66 instruction should be interpreted as having a REP/REPN prefix?
788
			// Intel xed does the former and GNU libopcodes does the latter.
789
			// We side with Intel xed, unless we are trying to match libopcodes
790
			// more closely during the comparison-based test suite.
791
			//
792
			// In 64-bit mode REX.W is another valid prefix to test for, but
793
			// there is less ambiguity about that. When present, REX.W is
794
			// always the first entry in the table.
795
			n := int(decoder[pc])
796
			pc++
797
			sawF3 := false
798
			for j := 0; j < n; j++ {
799
				prefix := Prefix(decoder[pc+2*j])
800
				if prefix.IsREX() {
801
					rexUsed |= prefix
802
					if rex&prefix == prefix {
803
						pc = int(decoder[pc+2*j+1])
804
						continue Decode
805
					}
806
					continue
807
				}
808
				ok := false
809
				if prefix == 0 {
810
					ok = true
811
				} else if prefix.IsREX() {
812
					rexUsed |= prefix
813
					if rex&prefix == prefix {
814
						ok = true
815
					}
816
				} else if prefix == 0xC5 || prefix == 0xC4 {
817
					if vex == prefix {
818
						ok = true
819
					}
820
				} else if vex != 0 && (prefix == 0x0F || prefix == 0x0F38 || prefix == 0x0F3A ||
821
					prefix == 0x66 || prefix == 0xF2 || prefix == 0xF3) {
822
					var vexM, vexP Prefix
823
					if vex == 0xC5 {
824
						vexM = 1 // 2 byte vex always implies 0F
825
						vexP = inst.Prefix[vexIndex+1]
826
					} else {
827
						vexM = inst.Prefix[vexIndex+1]
828
						vexP = inst.Prefix[vexIndex+2]
829
					}
830
					switch prefix {
831
					case 0x66:
832
						ok = vexP&3 == 1
833
					case 0xF3:
834
						ok = vexP&3 == 2
835
					case 0xF2:
836
						ok = vexP&3 == 3
837
					case 0x0F:
838
						ok = vexM&3 == 1
839
					case 0x0F38:
840
						ok = vexM&3 == 2
841
					case 0x0F3A:
842
						ok = vexM&3 == 3
843
					}
844
				} else {
845
					if prefix == 0xF3 {
846
						sawF3 = true
847
					}
848
					switch prefix {
849
					case PrefixLOCK:
850
						if lockIndex >= 0 {
851
							inst.Prefix[lockIndex] |= PrefixImplicit
852
							ok = true
853
						}
854
					case PrefixREP, PrefixREPN:
855
						if repIndex >= 0 && inst.Prefix[repIndex]&0xFF == prefix {
856
							inst.Prefix[repIndex] |= PrefixImplicit
857
							ok = true
858
						}
859
						if gnuCompat && !ok && prefix == 0xF3 && repIndex >= 0 && (j+1 >= n || decoder[pc+2*(j+1)] != 0xF2) {
860
							// Check to see if earlier prefix F3 is present.
861
							for i := repIndex - 1; i >= 0; i-- {
862
								if inst.Prefix[i]&0xFF == prefix {
863
									inst.Prefix[i] |= PrefixImplicit
864
									ok = true
865
								}
866
							}
867
						}
868
						if gnuCompat && !ok && prefix == 0xF2 && repIndex >= 0 && !sawF3 && inst.Prefix[repIndex]&0xFF == 0xF3 {
869
							// Check to see if earlier prefix F2 is present.
870
							for i := repIndex - 1; i >= 0; i-- {
871
								if inst.Prefix[i]&0xFF == prefix {
872
									inst.Prefix[i] |= PrefixImplicit
873
									ok = true
874
								}
875
							}
876
						}
877
					case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
878
						if segIndex >= 0 && inst.Prefix[segIndex]&0xFF == prefix {
879
							inst.Prefix[segIndex] |= PrefixImplicit
880
							ok = true
881
						}
882
					case PrefixDataSize:
883
						// Looking for 66 mandatory prefix.
884
						// The F2/F3 mandatory prefixes take priority when both are present.
885
						// If we got this far in the xCondPrefix table and an F2/F3 is present,
886
						// it means the table didn't have any entry for that prefix. But if 66 has
887
						// special meaning, perhaps F2/F3 have special meaning that we don't know.
888
						// Intel xed works this way, treating the F2/F3 as inhibiting the 66.
889
						// GNU libopcodes allows the 66 to match. We do what Intel xed does
890
						// except in gnuCompat mode.
891
						if repIndex >= 0 && !gnuCompat {
892
							inst.Op = 0
893
							break Decode
894
						}
895
						if dataSizeIndex >= 0 {
896
							inst.Prefix[dataSizeIndex] |= PrefixImplicit
897
							ok = true
898
						}
899
					case PrefixAddrSize:
900
						if addrSizeIndex >= 0 {
901
							inst.Prefix[addrSizeIndex] |= PrefixImplicit
902
							ok = true
903
						}
904
					}
905
				}
906
				if ok {
907
					pc = int(decoder[pc+2*j+1])
908
					continue Decode
909
				}
910
			}
911
			inst.Op = 0
912
			break Decode
913

914
		case xCondSlashR:
915
			pc = int(decoder[pc+regop&7])
916

917
		// Input.
918

919
		case xReadSlashR:
920
			// done above
921

922
		case xReadIb:
923
			if pos >= len(src) {
924
				return truncated(src, mode)
925
			}
926
			imm8 = int8(src[pos])
927
			pos++
928

929
		case xReadIw:
930
			if pos+2 > len(src) {
931
				return truncated(src, mode)
932
			}
933
			imm = int64(binary.LittleEndian.Uint16(src[pos:]))
934
			pos += 2
935

936
		case xReadId:
937
			if pos+4 > len(src) {
938
				return truncated(src, mode)
939
			}
940
			imm = int64(binary.LittleEndian.Uint32(src[pos:]))
941
			pos += 4
942

943
		case xReadIo:
944
			if pos+8 > len(src) {
945
				return truncated(src, mode)
946
			}
947
			imm = int64(binary.LittleEndian.Uint64(src[pos:]))
948
			pos += 8
949

950
		case xReadCb:
951
			if pos >= len(src) {
952
				return truncated(src, mode)
953
			}
954
			immcpos = pos
955
			immc = int64(src[pos])
956
			pos++
957

958
		case xReadCw:
959
			if pos+2 > len(src) {
960
				return truncated(src, mode)
961
			}
962
			immcpos = pos
963
			immc = int64(binary.LittleEndian.Uint16(src[pos:]))
964
			pos += 2
965

966
		case xReadCm:
967
			immcpos = pos
968
			if addrMode == 16 {
969
				if pos+2 > len(src) {
970
					return truncated(src, mode)
971
				}
972
				immc = int64(binary.LittleEndian.Uint16(src[pos:]))
973
				pos += 2
974
			} else if addrMode == 32 {
975
				if pos+4 > len(src) {
976
					return truncated(src, mode)
977
				}
978
				immc = int64(binary.LittleEndian.Uint32(src[pos:]))
979
				pos += 4
980
			} else {
981
				if pos+8 > len(src) {
982
					return truncated(src, mode)
983
				}
984
				immc = int64(binary.LittleEndian.Uint64(src[pos:]))
985
				pos += 8
986
			}
987
		case xReadCd:
988
			immcpos = pos
989
			if pos+4 > len(src) {
990
				return truncated(src, mode)
991
			}
992
			immc = int64(binary.LittleEndian.Uint32(src[pos:]))
993
			pos += 4
994

995
		case xReadCp:
996
			immcpos = pos
997
			if pos+6 > len(src) {
998
				return truncated(src, mode)
999
			}
1000
			w := binary.LittleEndian.Uint32(src[pos:])
1001
			w2 := binary.LittleEndian.Uint16(src[pos+4:])
1002
			immc = int64(w2)<<32 | int64(w)
1003
			pos += 6
1004

1005
		// Output.
1006

1007
		case xSetOp:
1008
			inst.Op = Op(decoder[pc])
1009
			pc++
1010

1011
		case xArg1,
1012
			xArg3,
1013
			xArgAL,
1014
			xArgAX,
1015
			xArgCL,
1016
			xArgCS,
1017
			xArgDS,
1018
			xArgDX,
1019
			xArgEAX,
1020
			xArgEDX,
1021
			xArgES,
1022
			xArgFS,
1023
			xArgGS,
1024
			xArgRAX,
1025
			xArgRDX,
1026
			xArgSS,
1027
			xArgST,
1028
			xArgXMM0:
1029
			inst.Args[narg] = fixedArg[x]
1030
			narg++
1031

1032
		case xArgImm8:
1033
			inst.Args[narg] = Imm(imm8)
1034
			narg++
1035

1036
		case xArgImm8u:
1037
			inst.Args[narg] = Imm(uint8(imm8))
1038
			narg++
1039

1040
		case xArgImm16:
1041
			inst.Args[narg] = Imm(int16(imm))
1042
			narg++
1043

1044
		case xArgImm16u:
1045
			inst.Args[narg] = Imm(uint16(imm))
1046
			narg++
1047

1048
		case xArgImm32:
1049
			inst.Args[narg] = Imm(int32(imm))
1050
			narg++
1051

1052
		case xArgImm64:
1053
			inst.Args[narg] = Imm(imm)
1054
			narg++
1055

1056
		case xArgM,
1057
			xArgM128,
1058
			xArgM256,
1059
			xArgM1428byte,
1060
			xArgM16,
1061
			xArgM16and16,
1062
			xArgM16and32,
1063
			xArgM16and64,
1064
			xArgM16colon16,
1065
			xArgM16colon32,
1066
			xArgM16colon64,
1067
			xArgM16int,
1068
			xArgM2byte,
1069
			xArgM32,
1070
			xArgM32and32,
1071
			xArgM32fp,
1072
			xArgM32int,
1073
			xArgM512byte,
1074
			xArgM64,
1075
			xArgM64fp,
1076
			xArgM64int,
1077
			xArgM8,
1078
			xArgM80bcd,
1079
			xArgM80dec,
1080
			xArgM80fp,
1081
			xArgM94108byte,
1082
			xArgMem:
1083
			if !haveMem {
1084
				inst.Op = 0
1085
				break Decode
1086
			}
1087
			inst.Args[narg] = mem
1088
			inst.MemBytes = int(memBytes[decodeOp(x)])
1089
			if mem.Base == RIP {
1090
				inst.PCRel = displen
1091
				inst.PCRelOff = dispoff
1092
			}
1093
			narg++
1094

1095
		case xArgPtr16colon16:
1096
			inst.Args[narg] = Imm(immc >> 16)
1097
			inst.Args[narg+1] = Imm(immc & (1<<16 - 1))
1098
			narg += 2
1099

1100
		case xArgPtr16colon32:
1101
			inst.Args[narg] = Imm(immc >> 32)
1102
			inst.Args[narg+1] = Imm(immc & (1<<32 - 1))
1103
			narg += 2
1104

1105
		case xArgMoffs8, xArgMoffs16, xArgMoffs32, xArgMoffs64:
1106
			// TODO(rsc): Can address be 64 bits?
1107
			mem = Mem{Disp: int64(immc)}
1108
			if segIndex >= 0 {
1109
				mem.Segment = prefixToSegment(inst.Prefix[segIndex])
1110
				inst.Prefix[segIndex] |= PrefixImplicit
1111
			}
1112
			inst.Args[narg] = mem
1113
			inst.MemBytes = int(memBytes[decodeOp(x)])
1114
			if mem.Base == RIP {
1115
				inst.PCRel = displen
1116
				inst.PCRelOff = dispoff
1117
			}
1118
			narg++
1119

1120
		case xArgYmm1:
1121
			base := baseReg[x]
1122
			index := Reg(regop)
1123
			if inst.Prefix[vexIndex+1]&0x80 == 0 {
1124
				index += 8
1125
			}
1126
			inst.Args[narg] = base + index
1127
			narg++
1128

1129
		case xArgR8, xArgR16, xArgR32, xArgR64, xArgXmm, xArgXmm1, xArgDR0dashDR7:
1130
			base := baseReg[x]
1131
			index := Reg(regop)
1132
			if rex != 0 && base == AL && index >= 4 {
1133
				rexUsed |= PrefixREX
1134
				index -= 4
1135
				base = SPB
1136
			}
1137
			inst.Args[narg] = base + index
1138
			narg++
1139

1140
		case xArgMm, xArgMm1, xArgTR0dashTR7:
1141
			inst.Args[narg] = baseReg[x] + Reg(regop&7)
1142
			narg++
1143

1144
		case xArgCR0dashCR7:
1145
			// AMD documents an extension that the LOCK prefix
1146
			// can be used in place of a REX prefix in order to access
1147
			// CR8 from 32-bit mode. The LOCK prefix is allowed in
1148
			// all modes, provided the corresponding CPUID bit is set.
1149
			if lockIndex >= 0 {
1150
				inst.Prefix[lockIndex] |= PrefixImplicit
1151
				regop += 8
1152
			}
1153
			inst.Args[narg] = CR0 + Reg(regop)
1154
			narg++
1155

1156
		case xArgSreg:
1157
			regop &= 7
1158
			if regop >= 6 {
1159
				inst.Op = 0
1160
				break Decode
1161
			}
1162
			inst.Args[narg] = ES + Reg(regop)
1163
			narg++
1164

1165
		case xArgRmf16, xArgRmf32, xArgRmf64:
1166
			base := baseReg[x]
1167
			index := Reg(modrm & 07)
1168
			if rex&PrefixREXB != 0 {
1169
				rexUsed |= PrefixREXB
1170
				index += 8
1171
			}
1172
			inst.Args[narg] = base + index
1173
			narg++
1174

1175
		case xArgR8op, xArgR16op, xArgR32op, xArgR64op, xArgSTi:
1176
			n := inst.Opcode >> uint(opshift+8) & 07
1177
			base := baseReg[x]
1178
			index := Reg(n)
1179
			if rex&PrefixREXB != 0 && decodeOp(x) != xArgSTi {
1180
				rexUsed |= PrefixREXB
1181
				index += 8
1182
			}
1183
			if rex != 0 && base == AL && index >= 4 {
1184
				rexUsed |= PrefixREX
1185
				index -= 4
1186
				base = SPB
1187
			}
1188
			inst.Args[narg] = base + index
1189
			narg++
1190
		case xArgRM8, xArgRM16, xArgRM32, xArgRM64, xArgR32M16, xArgR32M8, xArgR64M16,
1191
			xArgMmM32, xArgMmM64, xArgMm2M64,
1192
			xArgXmm2M16, xArgXmm2M32, xArgXmm2M64, xArgXmmM64, xArgXmmM128, xArgXmmM32, xArgXmm2M128,
1193
			xArgYmm2M256:
1194
			if haveMem {
1195
				inst.Args[narg] = mem
1196
				inst.MemBytes = int(memBytes[decodeOp(x)])
1197
				if mem.Base == RIP {
1198
					inst.PCRel = displen
1199
					inst.PCRelOff = dispoff
1200
				}
1201
			} else {
1202
				base := baseReg[x]
1203
				index := Reg(rm)
1204
				switch decodeOp(x) {
1205
				case xArgMmM32, xArgMmM64, xArgMm2M64:
1206
					// There are only 8 MMX registers, so these ignore the REX.X bit.
1207
					index &= 7
1208
				case xArgRM8:
1209
					if rex != 0 && index >= 4 {
1210
						rexUsed |= PrefixREX
1211
						index -= 4
1212
						base = SPB
1213
					}
1214
				case xArgYmm2M256:
1215
					if vex == 0xC4 && inst.Prefix[vexIndex+1]&0x40 == 0x40 {
1216
						index += 8
1217
					}
1218
				}
1219
				inst.Args[narg] = base + index
1220
			}
1221
			narg++
1222

1223
		case xArgMm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag
1224
			if haveMem {
1225
				inst.Op = 0
1226
				break Decode
1227
			}
1228
			inst.Args[narg] = baseReg[x] + Reg(rm&7)
1229
			narg++
1230

1231
		case xArgXmm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag
1232
			if haveMem {
1233
				inst.Op = 0
1234
				break Decode
1235
			}
1236
			inst.Args[narg] = baseReg[x] + Reg(rm)
1237
			narg++
1238

1239
		case xArgRel8:
1240
			inst.PCRelOff = immcpos
1241
			inst.PCRel = 1
1242
			inst.Args[narg] = Rel(int8(immc))
1243
			narg++
1244

1245
		case xArgRel16:
1246
			inst.PCRelOff = immcpos
1247
			inst.PCRel = 2
1248
			inst.Args[narg] = Rel(int16(immc))
1249
			narg++
1250

1251
		case xArgRel32:
1252
			inst.PCRelOff = immcpos
1253
			inst.PCRel = 4
1254
			inst.Args[narg] = Rel(int32(immc))
1255
			narg++
1256
		}
1257
	}
1258

1259
	if inst.Op == 0 {
1260
		// Invalid instruction.
1261
		if nprefix > 0 {
1262
			return instPrefix(src[0], mode) // invalid instruction
1263
		}
1264
		return Inst{Len: pos}, ErrUnrecognized
1265
	}
1266

1267
	// Matched! Hooray!
1268

1269
	// 90 decodes as XCHG EAX, EAX but is NOP.
1270
	// 66 90 decodes as XCHG AX, AX and is NOP too.
1271
	// 48 90 decodes as XCHG RAX, RAX and is NOP too.
1272
	// 43 90 decodes as XCHG R8D, EAX and is *not* NOP.
1273
	// F3 90 decodes as REP XCHG EAX, EAX but is PAUSE.
1274
	// It's all too special to handle in the decoding tables, at least for now.
1275
	if inst.Op == XCHG && inst.Opcode>>24 == 0x90 {
1276
		if inst.Args[0] == RAX || inst.Args[0] == EAX || inst.Args[0] == AX {
1277
			inst.Op = NOP
1278
			if dataSizeIndex >= 0 {
1279
				inst.Prefix[dataSizeIndex] &^= PrefixImplicit
1280
			}
1281
			inst.Args[0] = nil
1282
			inst.Args[1] = nil
1283
		}
1284
		if repIndex >= 0 && inst.Prefix[repIndex] == 0xF3 {
1285
			inst.Prefix[repIndex] |= PrefixImplicit
1286
			inst.Op = PAUSE
1287
			inst.Args[0] = nil
1288
			inst.Args[1] = nil
1289
		} else if gnuCompat {
1290
			for i := nprefix - 1; i >= 0; i-- {
1291
				if inst.Prefix[i]&0xFF == 0xF3 {
1292
					inst.Prefix[i] |= PrefixImplicit
1293
					inst.Op = PAUSE
1294
					inst.Args[0] = nil
1295
					inst.Args[1] = nil
1296
					break
1297
				}
1298
			}
1299
		}
1300
	}
1301

1302
	// defaultSeg returns the default segment for an implicit
1303
	// memory reference: the final override if present, or else DS.
1304
	defaultSeg := func() Reg {
1305
		if segIndex >= 0 {
1306
			inst.Prefix[segIndex] |= PrefixImplicit
1307
			return prefixToSegment(inst.Prefix[segIndex])
1308
		}
1309
		return DS
1310
	}
1311

1312
	// Add implicit arguments not present in the tables.
1313
	// Normally we shy away from making implicit arguments explicit,
1314
	// following the Intel manuals, but adding the arguments seems
1315
	// the best way to express the effect of the segment override prefixes.
1316
	// TODO(rsc): Perhaps add these to the tables and
1317
	// create bytecode instructions for them.
1318
	usedAddrSize := false
1319
	switch inst.Op {
1320
	case INSB, INSW, INSD:
1321
		inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
1322
		inst.Args[1] = DX
1323
		usedAddrSize = true
1324

1325
	case OUTSB, OUTSW, OUTSD:
1326
		inst.Args[0] = DX
1327
		inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
1328
		usedAddrSize = true
1329

1330
	case MOVSB, MOVSW, MOVSD, MOVSQ:
1331
		inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
1332
		inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
1333
		usedAddrSize = true
1334

1335
	case CMPSB, CMPSW, CMPSD, CMPSQ:
1336
		inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
1337
		inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
1338
		usedAddrSize = true
1339

1340
	case LODSB, LODSW, LODSD, LODSQ:
1341
		switch inst.Op {
1342
		case LODSB:
1343
			inst.Args[0] = AL
1344
		case LODSW:
1345
			inst.Args[0] = AX
1346
		case LODSD:
1347
			inst.Args[0] = EAX
1348
		case LODSQ:
1349
			inst.Args[0] = RAX
1350
		}
1351
		inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
1352
		usedAddrSize = true
1353

1354
	case STOSB, STOSW, STOSD, STOSQ:
1355
		inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
1356
		switch inst.Op {
1357
		case STOSB:
1358
			inst.Args[1] = AL
1359
		case STOSW:
1360
			inst.Args[1] = AX
1361
		case STOSD:
1362
			inst.Args[1] = EAX
1363
		case STOSQ:
1364
			inst.Args[1] = RAX
1365
		}
1366
		usedAddrSize = true
1367

1368
	case SCASB, SCASW, SCASD, SCASQ:
1369
		inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
1370
		switch inst.Op {
1371
		case SCASB:
1372
			inst.Args[0] = AL
1373
		case SCASW:
1374
			inst.Args[0] = AX
1375
		case SCASD:
1376
			inst.Args[0] = EAX
1377
		case SCASQ:
1378
			inst.Args[0] = RAX
1379
		}
1380
		usedAddrSize = true
1381

1382
	case XLATB:
1383
		inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + BX - AX}
1384
		usedAddrSize = true
1385
	}
1386

1387
	// If we used the address size annotation to construct the
1388
	// argument list, mark that prefix as implicit: it doesn't need
1389
	// to be shown when printing the instruction.
1390
	if haveMem || usedAddrSize {
1391
		if addrSizeIndex >= 0 {
1392
			inst.Prefix[addrSizeIndex] |= PrefixImplicit
1393
		}
1394
	}
1395

1396
	// Similarly, if there's some memory operand, the segment
1397
	// will be shown there and doesn't need to be shown as an
1398
	// explicit prefix.
1399
	if haveMem {
1400
		if segIndex >= 0 {
1401
			inst.Prefix[segIndex] |= PrefixImplicit
1402
		}
1403
	}
1404

1405
	// Branch predict prefixes are overloaded segment prefixes,
1406
	// since segment prefixes don't make sense on conditional jumps.
1407
	// Rewrite final instance to prediction prefix.
1408
	// The set of instructions to which the prefixes apply (other than the
1409
	// Jcc conditional jumps) is not 100% clear from the manuals, but
1410
	// the disassemblers seem to agree about the LOOP and JCXZ instructions,
1411
	// so we'll follow along.
1412
	// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
1413
	if isCondJmp[inst.Op] || isLoop[inst.Op] || inst.Op == JCXZ || inst.Op == JECXZ || inst.Op == JRCXZ {
1414
	PredictLoop:
1415
		for i := nprefix - 1; i >= 0; i-- {
1416
			p := inst.Prefix[i]
1417
			switch p & 0xFF {
1418
			case PrefixCS:
1419
				inst.Prefix[i] = PrefixPN
1420
				break PredictLoop
1421
			case PrefixDS:
1422
				inst.Prefix[i] = PrefixPT
1423
				break PredictLoop
1424
			}
1425
		}
1426
	}
1427

1428
	// The BND prefix is part of the Intel Memory Protection Extensions (MPX).
1429
	// A REPN applied to certain control transfers is a BND prefix to bound
1430
	// the range of possible destinations. There's surprisingly little documentation
1431
	// about this, so we just do what libopcodes and xed agree on.
1432
	// In particular, it's unclear why a REPN applied to LOOP or JCXZ instructions
1433
	// does not turn into a BND.
1434
	// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
1435
	if isCondJmp[inst.Op] || inst.Op == JMP || inst.Op == CALL || inst.Op == RET {
1436
		for i := nprefix - 1; i >= 0; i-- {
1437
			p := inst.Prefix[i]
1438
			if p&^PrefixIgnored == PrefixREPN {
1439
				inst.Prefix[i] = PrefixBND
1440
				break
1441
			}
1442
		}
1443
	}
1444

1445
	// The LOCK prefix only applies to certain instructions, and then only
1446
	// to instances of the instruction with a memory destination.
1447
	// Other uses of LOCK are invalid and cause a processor exception,
1448
	// in contrast to the "just ignore it" spirit applied to all other prefixes.
1449
	// Mark invalid lock prefixes.
1450
	hasLock := false
1451
	if lockIndex >= 0 && inst.Prefix[lockIndex]&PrefixImplicit == 0 {
1452
		switch inst.Op {
1453
		// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
1454
		case ADD, ADC, AND, BTC, BTR, BTS, CMPXCHG, CMPXCHG8B, CMPXCHG16B, DEC, INC, NEG, NOT, OR, SBB, SUB, XOR, XADD, XCHG:
1455
			if isMem(inst.Args[0]) {
1456
				hasLock = true
1457
				break
1458
			}
1459
			fallthrough
1460
		default:
1461
			inst.Prefix[lockIndex] |= PrefixInvalid
1462
		}
1463
	}
1464

1465
	// In certain cases, all of which require a memory destination,
1466
	// the REPN and REP prefixes are interpreted as XACQUIRE and XRELEASE
1467
	// from the Intel Transactional Synchronization Extensions (TSX).
1468
	//
1469
	// The specific rules are:
1470
	// (1) Any instruction with a valid LOCK prefix can have XACQUIRE or XRELEASE.
1471
	// (2) Any XCHG, which always has an implicit LOCK, can have XACQUIRE or XRELEASE.
1472
	// (3) Any 0x88-, 0x89-, 0xC6-, or 0xC7-opcode MOV can have XRELEASE.
1473
	if isMem(inst.Args[0]) {
1474
		if inst.Op == XCHG {
1475
			hasLock = true
1476
		}
1477

1478
		for i := len(inst.Prefix) - 1; i >= 0; i-- {
1479
			p := inst.Prefix[i] &^ PrefixIgnored
1480
			switch p {
1481
			case PrefixREPN:
1482
				if hasLock {
1483
					inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXACQUIRE
1484
				}
1485

1486
			case PrefixREP:
1487
				if hasLock {
1488
					inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
1489
				}
1490

1491
				if inst.Op == MOV {
1492
					op := (inst.Opcode >> 24) &^ 1
1493
					if op == 0x88 || op == 0xC6 {
1494
						inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
1495
					}
1496
				}
1497
			}
1498
		}
1499
	}
1500

1501
	// If REP is used on a non-REP-able instruction, mark the prefix as ignored.
1502
	if repIndex >= 0 {
1503
		switch inst.Prefix[repIndex] {
1504
		case PrefixREP, PrefixREPN:
1505
			switch inst.Op {
1506
			// According to the manuals, the REP/REPE prefix applies to all of these,
1507
			// while the REPN applies only to some of them. However, both libopcodes
1508
			// and xed show both prefixes explicitly for all instructions, so we do the same.
1509
			// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
1510
			case INSB, INSW, INSD,
1511
				MOVSB, MOVSW, MOVSD, MOVSQ,
1512
				OUTSB, OUTSW, OUTSD,
1513
				LODSB, LODSW, LODSD, LODSQ,
1514
				CMPSB, CMPSW, CMPSD, CMPSQ,
1515
				SCASB, SCASW, SCASD, SCASQ,
1516
				STOSB, STOSW, STOSD, STOSQ:
1517
				// ok
1518
			default:
1519
				inst.Prefix[repIndex] |= PrefixIgnored
1520
			}
1521
		}
1522
	}
1523

1524
	// If REX was present, mark implicit if all the 1 bits were consumed.
1525
	if rexIndex >= 0 {
1526
		if rexUsed != 0 {
1527
			rexUsed |= PrefixREX
1528
		}
1529
		if rex&^rexUsed == 0 {
1530
			inst.Prefix[rexIndex] |= PrefixImplicit
1531
		}
1532
	}
1533

1534
	inst.DataSize = dataMode
1535
	inst.AddrSize = addrMode
1536
	inst.Mode = mode
1537
	inst.Len = pos
1538
	return inst, nil
1539
}
1540

1541
// errInternal is a package-level error value whose message is "internal error".
// NOTE(review): presumably reported when the decoder reaches a state that
// should be impossible; its uses lie outside this section — confirm.
var errInternal = errors.New("internal error")
1542

1543
// addr16 records the eight 16-bit addressing modes.
1544
var addr16 = [8]Mem{
1545
	{Base: BX, Scale: 1, Index: SI},
1546
	{Base: BX, Scale: 1, Index: DI},
1547
	{Base: BP, Scale: 1, Index: SI},
1548
	{Base: BP, Scale: 1, Index: DI},
1549
	{Base: SI},
1550
	{Base: DI},
1551
	{Base: BP},
1552
	{Base: BX},
1553
}
1554

1555
// baseReg returns the base register for a given register size in bits.
1556
func baseRegForBits(bits int) Reg {
1557
	switch bits {
1558
	case 8:
1559
		return AL
1560
	case 16:
1561
		return AX
1562
	case 32:
1563
		return EAX
1564
	case 64:
1565
		return RAX
1566
	}
1567
	return 0
1568
}
1569

1570
// baseReg records the base register for argument types that specify
1571
// a range of registers indexed by op, regop, or rm.
1572
var baseReg = [...]Reg{
1573
	xArgDR0dashDR7: DR0,
1574
	xArgMm1:        M0,
1575
	xArgMm2:        M0,
1576
	xArgMm2M64:     M0,
1577
	xArgMm:         M0,
1578
	xArgMmM32:      M0,
1579
	xArgMmM64:      M0,
1580
	xArgR16:        AX,
1581
	xArgR16op:      AX,
1582
	xArgR32:        EAX,
1583
	xArgR32M16:     EAX,
1584
	xArgR32M8:      EAX,
1585
	xArgR32op:      EAX,
1586
	xArgR64:        RAX,
1587
	xArgR64M16:     RAX,
1588
	xArgR64op:      RAX,
1589
	xArgR8:         AL,
1590
	xArgR8op:       AL,
1591
	xArgRM16:       AX,
1592
	xArgRM32:       EAX,
1593
	xArgRM64:       RAX,
1594
	xArgRM8:        AL,
1595
	xArgRmf16:      AX,
1596
	xArgRmf32:      EAX,
1597
	xArgRmf64:      RAX,
1598
	xArgSTi:        F0,
1599
	xArgTR0dashTR7: TR0,
1600
	xArgXmm1:       X0,
1601
	xArgYmm1:       X0,
1602
	xArgXmm2:       X0,
1603
	xArgXmm2M128:   X0,
1604
	xArgYmm2M256:   X0,
1605
	xArgXmm2M16:    X0,
1606
	xArgXmm2M32:    X0,
1607
	xArgXmm2M64:    X0,
1608
	xArgXmm:        X0,
1609
	xArgXmmM128:    X0,
1610
	xArgXmmM32:     X0,
1611
	xArgXmmM64:     X0,
1612
}
1613

1614
// prefixToSegment returns the segment register
1615
// corresponding to a particular segment prefix.
1616
func prefixToSegment(p Prefix) Reg {
1617
	switch p &^ PrefixImplicit {
1618
	case PrefixCS:
1619
		return CS
1620
	case PrefixDS:
1621
		return DS
1622
	case PrefixES:
1623
		return ES
1624
	case PrefixFS:
1625
		return FS
1626
	case PrefixGS:
1627
		return GS
1628
	case PrefixSS:
1629
		return SS
1630
	}
1631
	return 0
1632
}
1633

1634
// fixedArg records the fixed arguments corresponding to the given bytecodes.
1635
var fixedArg = [...]Arg{
1636
	xArg1:    Imm(1),
1637
	xArg3:    Imm(3),
1638
	xArgAL:   AL,
1639
	xArgAX:   AX,
1640
	xArgDX:   DX,
1641
	xArgEAX:  EAX,
1642
	xArgEDX:  EDX,
1643
	xArgRAX:  RAX,
1644
	xArgRDX:  RDX,
1645
	xArgCL:   CL,
1646
	xArgCS:   CS,
1647
	xArgDS:   DS,
1648
	xArgES:   ES,
1649
	xArgFS:   FS,
1650
	xArgGS:   GS,
1651
	xArgSS:   SS,
1652
	xArgST:   F0,
1653
	xArgXMM0: X0,
1654
}
1655

1656
// memBytes records the size of the memory pointed at
1657
// by a memory argument of the given form.
1658
var memBytes = [...]int8{
1659
	xArgM128:       128 / 8,
1660
	xArgM256:       256 / 8,
1661
	xArgM16:        16 / 8,
1662
	xArgM16and16:   (16 + 16) / 8,
1663
	xArgM16colon16: (16 + 16) / 8,
1664
	xArgM16colon32: (16 + 32) / 8,
1665
	xArgM16int:     16 / 8,
1666
	xArgM2byte:     2,
1667
	xArgM32:        32 / 8,
1668
	xArgM32and32:   (32 + 32) / 8,
1669
	xArgM32fp:      32 / 8,
1670
	xArgM32int:     32 / 8,
1671
	xArgM64:        64 / 8,
1672
	xArgM64fp:      64 / 8,
1673
	xArgM64int:     64 / 8,
1674
	xArgMm2M64:     64 / 8,
1675
	xArgMmM32:      32 / 8,
1676
	xArgMmM64:      64 / 8,
1677
	xArgMoffs16:    16 / 8,
1678
	xArgMoffs32:    32 / 8,
1679
	xArgMoffs64:    64 / 8,
1680
	xArgMoffs8:     8 / 8,
1681
	xArgR32M16:     16 / 8,
1682
	xArgR32M8:      8 / 8,
1683
	xArgR64M16:     16 / 8,
1684
	xArgRM16:       16 / 8,
1685
	xArgRM32:       32 / 8,
1686
	xArgRM64:       64 / 8,
1687
	xArgRM8:        8 / 8,
1688
	xArgXmm2M128:   128 / 8,
1689
	xArgYmm2M256:   256 / 8,
1690
	xArgXmm2M16:    16 / 8,
1691
	xArgXmm2M32:    32 / 8,
1692
	xArgXmm2M64:    64 / 8,
1693
	xArgXmm:        128 / 8,
1694
	xArgXmmM128:    128 / 8,
1695
	xArgXmmM32:     32 / 8,
1696
	xArgXmmM64:     64 / 8,
1697
}
1698

1699
// isCondJmp records the conditional jumps.
1700
var isCondJmp = [maxOp + 1]bool{
1701
	JA:  true,
1702
	JAE: true,
1703
	JB:  true,
1704
	JBE: true,
1705
	JE:  true,
1706
	JG:  true,
1707
	JGE: true,
1708
	JL:  true,
1709
	JLE: true,
1710
	JNE: true,
1711
	JNO: true,
1712
	JNP: true,
1713
	JNS: true,
1714
	JO:  true,
1715
	JP:  true,
1716
	JS:  true,
1717
}
1718

1719
// isLoop records the loop operators.
1720
var isLoop = [maxOp + 1]bool{
1721
	LOOP:   true,
1722
	LOOPE:  true,
1723
	LOOPNE: true,
1724
	JECXZ:  true,
1725
	JRCXZ:  true,
1726
}
1727
cubefs

Использование cookies