podman

Форк
0
1724 строки · 45.1 Кб
1
// Copyright 2014 The Go Authors.  All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4

5
// Table-driven decoding of x86 instructions.
6

7
package x86asm
8

9
import (
10
	"encoding/binary"
11
	"errors"
12
	"fmt"
13
	"runtime"
14
)
15

16
// Set trace to true to cause the decoder to print the PC sequence
17
// of the executed instruction codes. This is typically only useful
18
// when you are running a test of a single input case.
19
const trace = false
20

21
// A decodeOp is a single instruction in the decoder bytecode program.
//
// The decodeOps correspond to consuming and conditionally branching
// on input bytes, consuming additional fields, and then interpreting
// consumed data as instruction arguments. The names of the xRead and xArg
// operations are taken from the Intel manual conventions, for example
// Volume 2, Section 3.1.1, page 487 of
// http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf
//
// The actual decoding program is generated by ../x86map.
//
// TODO(rsc): We may be able to merge various of the memory operands
// since we don't care about, say, the distinction between m80dec and m80bcd.
// Similarly, mm and mm1 have identical meaning, as do xmm and xmm1.
type decodeOp uint16

// The decodeOp values are assigned by iota, so each constant's numeric
// value is its position in this list.
// NOTE(review): the generated decoder program presumably embeds these
// numbers, so do not reorder, insert, or delete entries without
// regenerating it — confirm against ../x86map.
const (
	xFail  decodeOp = iota // invalid instruction (return)
	xMatch                 // completed match
	xJump                  // jump to pc

	xCondByte     // switch on instruction byte value
	xCondSlashR   // read and switch on instruction /r value
	xCondPrefix   // switch on presence of instruction prefix
	xCondIs64     // switch on 64-bit processor mode
	xCondDataSize // switch on operand size
	xCondAddrSize // switch on address size
	xCondIsMem    // switch on memory vs register argument

	xSetOp // set instruction opcode

	xReadSlashR // read /r
	xReadIb     // read ib
	xReadIw     // read iw
	xReadId     // read id
	xReadIo     // read io
	xReadCb     // read cb
	xReadCw     // read cw
	xReadCd     // read cd
	xReadCp     // read cp
	xReadCm     // read cm

	xArg1            // arg 1
	xArg3            // arg 3
	xArgAL           // arg AL
	xArgAX           // arg AX
	xArgCL           // arg CL
	xArgCR0dashCR7   // arg CR0-CR7
	xArgCS           // arg CS
	xArgDR0dashDR7   // arg DR0-DR7
	xArgDS           // arg DS
	xArgDX           // arg DX
	xArgEAX          // arg EAX
	xArgEDX          // arg EDX
	xArgES           // arg ES
	xArgFS           // arg FS
	xArgGS           // arg GS
	xArgImm16        // arg imm16
	xArgImm32        // arg imm32
	xArgImm64        // arg imm64
	xArgImm8         // arg imm8
	xArgImm8u        // arg imm8 but record as unsigned
	xArgImm16u       // arg imm16 but record as unsigned
	xArgM            // arg m
	xArgM128         // arg m128
	xArgM256         // arg m256
	xArgM1428byte    // arg m14/28byte
	xArgM16          // arg m16
	xArgM16and16     // arg m16&16
	xArgM16and32     // arg m16&32
	xArgM16and64     // arg m16&64
	xArgM16colon16   // arg m16:16
	xArgM16colon32   // arg m16:32
	xArgM16colon64   // arg m16:64
	xArgM16int       // arg m16int
	xArgM2byte       // arg m2byte
	xArgM32          // arg m32
	xArgM32and32     // arg m32&32
	xArgM32fp        // arg m32fp
	xArgM32int       // arg m32int
	xArgM512byte     // arg m512byte
	xArgM64          // arg m64
	xArgM64fp        // arg m64fp
	xArgM64int       // arg m64int
	xArgM8           // arg m8
	xArgM80bcd       // arg m80bcd
	xArgM80dec       // arg m80dec
	xArgM80fp        // arg m80fp
	xArgM94108byte   // arg m94/108byte
	xArgMm           // arg mm
	xArgMm1          // arg mm1
	xArgMm2          // arg mm2
	xArgMm2M64       // arg mm2/m64
	xArgMmM32        // arg mm/m32
	xArgMmM64        // arg mm/m64
	xArgMem          // arg mem
	xArgMoffs16      // arg moffs16
	xArgMoffs32      // arg moffs32
	xArgMoffs64      // arg moffs64
	xArgMoffs8       // arg moffs8
	xArgPtr16colon16 // arg ptr16:16
	xArgPtr16colon32 // arg ptr16:32
	xArgR16          // arg r16
	xArgR16op        // arg r16 with +rw in opcode
	xArgR32          // arg r32
	xArgR32M16       // arg r32/m16
	xArgR32M8        // arg r32/m8
	xArgR32op        // arg r32 with +rd in opcode
	xArgR64          // arg r64
	xArgR64M16       // arg r64/m16
	xArgR64op        // arg r64 with +rd in opcode
	xArgR8           // arg r8
	xArgR8op         // arg r8 with +rb in opcode
	xArgRAX          // arg RAX
	xArgRDX          // arg RDX
	xArgRM           // arg r/m
	xArgRM16         // arg r/m16
	xArgRM32         // arg r/m32
	xArgRM64         // arg r/m64
	xArgRM8          // arg r/m8
	xArgReg          // arg reg
	xArgRegM16       // arg reg/m16
	xArgRegM32       // arg reg/m32
	xArgRegM8        // arg reg/m8
	xArgRel16        // arg rel16
	xArgRel32        // arg rel32
	xArgRel8         // arg rel8
	xArgSS           // arg SS
	xArgST           // arg ST, aka ST(0)
	xArgSTi          // arg ST(i) with +i in opcode
	xArgSreg         // arg Sreg
	xArgTR0dashTR7   // arg TR0-TR7
	xArgXmm          // arg xmm
	xArgXMM0         // arg <XMM0>
	xArgXmm1         // arg xmm1
	xArgXmm2         // arg xmm2
	xArgXmm2M128     // arg xmm2/m128
	xArgYmm2M256     // arg ymm2/m256
	xArgXmm2M16      // arg xmm2/m16
	xArgXmm2M32      // arg xmm2/m32
	xArgXmm2M64      // arg xmm2/m64
	xArgXmmM128      // arg xmm/m128
	xArgXmmM32       // arg xmm/m32
	xArgXmmM64       // arg xmm/m64
	xArgYmm1         // arg ymm1
	xArgRmf16        // arg r/m16 but force mod=3
	xArgRmf32        // arg r/m32 but force mod=3
	xArgRmf64        // arg r/m64 but force mod=3
)
171

172
// instPrefix returns an Inst describing just one prefix byte.
173
// It is only used if there is a prefix followed by an unintelligible
174
// or invalid instruction byte sequence.
175
func instPrefix(b byte, mode int) (Inst, error) {
176
	// When tracing it is useful to see what called instPrefix to report an error.
177
	if trace {
178
		_, file, line, _ := runtime.Caller(1)
179
		fmt.Printf("%s:%d\n", file, line)
180
	}
181
	p := Prefix(b)
182
	switch p {
183
	case PrefixDataSize:
184
		if mode == 16 {
185
			p = PrefixData32
186
		} else {
187
			p = PrefixData16
188
		}
189
	case PrefixAddrSize:
190
		if mode == 32 {
191
			p = PrefixAddr16
192
		} else {
193
			p = PrefixAddr32
194
		}
195
	}
196
	// Note: using composite literal with Prefix key confuses 'bundle' tool.
197
	inst := Inst{Len: 1}
198
	inst.Prefix = Prefixes{p}
199
	return inst, nil
200
}
201

202
// truncated reports a truncated instruction.
203
// For now we use instPrefix but perhaps later we will return
204
// a specific error here.
205
func truncated(src []byte, mode int) (Inst, error) {
206
	if len(src) == 0 {
207
		return Inst{}, ErrTruncated
208
	}
209
	return instPrefix(src[0], mode) // too long
210
}
211

212
// These are the errors returned by Decode.
var (
	// ErrInvalidMode is returned when the mode argument is not 16, 32, or 64.
	ErrInvalidMode = errors.New("invalid x86 mode in Decode")

	// ErrTruncated is returned when there are no input bytes at all to
	// describe; see truncated.
	ErrTruncated = errors.New("truncated instruction")

	// ErrUnrecognized is the sentinel for input that does not match a
	// known instruction encoding.
	// NOTE(review): the code that returns it is outside this view — confirm.
	ErrUnrecognized = errors.New("unrecognized instruction")
)
218

219
// decoderCover records coverage information for which parts
// of the byte code have been executed.
//
// It is declared without an initializer, so recording is off until
// something assigns a slice. While it is non-nil, decode1 sets
// decoderCover[pc] to true for each decoder program entry it executes.
// NOTE(review): it is indexed by decoder pc, so it presumably must be at
// least as long as the decoder table — confirm where it is allocated.
var decoderCover []bool
222

223
// Decode decodes the leading bytes in src as a single instruction.
224
// The mode arguments specifies the assumed processor mode:
225
// 16, 32, or 64 for 16-, 32-, and 64-bit execution modes.
226
func Decode(src []byte, mode int) (inst Inst, err error) {
227
	return decode1(src, mode, false)
228
}
229

230
// decode1 is the implementation of Decode but takes an extra
231
// gnuCompat flag to cause it to change its behavior to mimic
232
// bugs (or at least unique features) of GNU libopcodes as used
233
// by objdump. We don't believe that logic is the right thing to do
234
// in general, but when testing against libopcodes it simplifies the
235
// comparison if we adjust a few small pieces of logic.
236
// The affected logic is in the conditional branch for "mandatory" prefixes,
237
// case xCondPrefix.
238
func decode1(src []byte, mode int, gnuCompat bool) (Inst, error) {
239
	switch mode {
240
	case 16, 32, 64:
241
		// ok
242
		// TODO(rsc): 64-bit mode not tested, probably not working.
243
	default:
244
		return Inst{}, ErrInvalidMode
245
	}
246

247
	// Maximum instruction size is 15 bytes.
248
	// If we need to read more, return 'truncated instruction'.
249
	if len(src) > 15 {
250
		src = src[:15]
251
	}
252

253
	var (
254
		// prefix decoding information
255
		pos           = 0    // position reading src
256
		nprefix       = 0    // number of prefixes
257
		lockIndex     = -1   // index of LOCK prefix in src and inst.Prefix
258
		repIndex      = -1   // index of REP/REPN prefix in src and inst.Prefix
259
		segIndex      = -1   // index of Group 2 prefix in src and inst.Prefix
260
		dataSizeIndex = -1   // index of Group 3 prefix in src and inst.Prefix
261
		addrSizeIndex = -1   // index of Group 4 prefix in src and inst.Prefix
262
		rex           Prefix // rex byte if present (or 0)
263
		rexUsed       Prefix // bits used in rex byte
264
		rexIndex      = -1   // index of rex byte
265
		vex           Prefix // use vex encoding
266
		vexIndex      = -1   // index of vex prefix
267

268
		addrMode = mode // address mode (width in bits)
269
		dataMode = mode // operand mode (width in bits)
270

271
		// decoded ModR/M fields
272
		haveModrm bool
273
		modrm     int
274
		mod       int
275
		regop     int
276
		rm        int
277

278
		// if ModR/M is memory reference, Mem form
279
		mem     Mem
280
		haveMem bool
281

282
		// decoded SIB fields
283
		haveSIB bool
284
		sib     int
285
		scale   int
286
		index   int
287
		base    int
288
		displen int
289
		dispoff int
290

291
		// decoded immediate values
292
		imm     int64
293
		imm8    int8
294
		immc    int64
295
		immcpos int
296

297
		// output
298
		opshift int
299
		inst    Inst
300
		narg    int // number of arguments written to inst
301
	)
302

303
	if mode == 64 {
304
		dataMode = 32
305
	}
306

307
	// Prefixes are certainly the most complex and underspecified part of
308
	// decoding x86 instructions. Although the manuals say things like
309
	// up to four prefixes, one from each group, nearly everyone seems to
310
	// agree that in practice as many prefixes as possible, including multiple
311
	// from a particular group or repetitions of a given prefix, can be used on
312
	// an instruction, provided the total instruction length including prefixes
313
	// does not exceed the agreed-upon maximum of 15 bytes.
314
	// Everyone also agrees that if one of these prefixes is the LOCK prefix
315
	// and the instruction is not one of the instructions that can be used with
316
	// the LOCK prefix or if the destination is not a memory operand,
317
	// then the instruction is invalid and produces the #UD exception.
318
	// However, that is the end of any semblance of agreement.
319
	//
320
	// What happens if prefixes are given that conflict with other prefixes?
321
	// For example, the memory segment overrides CS, DS, ES, FS, GS, SS
322
	// conflict with each other: only one segment can be in effect.
323
	// Disassemblers seem to agree that later prefixes take priority over
324
	// earlier ones. I have not taken the time to write assembly programs
325
	// to check to see if the hardware agrees.
326
	//
327
	// What happens if prefixes are given that have no meaning for the
328
	// specific instruction to which they are attached? It depends.
329
	// If they really have no meaning, they are ignored. However, a future
330
	// processor may assign a different meaning. As a disassembler, we
331
	// don't really know whether we're seeing a meaningless prefix or one
332
	// whose meaning we simply haven't been told yet.
333
	//
334
	// Combining the two questions, what happens when conflicting
335
	// extension prefixes are given? No one seems to know for sure.
336
	// For example, MOVQ is 66 0F D6 /r, MOVDQ2Q is F2 0F D6 /r,
337
	// and MOVQ2DQ is F3 0F D6 /r. What is '66 F2 F3 0F D6 /r'?
338
	// Which prefix wins? See the xCondPrefix case below for more.
339
	//
340
	// Writing assembly test cases to divine which interpretation the
341
	// CPU uses might clarify the situation, but more likely it would
342
	// make the situation even less clear.
343

344
	// Read non-REX prefixes.
345
ReadPrefixes:
346
	for ; pos < len(src); pos++ {
347
		p := Prefix(src[pos])
348
		switch p {
349
		default:
350
			nprefix = pos
351
			break ReadPrefixes
352

353
		// Group 1 - lock and repeat prefixes
354
		// According to Intel, there should only be one from this set,
355
		// but according to AMD both can be present.
356
		case 0xF0:
357
			if lockIndex >= 0 {
358
				inst.Prefix[lockIndex] |= PrefixIgnored
359
			}
360
			lockIndex = pos
361
		case 0xF2, 0xF3:
362
			if repIndex >= 0 {
363
				inst.Prefix[repIndex] |= PrefixIgnored
364
			}
365
			repIndex = pos
366

367
		// Group 2 - segment override / branch hints
368
		case 0x26, 0x2E, 0x36, 0x3E:
369
			if mode == 64 {
370
				p |= PrefixIgnored
371
				break
372
			}
373
			fallthrough
374
		case 0x64, 0x65:
375
			if segIndex >= 0 {
376
				inst.Prefix[segIndex] |= PrefixIgnored
377
			}
378
			segIndex = pos
379

380
		// Group 3 - operand size override
381
		case 0x66:
382
			if mode == 16 {
383
				dataMode = 32
384
				p = PrefixData32
385
			} else {
386
				dataMode = 16
387
				p = PrefixData16
388
			}
389
			if dataSizeIndex >= 0 {
390
				inst.Prefix[dataSizeIndex] |= PrefixIgnored
391
			}
392
			dataSizeIndex = pos
393

394
		// Group 4 - address size override
395
		case 0x67:
396
			if mode == 32 {
397
				addrMode = 16
398
				p = PrefixAddr16
399
			} else {
400
				addrMode = 32
401
				p = PrefixAddr32
402
			}
403
			if addrSizeIndex >= 0 {
404
				inst.Prefix[addrSizeIndex] |= PrefixIgnored
405
			}
406
			addrSizeIndex = pos
407

408
		//Group 5 - Vex encoding
409
		case 0xC5:
410
			if pos == 0 && pos+1 < len(src) && (mode == 64 || (mode == 32 && src[pos+1]&0xc0 == 0xc0)) {
411
				vex = p
412
				vexIndex = pos
413
				inst.Prefix[pos] = p
414
				inst.Prefix[pos+1] = Prefix(src[pos+1])
415
				pos += 1
416
				continue
417
			} else {
418
				nprefix = pos
419
				break ReadPrefixes
420
			}
421
		case 0xC4:
422
			if pos == 0 && pos+2 < len(src) && (mode == 64 || (mode == 32 && src[pos+1]&0xc0 == 0xc0)) {
423
				vex = p
424
				vexIndex = pos
425
				inst.Prefix[pos] = p
426
				inst.Prefix[pos+1] = Prefix(src[pos+1])
427
				inst.Prefix[pos+2] = Prefix(src[pos+2])
428
				pos += 2
429
				continue
430
			} else {
431
				nprefix = pos
432
				break ReadPrefixes
433
			}
434
		}
435

436
		if pos >= len(inst.Prefix) {
437
			return instPrefix(src[0], mode) // too long
438
		}
439

440
		inst.Prefix[pos] = p
441
	}
442

443
	// Read REX prefix.
444
	if pos < len(src) && mode == 64 && Prefix(src[pos]).IsREX() && vex == 0 {
445
		rex = Prefix(src[pos])
446
		rexIndex = pos
447
		if pos >= len(inst.Prefix) {
448
			return instPrefix(src[0], mode) // too long
449
		}
450
		inst.Prefix[pos] = rex
451
		pos++
452
		if rex&PrefixREXW != 0 {
453
			dataMode = 64
454
			if dataSizeIndex >= 0 {
455
				inst.Prefix[dataSizeIndex] |= PrefixIgnored
456
			}
457
		}
458
	}
459

460
	// Decode instruction stream, interpreting decoding instructions.
461
	// opshift gives the shift to use when saving the next
462
	// opcode byte into inst.Opcode.
463
	opshift = 24
464

465
	// Decode loop, executing decoder program.
466
	var oldPC, prevPC int
467
Decode:
468
	for pc := 1; ; { // TODO uint
469
		oldPC = prevPC
470
		prevPC = pc
471
		if trace {
472
			println("run", pc)
473
		}
474
		x := decoder[pc]
475
		if decoderCover != nil {
476
			decoderCover[pc] = true
477
		}
478
		pc++
479

480
		// Read and decode ModR/M if needed by opcode.
481
		switch decodeOp(x) {
482
		case xCondSlashR, xReadSlashR:
483
			if haveModrm {
484
				return Inst{Len: pos}, errInternal
485
			}
486
			haveModrm = true
487
			if pos >= len(src) {
488
				return truncated(src, mode)
489
			}
490
			modrm = int(src[pos])
491
			pos++
492
			if opshift >= 0 {
493
				inst.Opcode |= uint32(modrm) << uint(opshift)
494
				opshift -= 8
495
			}
496
			mod = modrm >> 6
497
			regop = (modrm >> 3) & 07
498
			rm = modrm & 07
499
			if rex&PrefixREXR != 0 {
500
				rexUsed |= PrefixREXR
501
				regop |= 8
502
			}
503
			if addrMode == 16 {
504
				// 16-bit modrm form
505
				if mod != 3 {
506
					haveMem = true
507
					mem = addr16[rm]
508
					if rm == 6 && mod == 0 {
509
						mem.Base = 0
510
					}
511

512
					// Consume disp16 if present.
513
					if mod == 0 && rm == 6 || mod == 2 {
514
						if pos+2 > len(src) {
515
							return truncated(src, mode)
516
						}
517
						mem.Disp = int64(binary.LittleEndian.Uint16(src[pos:]))
518
						pos += 2
519
					}
520

521
					// Consume disp8 if present.
522
					if mod == 1 {
523
						if pos >= len(src) {
524
							return truncated(src, mode)
525
						}
526
						mem.Disp = int64(int8(src[pos]))
527
						pos++
528
					}
529
				}
530
			} else {
531
				haveMem = mod != 3
532

533
				// 32-bit or 64-bit form
534
				// Consume SIB encoding if present.
535
				if rm == 4 && mod != 3 {
536
					haveSIB = true
537
					if pos >= len(src) {
538
						return truncated(src, mode)
539
					}
540
					sib = int(src[pos])
541
					pos++
542
					if opshift >= 0 {
543
						inst.Opcode |= uint32(sib) << uint(opshift)
544
						opshift -= 8
545
					}
546
					scale = sib >> 6
547
					index = (sib >> 3) & 07
548
					base = sib & 07
549
					if rex&PrefixREXB != 0 || vex == 0xC4 && inst.Prefix[vexIndex+1]&0x20 == 0 {
550
						rexUsed |= PrefixREXB
551
						base |= 8
552
					}
553
					if rex&PrefixREXX != 0 || vex == 0xC4 && inst.Prefix[vexIndex+1]&0x40 == 0 {
554
						rexUsed |= PrefixREXX
555
						index |= 8
556
					}
557

558
					mem.Scale = 1 << uint(scale)
559
					if index == 4 {
560
						// no mem.Index
561
					} else {
562
						mem.Index = baseRegForBits(addrMode) + Reg(index)
563
					}
564
					if base&7 == 5 && mod == 0 {
565
						// no mem.Base
566
					} else {
567
						mem.Base = baseRegForBits(addrMode) + Reg(base)
568
					}
569
				} else {
570
					if rex&PrefixREXB != 0 {
571
						rexUsed |= PrefixREXB
572
						rm |= 8
573
					}
574
					if mod == 0 && rm&7 == 5 || rm&7 == 4 {
575
						// base omitted
576
					} else if mod != 3 {
577
						mem.Base = baseRegForBits(addrMode) + Reg(rm)
578
					}
579
				}
580

581
				// Consume disp32 if present.
582
				if mod == 0 && (rm&7 == 5 || haveSIB && base&7 == 5) || mod == 2 {
583
					if pos+4 > len(src) {
584
						return truncated(src, mode)
585
					}
586
					dispoff = pos
587
					displen = 4
588
					mem.Disp = int64(binary.LittleEndian.Uint32(src[pos:]))
589
					pos += 4
590
				}
591

592
				// Consume disp8 if present.
593
				if mod == 1 {
594
					if pos >= len(src) {
595
						return truncated(src, mode)
596
					}
597
					dispoff = pos
598
					displen = 1
599
					mem.Disp = int64(int8(src[pos]))
600
					pos++
601
				}
602

603
				// In 64-bit, mod=0 rm=5 is PC-relative instead of just disp.
604
				// See Vol 2A. Table 2-7.
605
				if mode == 64 && mod == 0 && rm&7 == 5 {
606
					if addrMode == 32 {
607
						mem.Base = EIP
608
					} else {
609
						mem.Base = RIP
610
					}
611
				}
612
			}
613

614
			if segIndex >= 0 {
615
				mem.Segment = prefixToSegment(inst.Prefix[segIndex])
616
			}
617
		}
618

619
		// Execute single opcode.
620
		switch decodeOp(x) {
621
		default:
622
			println("bad op", x, "at", pc-1, "from", oldPC)
623
			return Inst{Len: pos}, errInternal
624

625
		case xFail:
626
			inst.Op = 0
627
			break Decode
628

629
		case xMatch:
630
			break Decode
631

632
		case xJump:
633
			pc = int(decoder[pc])
634

635
		// Conditional branches.
636

637
		case xCondByte:
638
			if pos >= len(src) {
639
				return truncated(src, mode)
640
			}
641
			b := src[pos]
642
			n := int(decoder[pc])
643
			pc++
644
			for i := 0; i < n; i++ {
645
				xb, xpc := decoder[pc], int(decoder[pc+1])
646
				pc += 2
647
				if b == byte(xb) {
648
					pc = xpc
649
					pos++
650
					if opshift >= 0 {
651
						inst.Opcode |= uint32(b) << uint(opshift)
652
						opshift -= 8
653
					}
654
					continue Decode
655
				}
656
			}
657
			// xCondByte is the only conditional with a fall through,
658
			// so that it can be used to pick off special cases before
659
			// an xCondSlash. If the fallthrough instruction is xFail,
660
			// advance the position so that the decoded instruction
661
			// size includes the byte we just compared against.
662
			if decodeOp(decoder[pc]) == xJump {
663
				pc = int(decoder[pc+1])
664
			}
665
			if decodeOp(decoder[pc]) == xFail {
666
				pos++
667
			}
668

669
		case xCondIs64:
670
			if mode == 64 {
671
				pc = int(decoder[pc+1])
672
			} else {
673
				pc = int(decoder[pc])
674
			}
675

676
		case xCondIsMem:
677
			mem := haveMem
678
			if !haveModrm {
679
				if pos >= len(src) {
680
					return instPrefix(src[0], mode) // too long
681
				}
682
				mem = src[pos]>>6 != 3
683
			}
684
			if mem {
685
				pc = int(decoder[pc+1])
686
			} else {
687
				pc = int(decoder[pc])
688
			}
689

690
		case xCondDataSize:
691
			switch dataMode {
692
			case 16:
693
				if dataSizeIndex >= 0 {
694
					inst.Prefix[dataSizeIndex] |= PrefixImplicit
695
				}
696
				pc = int(decoder[pc])
697
			case 32:
698
				if dataSizeIndex >= 0 {
699
					inst.Prefix[dataSizeIndex] |= PrefixImplicit
700
				}
701
				pc = int(decoder[pc+1])
702
			case 64:
703
				rexUsed |= PrefixREXW
704
				pc = int(decoder[pc+2])
705
			}
706

707
		case xCondAddrSize:
708
			switch addrMode {
709
			case 16:
710
				if addrSizeIndex >= 0 {
711
					inst.Prefix[addrSizeIndex] |= PrefixImplicit
712
				}
713
				pc = int(decoder[pc])
714
			case 32:
715
				if addrSizeIndex >= 0 {
716
					inst.Prefix[addrSizeIndex] |= PrefixImplicit
717
				}
718
				pc = int(decoder[pc+1])
719
			case 64:
720
				pc = int(decoder[pc+2])
721
			}
722

723
		case xCondPrefix:
724
			// Conditional branch based on presence or absence of prefixes.
725
			// The conflict cases here are completely undocumented and
726
			// differ significantly between GNU libopcodes and Intel xed.
727
			// I have not written assembly code to divine what various CPUs
728
			// do, but it wouldn't surprise me if they are not consistent either.
729
			//
730
			// The basic idea is to switch on the presence of a prefix, so that
731
			// for example:
732
			//
733
			//	xCondPrefix, 4
734
			//	0xF3, 123,
735
			//	0xF2, 234,
736
			//	0x66, 345,
737
			//	0, 456
738
			//
739
			// branch to 123 if the F3 prefix is present, 234 if the F2 prefix
740
			// is present, 345 if the 66 prefix is present, and 456 otherwise.
741
			// The prefixes are given in descending order so that the 0 will be last.
742
			//
743
			// It is unclear what should happen if multiple conditions are
744
			// satisfied: what if F2 and F3 are both present, or if 66 and F2
745
			// are present, or if all three are present? The one chosen becomes
746
			// part of the opcode and the others do not. Perhaps the answer
747
			// depends on the specific opcodes in question.
748
			//
749
			// The only clear example is that CRC32 is F2 0F 38 F1 /r, and
750
			// it comes in 16-bit and 32-bit forms based on the 66 prefix,
751
			// so 66 F2 0F 38 F1 /r should be treated as F2 taking priority,
752
			// with the 66 being only an operand size override, and probably
753
			// F2 66 0F 38 F1 /r should be treated the same.
754
			// Perhaps that rule is specific to the case of CRC32, since no
755
			// 66 0F 38 F1 instruction is defined (today) (that we know of).
756
			// However, both libopcodes and xed seem to generalize this
757
			// example and choose F2/F3 in preference to 66, and we
758
			// do the same.
759
			//
760
			// Next, what if both F2 and F3 are present? Which wins?
761
			// The Intel xed rule, and ours, is that the one that occurs last wins.
762
			// The GNU libopcodes rule, which we implement only in gnuCompat mode,
763
			// is that F3 beats F2 unless F3 has no special meaning, in which
764
			// case F3 can be a modifier on an F2 special meaning.
765
			//
766
			// Concretely,
767
			//	66 0F D6 /r is MOVQ
768
			//	F2 0F D6 /r is MOVDQ2Q
769
			//	F3 0F D6 /r is MOVQ2DQ.
770
			//
771
			//	F2 66 0F D6 /r is 66 + MOVDQ2Q always.
772
			//	66 F2 0F D6 /r is 66 + MOVDQ2Q always.
773
			//	F3 66 0F D6 /r is 66 + MOVQ2DQ always.
774
			//	66 F3 0F D6 /r is 66 + MOVQ2DQ always.
775
			//	F2 F3 0F D6 /r is F2 + MOVQ2DQ always.
776
			//	F3 F2 0F D6 /r is F3 + MOVQ2DQ in Intel xed, but F2 + MOVQ2DQ in GNU libopcodes.
777
			//	Adding 66 anywhere in the prefix section of the
778
			//	last two cases does not change the outcome.
779
			//
780
			// Finally, what if there is a variant in which 66 is a mandatory
781
			// prefix rather than an operand size override, but we know of
782
			// no corresponding F2/F3 form, and we see both F2/F3 and 66.
783
			// Does F2/F3 still take priority, so that the result is an unknown
784
			// instruction, or does the 66 take priority, so that the extended
785
			// 66 instruction should be interpreted as having a REP/REPN prefix?
786
			// Intel xed does the former and GNU libopcodes does the latter.
787
			// We side with Intel xed, unless we are trying to match libopcodes
788
			// more closely during the comparison-based test suite.
789
			//
790
			// In 64-bit mode REX.W is another valid prefix to test for, but
791
			// there is less ambiguity about that. When present, REX.W is
792
			// always the first entry in the table.
793
			n := int(decoder[pc])
794
			pc++
795
			sawF3 := false
796
			for j := 0; j < n; j++ {
797
				prefix := Prefix(decoder[pc+2*j])
798
				if prefix.IsREX() {
799
					rexUsed |= prefix
800
					if rex&prefix == prefix {
801
						pc = int(decoder[pc+2*j+1])
802
						continue Decode
803
					}
804
					continue
805
				}
806
				ok := false
807
				if prefix == 0 {
808
					ok = true
809
				} else if prefix.IsREX() {
810
					rexUsed |= prefix
811
					if rex&prefix == prefix {
812
						ok = true
813
					}
814
				} else if prefix == 0xC5 || prefix == 0xC4 {
815
					if vex == prefix {
816
						ok = true
817
					}
818
				} else if vex != 0 && (prefix == 0x0F || prefix == 0x0F38 || prefix == 0x0F3A ||
819
					prefix == 0x66 || prefix == 0xF2 || prefix == 0xF3) {
820
					var vexM, vexP Prefix
821
					if vex == 0xC5 {
822
						vexM = 1 // 2 byte vex always implies 0F
823
						vexP = inst.Prefix[vexIndex+1]
824
					} else {
825
						vexM = inst.Prefix[vexIndex+1]
826
						vexP = inst.Prefix[vexIndex+2]
827
					}
828
					switch prefix {
829
					case 0x66:
830
						ok = vexP&3 == 1
831
					case 0xF3:
832
						ok = vexP&3 == 2
833
					case 0xF2:
834
						ok = vexP&3 == 3
835
					case 0x0F:
836
						ok = vexM&3 == 1
837
					case 0x0F38:
838
						ok = vexM&3 == 2
839
					case 0x0F3A:
840
						ok = vexM&3 == 3
841
					}
842
				} else {
843
					if prefix == 0xF3 {
844
						sawF3 = true
845
					}
846
					switch prefix {
847
					case PrefixLOCK:
848
						if lockIndex >= 0 {
849
							inst.Prefix[lockIndex] |= PrefixImplicit
850
							ok = true
851
						}
852
					case PrefixREP, PrefixREPN:
853
						if repIndex >= 0 && inst.Prefix[repIndex]&0xFF == prefix {
854
							inst.Prefix[repIndex] |= PrefixImplicit
855
							ok = true
856
						}
857
						if gnuCompat && !ok && prefix == 0xF3 && repIndex >= 0 && (j+1 >= n || decoder[pc+2*(j+1)] != 0xF2) {
858
							// Check to see if earlier prefix F3 is present.
859
							for i := repIndex - 1; i >= 0; i-- {
860
								if inst.Prefix[i]&0xFF == prefix {
861
									inst.Prefix[i] |= PrefixImplicit
862
									ok = true
863
								}
864
							}
865
						}
866
						if gnuCompat && !ok && prefix == 0xF2 && repIndex >= 0 && !sawF3 && inst.Prefix[repIndex]&0xFF == 0xF3 {
867
							// Check to see if earlier prefix F2 is present.
868
							for i := repIndex - 1; i >= 0; i-- {
869
								if inst.Prefix[i]&0xFF == prefix {
870
									inst.Prefix[i] |= PrefixImplicit
871
									ok = true
872
								}
873
							}
874
						}
875
					case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
876
						if segIndex >= 0 && inst.Prefix[segIndex]&0xFF == prefix {
877
							inst.Prefix[segIndex] |= PrefixImplicit
878
							ok = true
879
						}
880
					case PrefixDataSize:
881
						// Looking for 66 mandatory prefix.
882
						// The F2/F3 mandatory prefixes take priority when both are present.
883
						// If we got this far in the xCondPrefix table and an F2/F3 is present,
884
						// it means the table didn't have any entry for that prefix. But if 66 has
885
						// special meaning, perhaps F2/F3 have special meaning that we don't know.
886
						// Intel xed works this way, treating the F2/F3 as inhibiting the 66.
887
						// GNU libopcodes allows the 66 to match. We do what Intel xed does
888
						// except in gnuCompat mode.
889
						if repIndex >= 0 && !gnuCompat {
890
							inst.Op = 0
891
							break Decode
892
						}
893
						if dataSizeIndex >= 0 {
894
							inst.Prefix[dataSizeIndex] |= PrefixImplicit
895
							ok = true
896
						}
897
					case PrefixAddrSize:
898
						if addrSizeIndex >= 0 {
899
							inst.Prefix[addrSizeIndex] |= PrefixImplicit
900
							ok = true
901
						}
902
					}
903
				}
904
				if ok {
905
					pc = int(decoder[pc+2*j+1])
906
					continue Decode
907
				}
908
			}
909
			inst.Op = 0
910
			break Decode
911

912
		case xCondSlashR:
913
			pc = int(decoder[pc+regop&7])
914

915
		// Input.
916

917
		case xReadSlashR:
918
			// done above
919

920
		case xReadIb:
921
			if pos >= len(src) {
922
				return truncated(src, mode)
923
			}
924
			imm8 = int8(src[pos])
925
			pos++
926

927
		case xReadIw:
928
			if pos+2 > len(src) {
929
				return truncated(src, mode)
930
			}
931
			imm = int64(binary.LittleEndian.Uint16(src[pos:]))
932
			pos += 2
933

934
		case xReadId:
935
			if pos+4 > len(src) {
936
				return truncated(src, mode)
937
			}
938
			imm = int64(binary.LittleEndian.Uint32(src[pos:]))
939
			pos += 4
940

941
		case xReadIo:
942
			if pos+8 > len(src) {
943
				return truncated(src, mode)
944
			}
945
			imm = int64(binary.LittleEndian.Uint64(src[pos:]))
946
			pos += 8
947

948
		case xReadCb:
949
			if pos >= len(src) {
950
				return truncated(src, mode)
951
			}
952
			immcpos = pos
953
			immc = int64(src[pos])
954
			pos++
955

956
		case xReadCw:
957
			if pos+2 > len(src) {
958
				return truncated(src, mode)
959
			}
960
			immcpos = pos
961
			immc = int64(binary.LittleEndian.Uint16(src[pos:]))
962
			pos += 2
963

964
		case xReadCm:
965
			immcpos = pos
966
			if addrMode == 16 {
967
				if pos+2 > len(src) {
968
					return truncated(src, mode)
969
				}
970
				immc = int64(binary.LittleEndian.Uint16(src[pos:]))
971
				pos += 2
972
			} else if addrMode == 32 {
973
				if pos+4 > len(src) {
974
					return truncated(src, mode)
975
				}
976
				immc = int64(binary.LittleEndian.Uint32(src[pos:]))
977
				pos += 4
978
			} else {
979
				if pos+8 > len(src) {
980
					return truncated(src, mode)
981
				}
982
				immc = int64(binary.LittleEndian.Uint64(src[pos:]))
983
				pos += 8
984
			}
985
		case xReadCd:
986
			immcpos = pos
987
			if pos+4 > len(src) {
988
				return truncated(src, mode)
989
			}
990
			immc = int64(binary.LittleEndian.Uint32(src[pos:]))
991
			pos += 4
992

993
		case xReadCp:
994
			immcpos = pos
995
			if pos+6 > len(src) {
996
				return truncated(src, mode)
997
			}
998
			w := binary.LittleEndian.Uint32(src[pos:])
999
			w2 := binary.LittleEndian.Uint16(src[pos+4:])
1000
			immc = int64(w2)<<32 | int64(w)
1001
			pos += 6
1002

1003
		// Output.
1004

1005
		case xSetOp:
1006
			inst.Op = Op(decoder[pc])
1007
			pc++
1008

1009
		case xArg1,
1010
			xArg3,
1011
			xArgAL,
1012
			xArgAX,
1013
			xArgCL,
1014
			xArgCS,
1015
			xArgDS,
1016
			xArgDX,
1017
			xArgEAX,
1018
			xArgEDX,
1019
			xArgES,
1020
			xArgFS,
1021
			xArgGS,
1022
			xArgRAX,
1023
			xArgRDX,
1024
			xArgSS,
1025
			xArgST,
1026
			xArgXMM0:
1027
			inst.Args[narg] = fixedArg[x]
1028
			narg++
1029

1030
		case xArgImm8:
1031
			inst.Args[narg] = Imm(imm8)
1032
			narg++
1033

1034
		case xArgImm8u:
1035
			inst.Args[narg] = Imm(uint8(imm8))
1036
			narg++
1037

1038
		case xArgImm16:
1039
			inst.Args[narg] = Imm(int16(imm))
1040
			narg++
1041

1042
		case xArgImm16u:
1043
			inst.Args[narg] = Imm(uint16(imm))
1044
			narg++
1045

1046
		case xArgImm32:
1047
			inst.Args[narg] = Imm(int32(imm))
1048
			narg++
1049

1050
		case xArgImm64:
1051
			inst.Args[narg] = Imm(imm)
1052
			narg++
1053

1054
		case xArgM,
1055
			xArgM128,
1056
			xArgM256,
1057
			xArgM1428byte,
1058
			xArgM16,
1059
			xArgM16and16,
1060
			xArgM16and32,
1061
			xArgM16and64,
1062
			xArgM16colon16,
1063
			xArgM16colon32,
1064
			xArgM16colon64,
1065
			xArgM16int,
1066
			xArgM2byte,
1067
			xArgM32,
1068
			xArgM32and32,
1069
			xArgM32fp,
1070
			xArgM32int,
1071
			xArgM512byte,
1072
			xArgM64,
1073
			xArgM64fp,
1074
			xArgM64int,
1075
			xArgM8,
1076
			xArgM80bcd,
1077
			xArgM80dec,
1078
			xArgM80fp,
1079
			xArgM94108byte,
1080
			xArgMem:
1081
			if !haveMem {
1082
				inst.Op = 0
1083
				break Decode
1084
			}
1085
			inst.Args[narg] = mem
1086
			inst.MemBytes = int(memBytes[decodeOp(x)])
1087
			if mem.Base == RIP {
1088
				inst.PCRel = displen
1089
				inst.PCRelOff = dispoff
1090
			}
1091
			narg++
1092

1093
		case xArgPtr16colon16:
1094
			inst.Args[narg] = Imm(immc >> 16)
1095
			inst.Args[narg+1] = Imm(immc & (1<<16 - 1))
1096
			narg += 2
1097

1098
		case xArgPtr16colon32:
1099
			inst.Args[narg] = Imm(immc >> 32)
1100
			inst.Args[narg+1] = Imm(immc & (1<<32 - 1))
1101
			narg += 2
1102

1103
		case xArgMoffs8, xArgMoffs16, xArgMoffs32, xArgMoffs64:
1104
			// TODO(rsc): Can address be 64 bits?
1105
			mem = Mem{Disp: int64(immc)}
1106
			if segIndex >= 0 {
1107
				mem.Segment = prefixToSegment(inst.Prefix[segIndex])
1108
				inst.Prefix[segIndex] |= PrefixImplicit
1109
			}
1110
			inst.Args[narg] = mem
1111
			inst.MemBytes = int(memBytes[decodeOp(x)])
1112
			if mem.Base == RIP {
1113
				inst.PCRel = displen
1114
				inst.PCRelOff = dispoff
1115
			}
1116
			narg++
1117

1118
		case xArgYmm1:
1119
			base := baseReg[x]
1120
			index := Reg(regop)
1121
			if inst.Prefix[vexIndex+1]&0x80 == 0 {
1122
				index += 8
1123
			}
1124
			inst.Args[narg] = base + index
1125
			narg++
1126

1127
		case xArgR8, xArgR16, xArgR32, xArgR64, xArgXmm, xArgXmm1, xArgDR0dashDR7:
1128
			base := baseReg[x]
1129
			index := Reg(regop)
1130
			if rex != 0 && base == AL && index >= 4 {
1131
				rexUsed |= PrefixREX
1132
				index -= 4
1133
				base = SPB
1134
			}
1135
			inst.Args[narg] = base + index
1136
			narg++
1137

1138
		case xArgMm, xArgMm1, xArgTR0dashTR7:
1139
			inst.Args[narg] = baseReg[x] + Reg(regop&7)
1140
			narg++
1141

1142
		case xArgCR0dashCR7:
1143
			// AMD documents an extension that the LOCK prefix
1144
			// can be used in place of a REX prefix in order to access
1145
			// CR8 from 32-bit mode. The LOCK prefix is allowed in
1146
			// all modes, provided the corresponding CPUID bit is set.
1147
			if lockIndex >= 0 {
1148
				inst.Prefix[lockIndex] |= PrefixImplicit
1149
				regop += 8
1150
			}
1151
			inst.Args[narg] = CR0 + Reg(regop)
1152
			narg++
1153

1154
		case xArgSreg:
1155
			regop &= 7
1156
			if regop >= 6 {
1157
				inst.Op = 0
1158
				break Decode
1159
			}
1160
			inst.Args[narg] = ES + Reg(regop)
1161
			narg++
1162

1163
		case xArgRmf16, xArgRmf32, xArgRmf64:
1164
			base := baseReg[x]
1165
			index := Reg(modrm & 07)
1166
			if rex&PrefixREXB != 0 {
1167
				rexUsed |= PrefixREXB
1168
				index += 8
1169
			}
1170
			inst.Args[narg] = base + index
1171
			narg++
1172

1173
		case xArgR8op, xArgR16op, xArgR32op, xArgR64op, xArgSTi:
1174
			n := inst.Opcode >> uint(opshift+8) & 07
1175
			base := baseReg[x]
1176
			index := Reg(n)
1177
			if rex&PrefixREXB != 0 && decodeOp(x) != xArgSTi {
1178
				rexUsed |= PrefixREXB
1179
				index += 8
1180
			}
1181
			if rex != 0 && base == AL && index >= 4 {
1182
				rexUsed |= PrefixREX
1183
				index -= 4
1184
				base = SPB
1185
			}
1186
			inst.Args[narg] = base + index
1187
			narg++
1188
		case xArgRM8, xArgRM16, xArgRM32, xArgRM64, xArgR32M16, xArgR32M8, xArgR64M16,
1189
			xArgMmM32, xArgMmM64, xArgMm2M64,
1190
			xArgXmm2M16, xArgXmm2M32, xArgXmm2M64, xArgXmmM64, xArgXmmM128, xArgXmmM32, xArgXmm2M128,
1191
			xArgYmm2M256:
1192
			if haveMem {
1193
				inst.Args[narg] = mem
1194
				inst.MemBytes = int(memBytes[decodeOp(x)])
1195
				if mem.Base == RIP {
1196
					inst.PCRel = displen
1197
					inst.PCRelOff = dispoff
1198
				}
1199
			} else {
1200
				base := baseReg[x]
1201
				index := Reg(rm)
1202
				switch decodeOp(x) {
1203
				case xArgMmM32, xArgMmM64, xArgMm2M64:
1204
					// There are only 8 MMX registers, so these ignore the REX.X bit.
1205
					index &= 7
1206
				case xArgRM8:
1207
					if rex != 0 && index >= 4 {
1208
						rexUsed |= PrefixREX
1209
						index -= 4
1210
						base = SPB
1211
					}
1212
				case xArgYmm2M256:
1213
					if vex == 0xC4 && inst.Prefix[vexIndex+1]&0x40 == 0x40 {
1214
						index += 8
1215
					}
1216
				}
1217
				inst.Args[narg] = base + index
1218
			}
1219
			narg++
1220

1221
		case xArgMm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag
1222
			if haveMem {
1223
				inst.Op = 0
1224
				break Decode
1225
			}
1226
			inst.Args[narg] = baseReg[x] + Reg(rm&7)
1227
			narg++
1228

1229
		case xArgXmm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag
1230
			if haveMem {
1231
				inst.Op = 0
1232
				break Decode
1233
			}
1234
			inst.Args[narg] = baseReg[x] + Reg(rm)
1235
			narg++
1236

1237
		case xArgRel8:
1238
			inst.PCRelOff = immcpos
1239
			inst.PCRel = 1
1240
			inst.Args[narg] = Rel(int8(immc))
1241
			narg++
1242

1243
		case xArgRel16:
1244
			inst.PCRelOff = immcpos
1245
			inst.PCRel = 2
1246
			inst.Args[narg] = Rel(int16(immc))
1247
			narg++
1248

1249
		case xArgRel32:
1250
			inst.PCRelOff = immcpos
1251
			inst.PCRel = 4
1252
			inst.Args[narg] = Rel(int32(immc))
1253
			narg++
1254
		}
1255
	}
1256

1257
	if inst.Op == 0 {
1258
		// Invalid instruction.
1259
		if nprefix > 0 {
1260
			return instPrefix(src[0], mode) // invalid instruction
1261
		}
1262
		return Inst{Len: pos}, ErrUnrecognized
1263
	}
1264

1265
	// Matched! Hooray!
1266

1267
	// 90 decodes as XCHG EAX, EAX but is NOP.
1268
	// 66 90 decodes as XCHG AX, AX and is NOP too.
1269
	// 48 90 decodes as XCHG RAX, RAX and is NOP too.
1270
	// 43 90 decodes as XCHG R8D, EAX and is *not* NOP.
1271
	// F3 90 decodes as REP XCHG EAX, EAX but is PAUSE.
1272
	// It's all too special to handle in the decoding tables, at least for now.
1273
	if inst.Op == XCHG && inst.Opcode>>24 == 0x90 {
1274
		if inst.Args[0] == RAX || inst.Args[0] == EAX || inst.Args[0] == AX {
1275
			inst.Op = NOP
1276
			if dataSizeIndex >= 0 {
1277
				inst.Prefix[dataSizeIndex] &^= PrefixImplicit
1278
			}
1279
			inst.Args[0] = nil
1280
			inst.Args[1] = nil
1281
		}
1282
		if repIndex >= 0 && inst.Prefix[repIndex] == 0xF3 {
1283
			inst.Prefix[repIndex] |= PrefixImplicit
1284
			inst.Op = PAUSE
1285
			inst.Args[0] = nil
1286
			inst.Args[1] = nil
1287
		} else if gnuCompat {
1288
			for i := nprefix - 1; i >= 0; i-- {
1289
				if inst.Prefix[i]&0xFF == 0xF3 {
1290
					inst.Prefix[i] |= PrefixImplicit
1291
					inst.Op = PAUSE
1292
					inst.Args[0] = nil
1293
					inst.Args[1] = nil
1294
					break
1295
				}
1296
			}
1297
		}
1298
	}
1299

1300
	// defaultSeg returns the default segment for an implicit
1301
	// memory reference: the final override if present, or else DS.
1302
	defaultSeg := func() Reg {
1303
		if segIndex >= 0 {
1304
			inst.Prefix[segIndex] |= PrefixImplicit
1305
			return prefixToSegment(inst.Prefix[segIndex])
1306
		}
1307
		return DS
1308
	}
1309

1310
	// Add implicit arguments not present in the tables.
1311
	// Normally we shy away from making implicit arguments explicit,
1312
	// following the Intel manuals, but adding the arguments seems
1313
	// the best way to express the effect of the segment override prefixes.
1314
	// TODO(rsc): Perhaps add these to the tables and
1315
	// create bytecode instructions for them.
1316
	usedAddrSize := false
1317
	switch inst.Op {
1318
	case INSB, INSW, INSD:
1319
		inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
1320
		inst.Args[1] = DX
1321
		usedAddrSize = true
1322

1323
	case OUTSB, OUTSW, OUTSD:
1324
		inst.Args[0] = DX
1325
		inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
1326
		usedAddrSize = true
1327

1328
	case MOVSB, MOVSW, MOVSD, MOVSQ:
1329
		inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
1330
		inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
1331
		usedAddrSize = true
1332

1333
	case CMPSB, CMPSW, CMPSD, CMPSQ:
1334
		inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
1335
		inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
1336
		usedAddrSize = true
1337

1338
	case LODSB, LODSW, LODSD, LODSQ:
1339
		switch inst.Op {
1340
		case LODSB:
1341
			inst.Args[0] = AL
1342
		case LODSW:
1343
			inst.Args[0] = AX
1344
		case LODSD:
1345
			inst.Args[0] = EAX
1346
		case LODSQ:
1347
			inst.Args[0] = RAX
1348
		}
1349
		inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
1350
		usedAddrSize = true
1351

1352
	case STOSB, STOSW, STOSD, STOSQ:
1353
		inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
1354
		switch inst.Op {
1355
		case STOSB:
1356
			inst.Args[1] = AL
1357
		case STOSW:
1358
			inst.Args[1] = AX
1359
		case STOSD:
1360
			inst.Args[1] = EAX
1361
		case STOSQ:
1362
			inst.Args[1] = RAX
1363
		}
1364
		usedAddrSize = true
1365

1366
	case SCASB, SCASW, SCASD, SCASQ:
1367
		inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
1368
		switch inst.Op {
1369
		case SCASB:
1370
			inst.Args[0] = AL
1371
		case SCASW:
1372
			inst.Args[0] = AX
1373
		case SCASD:
1374
			inst.Args[0] = EAX
1375
		case SCASQ:
1376
			inst.Args[0] = RAX
1377
		}
1378
		usedAddrSize = true
1379

1380
	case XLATB:
1381
		inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + BX - AX}
1382
		usedAddrSize = true
1383
	}
1384

1385
	// If we used the address size annotation to construct the
1386
	// argument list, mark that prefix as implicit: it doesn't need
1387
	// to be shown when printing the instruction.
1388
	if haveMem || usedAddrSize {
1389
		if addrSizeIndex >= 0 {
1390
			inst.Prefix[addrSizeIndex] |= PrefixImplicit
1391
		}
1392
	}
1393

1394
	// Similarly, if there's some memory operand, the segment
1395
	// will be shown there and doesn't need to be shown as an
1396
	// explicit prefix.
1397
	if haveMem {
1398
		if segIndex >= 0 {
1399
			inst.Prefix[segIndex] |= PrefixImplicit
1400
		}
1401
	}
1402

1403
	// Branch predict prefixes are overloaded segment prefixes,
1404
	// since segment prefixes don't make sense on conditional jumps.
1405
	// Rewrite final instance to prediction prefix.
1406
	// The set of instructions to which the prefixes apply (other than the
1407
	// Jcc conditional jumps) is not 100% clear from the manuals, but
1408
	// the disassemblers seem to agree about the LOOP and JCXZ instructions,
1409
	// so we'll follow along.
1410
	// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
1411
	if isCondJmp[inst.Op] || isLoop[inst.Op] || inst.Op == JCXZ || inst.Op == JECXZ || inst.Op == JRCXZ {
1412
	PredictLoop:
1413
		for i := nprefix - 1; i >= 0; i-- {
1414
			p := inst.Prefix[i]
1415
			switch p & 0xFF {
1416
			case PrefixCS:
1417
				inst.Prefix[i] = PrefixPN
1418
				break PredictLoop
1419
			case PrefixDS:
1420
				inst.Prefix[i] = PrefixPT
1421
				break PredictLoop
1422
			}
1423
		}
1424
	}
1425

1426
	// The BND prefix is part of the Intel Memory Protection Extensions (MPX).
1427
	// A REPN applied to certain control transfers is a BND prefix to bound
1428
	// the range of possible destinations. There's surprisingly little documentation
1429
	// about this, so we just do what libopcodes and xed agree on.
1430
	// In particular, it's unclear why a REPN applied to LOOP or JCXZ instructions
1431
	// does not turn into a BND.
1432
	// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
1433
	if isCondJmp[inst.Op] || inst.Op == JMP || inst.Op == CALL || inst.Op == RET {
1434
		for i := nprefix - 1; i >= 0; i-- {
1435
			p := inst.Prefix[i]
1436
			if p&^PrefixIgnored == PrefixREPN {
1437
				inst.Prefix[i] = PrefixBND
1438
				break
1439
			}
1440
		}
1441
	}
1442

1443
	// The LOCK prefix only applies to certain instructions, and then only
1444
	// to instances of the instruction with a memory destination.
1445
	// Other uses of LOCK are invalid and cause a processor exception,
1446
	// in contrast to the "just ignore it" spirit applied to all other prefixes.
1447
	// Mark invalid lock prefixes.
1448
	hasLock := false
1449
	if lockIndex >= 0 && inst.Prefix[lockIndex]&PrefixImplicit == 0 {
1450
		switch inst.Op {
1451
		// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
1452
		case ADD, ADC, AND, BTC, BTR, BTS, CMPXCHG, CMPXCHG8B, CMPXCHG16B, DEC, INC, NEG, NOT, OR, SBB, SUB, XOR, XADD, XCHG:
1453
			if isMem(inst.Args[0]) {
1454
				hasLock = true
1455
				break
1456
			}
1457
			fallthrough
1458
		default:
1459
			inst.Prefix[lockIndex] |= PrefixInvalid
1460
		}
1461
	}
1462

1463
	// In certain cases, all of which require a memory destination,
1464
	// the REPN and REP prefixes are interpreted as XACQUIRE and XRELEASE
1465
	// from the Intel Transactional Synchronization Extensions (TSX).
1466
	//
1467
	// The specific rules are:
1468
	// (1) Any instruction with a valid LOCK prefix can have XACQUIRE or XRELEASE.
1469
	// (2) Any XCHG, which always has an implicit LOCK, can have XACQUIRE or XRELEASE.
1470
	// (3) Any 0x88-, 0x89-, 0xC6-, or 0xC7-opcode MOV can have XRELEASE.
1471
	if isMem(inst.Args[0]) {
1472
		if inst.Op == XCHG {
1473
			hasLock = true
1474
		}
1475

1476
		for i := len(inst.Prefix) - 1; i >= 0; i-- {
1477
			p := inst.Prefix[i] &^ PrefixIgnored
1478
			switch p {
1479
			case PrefixREPN:
1480
				if hasLock {
1481
					inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXACQUIRE
1482
				}
1483

1484
			case PrefixREP:
1485
				if hasLock {
1486
					inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
1487
				}
1488

1489
				if inst.Op == MOV {
1490
					op := (inst.Opcode >> 24) &^ 1
1491
					if op == 0x88 || op == 0xC6 {
1492
						inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
1493
					}
1494
				}
1495
			}
1496
		}
1497
	}
1498

1499
	// If REP is used on a non-REP-able instruction, mark the prefix as ignored.
1500
	if repIndex >= 0 {
1501
		switch inst.Prefix[repIndex] {
1502
		case PrefixREP, PrefixREPN:
1503
			switch inst.Op {
1504
			// According to the manuals, the REP/REPE prefix applies to all of these,
1505
			// while the REPN applies only to some of them. However, both libopcodes
1506
			// and xed show both prefixes explicitly for all instructions, so we do the same.
1507
			// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
1508
			case INSB, INSW, INSD,
1509
				MOVSB, MOVSW, MOVSD, MOVSQ,
1510
				OUTSB, OUTSW, OUTSD,
1511
				LODSB, LODSW, LODSD, LODSQ,
1512
				CMPSB, CMPSW, CMPSD, CMPSQ,
1513
				SCASB, SCASW, SCASD, SCASQ,
1514
				STOSB, STOSW, STOSD, STOSQ:
1515
				// ok
1516
			default:
1517
				inst.Prefix[repIndex] |= PrefixIgnored
1518
			}
1519
		}
1520
	}
1521

1522
	// If REX was present, mark implicit if all the 1 bits were consumed.
1523
	if rexIndex >= 0 {
1524
		if rexUsed != 0 {
1525
			rexUsed |= PrefixREX
1526
		}
1527
		if rex&^rexUsed == 0 {
1528
			inst.Prefix[rexIndex] |= PrefixImplicit
1529
		}
1530
	}
1531

1532
	inst.DataSize = dataMode
1533
	inst.AddrSize = addrMode
1534
	inst.Mode = mode
1535
	inst.Len = pos
1536
	return inst, nil
1537
}
1538

1539
// errInternal is a package-level error value carrying the message "internal error".
// NOTE(review): its uses are outside this section; presumably it reports an
// inconsistency in the decoding tables rather than bad input — confirm.
var errInternal = errors.New("internal error")
1540

1541
// addr16 records the eight 16-bit addressing modes.
1542
var addr16 = [8]Mem{
1543
	{Base: BX, Scale: 1, Index: SI},
1544
	{Base: BX, Scale: 1, Index: DI},
1545
	{Base: BP, Scale: 1, Index: SI},
1546
	{Base: BP, Scale: 1, Index: DI},
1547
	{Base: SI},
1548
	{Base: DI},
1549
	{Base: BP},
1550
	{Base: BX},
1551
}
1552

1553
// baseRegForBits returns the base register for a given register size in bits.
1554
func baseRegForBits(bits int) Reg {
1555
	switch bits {
1556
	case 8:
1557
		return AL
1558
	case 16:
1559
		return AX
1560
	case 32:
1561
		return EAX
1562
	case 64:
1563
		return RAX
1564
	}
1565
	return 0
1566
}
1567

1568
// baseReg records the base register for argument types that specify
1569
// a range of registers indexed by op, regop, or rm.
1570
var baseReg = [...]Reg{
1571
	xArgDR0dashDR7: DR0,
1572
	xArgMm1:        M0,
1573
	xArgMm2:        M0,
1574
	xArgMm2M64:     M0,
1575
	xArgMm:         M0,
1576
	xArgMmM32:      M0,
1577
	xArgMmM64:      M0,
1578
	xArgR16:        AX,
1579
	xArgR16op:      AX,
1580
	xArgR32:        EAX,
1581
	xArgR32M16:     EAX,
1582
	xArgR32M8:      EAX,
1583
	xArgR32op:      EAX,
1584
	xArgR64:        RAX,
1585
	xArgR64M16:     RAX,
1586
	xArgR64op:      RAX,
1587
	xArgR8:         AL,
1588
	xArgR8op:       AL,
1589
	xArgRM16:       AX,
1590
	xArgRM32:       EAX,
1591
	xArgRM64:       RAX,
1592
	xArgRM8:        AL,
1593
	xArgRmf16:      AX,
1594
	xArgRmf32:      EAX,
1595
	xArgRmf64:      RAX,
1596
	xArgSTi:        F0,
1597
	xArgTR0dashTR7: TR0,
1598
	xArgXmm1:       X0,
1599
	xArgYmm1:       X0,
1600
	xArgXmm2:       X0,
1601
	xArgXmm2M128:   X0,
1602
	xArgYmm2M256:   X0,
1603
	xArgXmm2M16:    X0,
1604
	xArgXmm2M32:    X0,
1605
	xArgXmm2M64:    X0,
1606
	xArgXmm:        X0,
1607
	xArgXmmM128:    X0,
1608
	xArgXmmM32:     X0,
1609
	xArgXmmM64:     X0,
1610
}
1611

1612
// prefixToSegment returns the segment register
1613
// corresponding to a particular segment prefix.
1614
func prefixToSegment(p Prefix) Reg {
1615
	switch p &^ PrefixImplicit {
1616
	case PrefixCS:
1617
		return CS
1618
	case PrefixDS:
1619
		return DS
1620
	case PrefixES:
1621
		return ES
1622
	case PrefixFS:
1623
		return FS
1624
	case PrefixGS:
1625
		return GS
1626
	case PrefixSS:
1627
		return SS
1628
	}
1629
	return 0
1630
}
1631

1632
// fixedArg records the fixed arguments corresponding to the given bytecodes.
1633
var fixedArg = [...]Arg{
1634
	xArg1:    Imm(1),
1635
	xArg3:    Imm(3),
1636
	xArgAL:   AL,
1637
	xArgAX:   AX,
1638
	xArgDX:   DX,
1639
	xArgEAX:  EAX,
1640
	xArgEDX:  EDX,
1641
	xArgRAX:  RAX,
1642
	xArgRDX:  RDX,
1643
	xArgCL:   CL,
1644
	xArgCS:   CS,
1645
	xArgDS:   DS,
1646
	xArgES:   ES,
1647
	xArgFS:   FS,
1648
	xArgGS:   GS,
1649
	xArgSS:   SS,
1650
	xArgST:   F0,
1651
	xArgXMM0: X0,
1652
}
1653

1654
// memBytes records the size of the memory pointed at
1655
// by a memory argument of the given form.
1656
var memBytes = [...]int8{
1657
	xArgM128:       128 / 8,
1658
	xArgM256:       256 / 8,
1659
	xArgM16:        16 / 8,
1660
	xArgM16and16:   (16 + 16) / 8,
1661
	xArgM16colon16: (16 + 16) / 8,
1662
	xArgM16colon32: (16 + 32) / 8,
1663
	xArgM16int:     16 / 8,
1664
	xArgM2byte:     2,
1665
	xArgM32:        32 / 8,
1666
	xArgM32and32:   (32 + 32) / 8,
1667
	xArgM32fp:      32 / 8,
1668
	xArgM32int:     32 / 8,
1669
	xArgM64:        64 / 8,
1670
	xArgM64fp:      64 / 8,
1671
	xArgM64int:     64 / 8,
1672
	xArgMm2M64:     64 / 8,
1673
	xArgMmM32:      32 / 8,
1674
	xArgMmM64:      64 / 8,
1675
	xArgMoffs16:    16 / 8,
1676
	xArgMoffs32:    32 / 8,
1677
	xArgMoffs64:    64 / 8,
1678
	xArgMoffs8:     8 / 8,
1679
	xArgR32M16:     16 / 8,
1680
	xArgR32M8:      8 / 8,
1681
	xArgR64M16:     16 / 8,
1682
	xArgRM16:       16 / 8,
1683
	xArgRM32:       32 / 8,
1684
	xArgRM64:       64 / 8,
1685
	xArgRM8:        8 / 8,
1686
	xArgXmm2M128:   128 / 8,
1687
	xArgYmm2M256:   256 / 8,
1688
	xArgXmm2M16:    16 / 8,
1689
	xArgXmm2M32:    32 / 8,
1690
	xArgXmm2M64:    64 / 8,
1691
	xArgXmm:        128 / 8,
1692
	xArgXmmM128:    128 / 8,
1693
	xArgXmmM32:     32 / 8,
1694
	xArgXmmM64:     64 / 8,
1695
}
1696

1697
// isCondJmp records the conditional jumps.
1698
var isCondJmp = [maxOp + 1]bool{
1699
	JA:  true,
1700
	JAE: true,
1701
	JB:  true,
1702
	JBE: true,
1703
	JE:  true,
1704
	JG:  true,
1705
	JGE: true,
1706
	JL:  true,
1707
	JLE: true,
1708
	JNE: true,
1709
	JNO: true,
1710
	JNP: true,
1711
	JNS: true,
1712
	JO:  true,
1713
	JP:  true,
1714
	JS:  true,
1715
}
1716

1717
// isLoop records the loop operators.
1718
var isLoop = [maxOp + 1]bool{
1719
	LOOP:   true,
1720
	LOOPE:  true,
1721
	LOOPNE: true,
1722
	JECXZ:  true,
1723
	JRCXZ:  true,
1724
}
1725

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.