1
// Copyright 2014 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
5
// Table-driven decoding of x86 instructions.
16
// Set trace to true to cause the decoder to print the PC sequence
17
// of the executed instruction codes. This is typically only useful
18
// when you are running a test of a single input case.
21
// A decodeOp is a single instruction in the decoder bytecode program.
23
// The decodeOps correspond to consuming and conditionally branching
24
// on input bytes, consuming additional fields, and then interpreting
25
// consumed data as instruction arguments. The names of the xRead and xArg
26
// operations are taken from the Intel manual conventions, for example
27
// Volume 2, Section 3.1.1, page 487 of
28
// http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf
30
// The actual decoding program is generated by ../x86map.
32
// TODO(rsc): We may be able to merge various of the memory operands
33
// since we don't care about, say, the distinction between m80dec and m80bcd.
34
// Similarly, mm and mm1 have identical meaning, as do xmm and xmm1.
39
xFail decodeOp = iota // invalid instruction (return)
40
xMatch // completed match
43
xCondByte // switch on instruction byte value
44
xCondSlashR // read and switch on instruction /r value
45
xCondPrefix // switch on presence of instruction prefix
46
xCondIs64 // switch on 64-bit processor mode
47
xCondDataSize // switch on operand size
48
xCondAddrSize // switch on address size
49
xCondIsMem // switch on memory vs register argument
51
xSetOp // set instruction opcode
53
xReadSlashR // read /r
69
xArgCR0dashCR7 // arg CR0-CR7
71
xArgDR0dashDR7 // arg DR0-DR7
79
xArgImm16 // arg imm16
80
xArgImm32 // arg imm32
81
xArgImm64 // arg imm64
83
xArgImm8u // arg imm8 but record as unsigned
84
xArgImm16u // arg imm16 but record as unsigned
88
xArgM1428byte // arg m14/28byte
90
xArgM16and16 // arg m16&16
91
xArgM16and32 // arg m16&32
92
xArgM16and64 // arg m16&64
93
xArgM16colon16 // arg m16:16
94
xArgM16colon32 // arg m16:32
95
xArgM16colon64 // arg m16:64
96
xArgM16int // arg m16int
97
xArgM2byte // arg m2byte
99
xArgM32and32 // arg m32&32
100
xArgM32fp // arg m32fp
101
xArgM32int // arg m32int
102
xArgM512byte // arg m512byte
104
xArgM64fp // arg m64fp
105
xArgM64int // arg m64int
107
xArgM80bcd // arg m80bcd
108
xArgM80dec // arg m80dec
109
xArgM80fp // arg m80fp
110
xArgM94108byte // arg m94/108byte
114
xArgMm2M64 // arg mm2/m64
115
xArgMmM32 // arg mm/m32
116
xArgMmM64 // arg mm/m64
118
xArgMoffs16 // arg moffs16
119
xArgMoffs32 // arg moffs32
120
xArgMoffs64 // arg moffs64
121
xArgMoffs8 // arg moffs8
122
xArgPtr16colon16 // arg ptr16:16
123
xArgPtr16colon32 // arg ptr16:32
125
xArgR16op // arg r16 with +rw in opcode
127
xArgR32M16 // arg r32/m16
128
xArgR32M8 // arg r32/m8
129
xArgR32op // arg r32 with +rd in opcode
131
xArgR64M16 // arg r64/m16
132
xArgR64op // arg r64 with +rd in opcode
134
xArgR8op // arg r8 with +rb in opcode
138
xArgRM16 // arg r/m16
139
xArgRM32 // arg r/m32
140
xArgRM64 // arg r/m64
143
xArgRegM16 // arg reg/m16
144
xArgRegM32 // arg reg/m32
145
xArgRegM8 // arg reg/m8
146
xArgRel16 // arg rel16
147
xArgRel32 // arg rel32
150
xArgST // arg ST, aka ST(0)
151
xArgSTi // arg ST(i) with +i in opcode
153
xArgTR0dashTR7 // arg TR0-TR7
155
xArgXMM0 // arg <XMM0>
158
xArgXmm2M128 // arg xmm2/m128
159
xArgYmm2M256 // arg ymm2/m256
160
xArgXmm2M16 // arg xmm2/m16
161
xArgXmm2M32 // arg xmm2/m32
162
xArgXmm2M64 // arg xmm2/m64
163
xArgXmmM128 // arg xmm/m128
164
xArgXmmM32 // arg xmm/m32
165
xArgXmmM64 // arg xmm/m64
167
xArgRmf16 // arg r/m16 but force mod=3
168
xArgRmf32 // arg r/m32 but force mod=3
169
xArgRmf64 // arg r/m64 but force mod=3
172
// instPrefix returns an Inst describing just one prefix byte.
173
// It is only used if there is a prefix followed by an unintelligible
174
// or invalid instruction byte sequence.
175
func instPrefix(b byte, mode int) (Inst, error) {
176
// When tracing it is useful to see what called instPrefix to report an error.
178
_, file, line, _ := runtime.Caller(1)
179
fmt.Printf("%s:%d\n", file, line)
196
// Note: using composite literal with Prefix key confuses 'bundle' tool.
198
inst.Prefix = Prefixes{p}
202
// truncated reports a truncated instruction.
203
// For now we use instPrefix but perhaps later we will return
204
// a specific error here.
205
func truncated(src []byte, mode int) (Inst, error) {
207
return Inst{}, ErrTruncated
209
return instPrefix(src[0], mode) // too long
212
// These are the errors returned by Decode.
214
ErrInvalidMode = errors.New("invalid x86 mode in Decode")
215
ErrTruncated = errors.New("truncated instruction")
216
ErrUnrecognized = errors.New("unrecognized instruction")
219
// decoderCover records coverage information for which parts
220
// of the byte code have been executed.
221
// TODO(rsc): This is for testing. Only use this if a flag is given.
222
var decoderCover []bool
224
// Decode decodes the leading bytes in src as a single instruction.
225
// The mode argument specifies the assumed processor mode:
226
// 16, 32, or 64 for 16-, 32-, and 64-bit execution modes.
227
func Decode(src []byte, mode int) (inst Inst, err error) {
228
return decode1(src, mode, false)
231
// decode1 is the implementation of Decode but takes an extra
232
// gnuCompat flag to cause it to change its behavior to mimic
233
// bugs (or at least unique features) of GNU libopcodes as used
234
// by objdump. We don't believe that logic is the right thing to do
235
// in general, but when testing against libopcodes it simplifies the
236
// comparison if we adjust a few small pieces of logic.
237
// The affected logic is in the conditional branch for "mandatory" prefixes,
239
func decode1(src []byte, mode int, gnuCompat bool) (Inst, error) {
243
// TODO(rsc): 64-bit mode not tested, probably not working.
245
return Inst{}, ErrInvalidMode
248
// Maximum instruction size is 15 bytes.
249
// If we need to read more, return 'truncated instruction'.
255
// prefix decoding information
256
pos = 0 // position reading src
257
nprefix = 0 // number of prefixes
258
lockIndex = -1 // index of LOCK prefix in src and inst.Prefix
259
repIndex = -1 // index of REP/REPN prefix in src and inst.Prefix
260
segIndex = -1 // index of Group 2 prefix in src and inst.Prefix
261
dataSizeIndex = -1 // index of Group 3 prefix in src and inst.Prefix
262
addrSizeIndex = -1 // index of Group 4 prefix in src and inst.Prefix
263
rex Prefix // rex byte if present (or 0)
264
rexUsed Prefix // bits used in rex byte
265
rexIndex = -1 // index of rex byte
266
vex Prefix // use vex encoding
267
vexIndex = -1 // index of vex prefix
269
addrMode = mode // address mode (width in bits)
270
dataMode = mode // operand mode (width in bits)
272
// decoded ModR/M fields
279
// if ModR/M is memory reference, Mem form
283
// decoded SIB fields
292
// decoded immediate values
301
narg int // number of arguments written to inst
308
// Prefixes are certainly the most complex and underspecified part of
309
// decoding x86 instructions. Although the manuals say things like
310
// up to four prefixes, one from each group, nearly everyone seems to
311
// agree that in practice as many prefixes as possible, including multiple
312
// from a particular group or repetitions of a given prefix, can be used on
313
// an instruction, provided the total instruction length including prefixes
314
// does not exceed the agreed-upon maximum of 15 bytes.
315
// Everyone also agrees that if one of these prefixes is the LOCK prefix
316
// and the instruction is not one of the instructions that can be used with
317
// the LOCK prefix or if the destination is not a memory operand,
318
// then the instruction is invalid and produces the #UD exception.
319
// However, that is the end of any semblance of agreement.
321
// What happens if prefixes are given that conflict with other prefixes?
322
// For example, the memory segment overrides CS, DS, ES, FS, GS, SS
323
// conflict with each other: only one segment can be in effect.
324
// Disassemblers seem to agree that later prefixes take priority over
325
// earlier ones. I have not taken the time to write assembly programs
326
// to check to see if the hardware agrees.
328
// What happens if prefixes are given that have no meaning for the
329
// specific instruction to which they are attached? It depends.
330
// If they really have no meaning, they are ignored. However, a future
331
// processor may assign a different meaning. As a disassembler, we
332
// don't really know whether we're seeing a meaningless prefix or one
333
// whose meaning we simply haven't been told yet.
335
// Combining the two questions, what happens when conflicting
336
// extension prefixes are given? No one seems to know for sure.
337
// For example, MOVQ is 66 0F D6 /r, MOVDQ2Q is F2 0F D6 /r,
338
// and MOVQ2DQ is F3 0F D6 /r. What is '66 F2 F3 0F D6 /r'?
339
// Which prefix wins? See the xCondPrefix case for more.
341
// Writing assembly test cases to divine which interpretation the
342
// CPU uses might clarify the situation, but more likely it would
343
// make the situation even less clear.
345
// Read non-REX prefixes.
347
for ; pos < len(src); pos++ {
348
p := Prefix(src[pos])
354
// Group 1 - lock and repeat prefixes
355
// According to Intel, there should only be one from this set,
356
// but according to AMD both can be present.
359
inst.Prefix[lockIndex] |= PrefixIgnored
364
inst.Prefix[repIndex] |= PrefixIgnored
368
// Group 2 - segment override / branch hints
369
case 0x26, 0x2E, 0x36, 0x3E:
377
inst.Prefix[segIndex] |= PrefixIgnored
381
// Group 3 - operand size override
390
if dataSizeIndex >= 0 {
391
inst.Prefix[dataSizeIndex] |= PrefixIgnored
395
// Group 4 - address size override
404
if addrSizeIndex >= 0 {
405
inst.Prefix[addrSizeIndex] |= PrefixIgnored
409
// Group 5 - VEX encoding
411
if pos == 0 && pos+1 < len(src) && (mode == 64 || (mode == 32 && src[pos+1]&0xc0 == 0xc0)) {
415
inst.Prefix[pos+1] = Prefix(src[pos+1])
423
if pos == 0 && pos+2 < len(src) && (mode == 64 || (mode == 32 && src[pos+1]&0xc0 == 0xc0)) {
427
inst.Prefix[pos+1] = Prefix(src[pos+1])
428
inst.Prefix[pos+2] = Prefix(src[pos+2])
437
if pos >= len(inst.Prefix) {
438
return instPrefix(src[0], mode) // too long
445
if pos < len(src) && mode == 64 && Prefix(src[pos]).IsREX() && vex == 0 {
446
rex = Prefix(src[pos])
448
if pos >= len(inst.Prefix) {
449
return instPrefix(src[0], mode) // too long
451
inst.Prefix[pos] = rex
453
if rex&PrefixREXW != 0 {
455
if dataSizeIndex >= 0 {
456
inst.Prefix[dataSizeIndex] |= PrefixIgnored
461
// Decode instruction stream, interpreting decoding instructions.
462
// opshift gives the shift to use when saving the next
463
// opcode byte into inst.Opcode.
465
if decoderCover == nil {
466
decoderCover = make([]bool, len(decoder))
469
// Decode loop, executing decoder program.
470
var oldPC, prevPC int
472
for pc := 1; ; { // TODO uint
479
decoderCover[pc] = true
482
// Read and decode ModR/M if needed by opcode.
484
case xCondSlashR, xReadSlashR:
486
return Inst{Len: pos}, errInternal
490
return truncated(src, mode)
492
modrm = int(src[pos])
495
inst.Opcode |= uint32(modrm) << uint(opshift)
499
regop = (modrm >> 3) & 07
501
if rex&PrefixREXR != 0 {
502
rexUsed |= PrefixREXR
510
if rm == 6 && mod == 0 {
514
// Consume disp16 if present.
515
if mod == 0 && rm == 6 || mod == 2 {
516
if pos+2 > len(src) {
517
return truncated(src, mode)
519
mem.Disp = int64(binary.LittleEndian.Uint16(src[pos:]))
523
// Consume disp8 if present.
526
return truncated(src, mode)
528
mem.Disp = int64(int8(src[pos]))
535
// 32-bit or 64-bit form
536
// Consume SIB encoding if present.
537
if rm == 4 && mod != 3 {
540
return truncated(src, mode)
545
inst.Opcode |= uint32(sib) << uint(opshift)
549
index = (sib >> 3) & 07
551
if rex&PrefixREXB != 0 || vex == 0xC4 && inst.Prefix[vexIndex+1]&0x20 == 0 {
552
rexUsed |= PrefixREXB
555
if rex&PrefixREXX != 0 || vex == 0xC4 && inst.Prefix[vexIndex+1]&0x40 == 0 {
556
rexUsed |= PrefixREXX
560
mem.Scale = 1 << uint(scale)
564
mem.Index = baseRegForBits(addrMode) + Reg(index)
566
if base&7 == 5 && mod == 0 {
569
mem.Base = baseRegForBits(addrMode) + Reg(base)
572
if rex&PrefixREXB != 0 {
573
rexUsed |= PrefixREXB
576
if mod == 0 && rm&7 == 5 || rm&7 == 4 {
579
mem.Base = baseRegForBits(addrMode) + Reg(rm)
583
// Consume disp32 if present.
584
if mod == 0 && (rm&7 == 5 || haveSIB && base&7 == 5) || mod == 2 {
585
if pos+4 > len(src) {
586
return truncated(src, mode)
590
mem.Disp = int64(binary.LittleEndian.Uint32(src[pos:]))
594
// Consume disp8 if present.
597
return truncated(src, mode)
601
mem.Disp = int64(int8(src[pos]))
605
// In 64-bit, mod=0 rm=5 is PC-relative instead of just disp.
606
// See Vol 2A. Table 2-7.
607
if mode == 64 && mod == 0 && rm&7 == 5 {
617
mem.Segment = prefixToSegment(inst.Prefix[segIndex])
621
// Execute single opcode.
624
println("bad op", x, "at", pc-1, "from", oldPC)
625
return Inst{Len: pos}, errInternal
635
pc = int(decoder[pc])
637
// Conditional branches.
641
return truncated(src, mode)
644
n := int(decoder[pc])
646
for i := 0; i < n; i++ {
647
xb, xpc := decoder[pc], int(decoder[pc+1])
653
inst.Opcode |= uint32(b) << uint(opshift)
659
// xCondByte is the only conditional with a fall through,
660
// so that it can be used to pick off special cases before
661
// an xCondSlash. If the fallthrough instruction is xFail,
662
// advance the position so that the decoded instruction
663
// size includes the byte we just compared against.
664
if decodeOp(decoder[pc]) == xJump {
665
pc = int(decoder[pc+1])
667
if decodeOp(decoder[pc]) == xFail {
673
pc = int(decoder[pc+1])
675
pc = int(decoder[pc])
682
return instPrefix(src[0], mode) // too long
684
mem = src[pos]>>6 != 3
687
pc = int(decoder[pc+1])
689
pc = int(decoder[pc])
695
if dataSizeIndex >= 0 {
696
inst.Prefix[dataSizeIndex] |= PrefixImplicit
698
pc = int(decoder[pc])
700
if dataSizeIndex >= 0 {
701
inst.Prefix[dataSizeIndex] |= PrefixImplicit
703
pc = int(decoder[pc+1])
705
rexUsed |= PrefixREXW
706
pc = int(decoder[pc+2])
712
if addrSizeIndex >= 0 {
713
inst.Prefix[addrSizeIndex] |= PrefixImplicit
715
pc = int(decoder[pc])
717
if addrSizeIndex >= 0 {
718
inst.Prefix[addrSizeIndex] |= PrefixImplicit
720
pc = int(decoder[pc+1])
722
pc = int(decoder[pc+2])
726
// Conditional branch based on presence or absence of prefixes.
727
// The conflict cases here are completely undocumented and
728
// differ significantly between GNU libopcodes and Intel xed.
729
// I have not written assembly code to divine what various CPUs
730
// do, but it wouldn't surprise me if they are not consistent either.
732
// The basic idea is to switch on the presence of a prefix, so that
741
// branch to 123 if the F3 prefix is present, 234 if the F2 prefix
742
// is present, 345 if the 66 prefix is present, and 456 otherwise.
743
// The prefixes are given in descending order so that the 0 will be last.
745
// It is unclear what should happen if multiple conditions are
746
// satisfied: what if F2 and F3 are both present, or if 66 and F2
747
// are present, or if all three are present? The one chosen becomes
748
// part of the opcode and the others do not. Perhaps the answer
749
// depends on the specific opcodes in question.
751
// The only clear example is that CRC32 is F2 0F 38 F1 /r, and
752
// it comes in 16-bit and 32-bit forms based on the 66 prefix,
753
// so 66 F2 0F 38 F1 /r should be treated as F2 taking priority,
754
// with the 66 being only an operand size override, and probably
755
// F2 66 0F 38 F1 /r should be treated the same.
756
// Perhaps that rule is specific to the case of CRC32, since no
757
// 66 0F 38 F1 instruction is defined (today) (that we know of).
758
// However, both libopcodes and xed seem to generalize this
759
// example and choose F2/F3 in preference to 66, and we
762
// Next, what if both F2 and F3 are present? Which wins?
763
// The Intel xed rule, and ours, is that the one that occurs last wins.
764
// The GNU libopcodes rule, which we implement only in gnuCompat mode,
765
// is that F3 beats F2 unless F3 has no special meaning, in which
766
// case F3 can be a modifier on an F2 special meaning.
769
// 66 0F D6 /r is MOVQ
770
// F2 0F D6 /r is MOVDQ2Q
771
// F3 0F D6 /r is MOVQ2DQ.
773
// F2 66 0F D6 /r is 66 + MOVDQ2Q always.
774
// 66 F2 0F D6 /r is 66 + MOVDQ2Q always.
775
// F3 66 0F D6 /r is 66 + MOVQ2DQ always.
776
// 66 F3 0F D6 /r is 66 + MOVQ2DQ always.
777
// F2 F3 0F D6 /r is F2 + MOVQ2DQ always.
778
// F3 F2 0F D6 /r is F3 + MOVDQ2Q in Intel xed, but F2 + MOVQ2DQ in GNU libopcodes.
779
// Adding 66 anywhere in the prefix section of the
780
// last two cases does not change the outcome.
782
// Finally, what if there is a variant in which 66 is a mandatory
783
// prefix rather than an operand size override, but we know of
784
// no corresponding F2/F3 form, and we see both F2/F3 and 66.
785
// Does F2/F3 still take priority, so that the result is an unknown
786
// instruction, or does the 66 take priority, so that the extended
787
// 66 instruction should be interpreted as having a REP/REPN prefix?
788
// Intel xed does the former and GNU libopcodes does the latter.
789
// We side with Intel xed, unless we are trying to match libopcodes
790
// more closely during the comparison-based test suite.
792
// In 64-bit mode REX.W is another valid prefix to test for, but
793
// there is less ambiguity about that. When present, REX.W is
794
// always the first entry in the table.
795
n := int(decoder[pc])
798
for j := 0; j < n; j++ {
799
prefix := Prefix(decoder[pc+2*j])
802
if rex&prefix == prefix {
803
pc = int(decoder[pc+2*j+1])
811
} else if prefix.IsREX() {
813
if rex&prefix == prefix {
816
} else if prefix == 0xC5 || prefix == 0xC4 {
820
} else if vex != 0 && (prefix == 0x0F || prefix == 0x0F38 || prefix == 0x0F3A ||
821
prefix == 0x66 || prefix == 0xF2 || prefix == 0xF3) {
822
var vexM, vexP Prefix
824
vexM = 1 // 2 byte vex always implies 0F
825
vexP = inst.Prefix[vexIndex+1]
827
vexM = inst.Prefix[vexIndex+1]
828
vexP = inst.Prefix[vexIndex+2]
851
inst.Prefix[lockIndex] |= PrefixImplicit
854
case PrefixREP, PrefixREPN:
855
if repIndex >= 0 && inst.Prefix[repIndex]&0xFF == prefix {
856
inst.Prefix[repIndex] |= PrefixImplicit
859
if gnuCompat && !ok && prefix == 0xF3 && repIndex >= 0 && (j+1 >= n || decoder[pc+2*(j+1)] != 0xF2) {
860
// Check to see if earlier prefix F3 is present.
861
for i := repIndex - 1; i >= 0; i-- {
862
if inst.Prefix[i]&0xFF == prefix {
863
inst.Prefix[i] |= PrefixImplicit
868
if gnuCompat && !ok && prefix == 0xF2 && repIndex >= 0 && !sawF3 && inst.Prefix[repIndex]&0xFF == 0xF3 {
869
// Check to see if earlier prefix F2 is present.
870
for i := repIndex - 1; i >= 0; i-- {
871
if inst.Prefix[i]&0xFF == prefix {
872
inst.Prefix[i] |= PrefixImplicit
877
case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
878
if segIndex >= 0 && inst.Prefix[segIndex]&0xFF == prefix {
879
inst.Prefix[segIndex] |= PrefixImplicit
883
// Looking for 66 mandatory prefix.
884
// The F2/F3 mandatory prefixes take priority when both are present.
885
// If we got this far in the xCondPrefix table and an F2/F3 is present,
886
// it means the table didn't have any entry for that prefix. But if 66 has
887
// special meaning, perhaps F2/F3 have special meaning that we don't know.
888
// Intel xed works this way, treating the F2/F3 as inhibiting the 66.
889
// GNU libopcodes allows the 66 to match. We do what Intel xed does
890
// except in gnuCompat mode.
891
if repIndex >= 0 && !gnuCompat {
895
if dataSizeIndex >= 0 {
896
inst.Prefix[dataSizeIndex] |= PrefixImplicit
900
if addrSizeIndex >= 0 {
901
inst.Prefix[addrSizeIndex] |= PrefixImplicit
907
pc = int(decoder[pc+2*j+1])
915
pc = int(decoder[pc+regop&7])
924
return truncated(src, mode)
926
imm8 = int8(src[pos])
930
if pos+2 > len(src) {
931
return truncated(src, mode)
933
imm = int64(binary.LittleEndian.Uint16(src[pos:]))
937
if pos+4 > len(src) {
938
return truncated(src, mode)
940
imm = int64(binary.LittleEndian.Uint32(src[pos:]))
944
if pos+8 > len(src) {
945
return truncated(src, mode)
947
imm = int64(binary.LittleEndian.Uint64(src[pos:]))
952
return truncated(src, mode)
955
immc = int64(src[pos])
959
if pos+2 > len(src) {
960
return truncated(src, mode)
963
immc = int64(binary.LittleEndian.Uint16(src[pos:]))
969
if pos+2 > len(src) {
970
return truncated(src, mode)
972
immc = int64(binary.LittleEndian.Uint16(src[pos:]))
974
} else if addrMode == 32 {
975
if pos+4 > len(src) {
976
return truncated(src, mode)
978
immc = int64(binary.LittleEndian.Uint32(src[pos:]))
981
if pos+8 > len(src) {
982
return truncated(src, mode)
984
immc = int64(binary.LittleEndian.Uint64(src[pos:]))
989
if pos+4 > len(src) {
990
return truncated(src, mode)
992
immc = int64(binary.LittleEndian.Uint32(src[pos:]))
997
if pos+6 > len(src) {
998
return truncated(src, mode)
1000
w := binary.LittleEndian.Uint32(src[pos:])
1001
w2 := binary.LittleEndian.Uint16(src[pos+4:])
1002
immc = int64(w2)<<32 | int64(w)
1008
inst.Op = Op(decoder[pc])
1029
inst.Args[narg] = fixedArg[x]
1033
inst.Args[narg] = Imm(imm8)
1037
inst.Args[narg] = Imm(uint8(imm8))
1041
inst.Args[narg] = Imm(int16(imm))
1045
inst.Args[narg] = Imm(uint16(imm))
1049
inst.Args[narg] = Imm(int32(imm))
1053
inst.Args[narg] = Imm(imm)
1087
inst.Args[narg] = mem
1088
inst.MemBytes = int(memBytes[decodeOp(x)])
1089
if mem.Base == RIP {
1090
inst.PCRel = displen
1091
inst.PCRelOff = dispoff
1095
case xArgPtr16colon16:
1096
inst.Args[narg] = Imm(immc >> 16)
1097
inst.Args[narg+1] = Imm(immc & (1<<16 - 1))
1100
case xArgPtr16colon32:
1101
inst.Args[narg] = Imm(immc >> 32)
1102
inst.Args[narg+1] = Imm(immc & (1<<32 - 1))
1105
case xArgMoffs8, xArgMoffs16, xArgMoffs32, xArgMoffs64:
1106
// TODO(rsc): Can address be 64 bits?
1107
mem = Mem{Disp: int64(immc)}
1109
mem.Segment = prefixToSegment(inst.Prefix[segIndex])
1110
inst.Prefix[segIndex] |= PrefixImplicit
1112
inst.Args[narg] = mem
1113
inst.MemBytes = int(memBytes[decodeOp(x)])
1114
if mem.Base == RIP {
1115
inst.PCRel = displen
1116
inst.PCRelOff = dispoff
1123
if inst.Prefix[vexIndex+1]&0x80 == 0 {
1126
inst.Args[narg] = base + index
1129
case xArgR8, xArgR16, xArgR32, xArgR64, xArgXmm, xArgXmm1, xArgDR0dashDR7:
1132
if rex != 0 && base == AL && index >= 4 {
1133
rexUsed |= PrefixREX
1137
inst.Args[narg] = base + index
1140
case xArgMm, xArgMm1, xArgTR0dashTR7:
1141
inst.Args[narg] = baseReg[x] + Reg(regop&7)
1144
case xArgCR0dashCR7:
1145
// AMD documents an extension that the LOCK prefix
1146
// can be used in place of a REX prefix in order to access
1147
// CR8 from 32-bit mode. The LOCK prefix is allowed in
1148
// all modes, provided the corresponding CPUID bit is set.
1150
inst.Prefix[lockIndex] |= PrefixImplicit
1153
inst.Args[narg] = CR0 + Reg(regop)
1162
inst.Args[narg] = ES + Reg(regop)
1165
case xArgRmf16, xArgRmf32, xArgRmf64:
1167
index := Reg(modrm & 07)
1168
if rex&PrefixREXB != 0 {
1169
rexUsed |= PrefixREXB
1172
inst.Args[narg] = base + index
1175
case xArgR8op, xArgR16op, xArgR32op, xArgR64op, xArgSTi:
1176
n := inst.Opcode >> uint(opshift+8) & 07
1179
if rex&PrefixREXB != 0 && decodeOp(x) != xArgSTi {
1180
rexUsed |= PrefixREXB
1183
if rex != 0 && base == AL && index >= 4 {
1184
rexUsed |= PrefixREX
1188
inst.Args[narg] = base + index
1190
case xArgRM8, xArgRM16, xArgRM32, xArgRM64, xArgR32M16, xArgR32M8, xArgR64M16,
1191
xArgMmM32, xArgMmM64, xArgMm2M64,
1192
xArgXmm2M16, xArgXmm2M32, xArgXmm2M64, xArgXmmM64, xArgXmmM128, xArgXmmM32, xArgXmm2M128,
1195
inst.Args[narg] = mem
1196
inst.MemBytes = int(memBytes[decodeOp(x)])
1197
if mem.Base == RIP {
1198
inst.PCRel = displen
1199
inst.PCRelOff = dispoff
1204
switch decodeOp(x) {
1205
case xArgMmM32, xArgMmM64, xArgMm2M64:
1206
// There are only 8 MMX registers, so these ignore the REX.X bit.
1209
if rex != 0 && index >= 4 {
1210
rexUsed |= PrefixREX
1215
if vex == 0xC4 && inst.Prefix[vexIndex+1]&0x40 == 0x40 {
1219
inst.Args[narg] = base + index
1223
case xArgMm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag
1228
inst.Args[narg] = baseReg[x] + Reg(rm&7)
1231
case xArgXmm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag
1236
inst.Args[narg] = baseReg[x] + Reg(rm)
1240
inst.PCRelOff = immcpos
1242
inst.Args[narg] = Rel(int8(immc))
1246
inst.PCRelOff = immcpos
1248
inst.Args[narg] = Rel(int16(immc))
1252
inst.PCRelOff = immcpos
1254
inst.Args[narg] = Rel(int32(immc))
1260
// Invalid instruction.
1262
return instPrefix(src[0], mode) // invalid instruction
1264
return Inst{Len: pos}, ErrUnrecognized
1269
// 90 decodes as XCHG EAX, EAX but is NOP.
1270
// 66 90 decodes as XCHG AX, AX and is NOP too.
1271
// 48 90 decodes as XCHG RAX, RAX and is NOP too.
1272
// 43 90 decodes as XCHG R8D, EAX and is *not* NOP.
1273
// F3 90 decodes as REP XCHG EAX, EAX but is PAUSE.
1274
// It's all too special to handle in the decoding tables, at least for now.
1275
if inst.Op == XCHG && inst.Opcode>>24 == 0x90 {
1276
if inst.Args[0] == RAX || inst.Args[0] == EAX || inst.Args[0] == AX {
1278
if dataSizeIndex >= 0 {
1279
inst.Prefix[dataSizeIndex] &^= PrefixImplicit
1284
if repIndex >= 0 && inst.Prefix[repIndex] == 0xF3 {
1285
inst.Prefix[repIndex] |= PrefixImplicit
1289
} else if gnuCompat {
1290
for i := nprefix - 1; i >= 0; i-- {
1291
if inst.Prefix[i]&0xFF == 0xF3 {
1292
inst.Prefix[i] |= PrefixImplicit
1302
// defaultSeg returns the default segment for an implicit
1303
// memory reference: the final override if present, or else DS.
1304
defaultSeg := func() Reg {
1306
inst.Prefix[segIndex] |= PrefixImplicit
1307
return prefixToSegment(inst.Prefix[segIndex])
1312
// Add implicit arguments not present in the tables.
1313
// Normally we shy away from making implicit arguments explicit,
1314
// following the Intel manuals, but adding the arguments seems
1315
// the best way to express the effect of the segment override prefixes.
1316
// TODO(rsc): Perhaps add these to the tables and
1317
// create bytecode instructions for them.
1318
usedAddrSize := false
1320
case INSB, INSW, INSD:
1321
inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
1325
case OUTSB, OUTSW, OUTSD:
1327
inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
1330
case MOVSB, MOVSW, MOVSD, MOVSQ:
1331
inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
1332
inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
1335
case CMPSB, CMPSW, CMPSD, CMPSQ:
1336
inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
1337
inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
1340
case LODSB, LODSW, LODSD, LODSQ:
1351
inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
1354
case STOSB, STOSW, STOSD, STOSQ:
1355
inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
1368
case SCASB, SCASW, SCASD, SCASQ:
1369
inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
1383
inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + BX - AX}
1387
// If we used the address size annotation to construct the
1388
// argument list, mark that prefix as implicit: it doesn't need
1389
// to be shown when printing the instruction.
1390
if haveMem || usedAddrSize {
1391
if addrSizeIndex >= 0 {
1392
inst.Prefix[addrSizeIndex] |= PrefixImplicit
1396
// Similarly, if there's some memory operand, the segment
1397
// will be shown there and doesn't need to be shown as an
1401
inst.Prefix[segIndex] |= PrefixImplicit
1405
// Branch predict prefixes are overloaded segment prefixes,
1406
// since segment prefixes don't make sense on conditional jumps.
1407
// Rewrite final instance to prediction prefix.
1408
// The set of instructions to which the prefixes apply (other than the
1409
// Jcc conditional jumps) is not 100% clear from the manuals, but
1410
// the disassemblers seem to agree about the LOOP and JCXZ instructions,
1411
// so we'll follow along.
1412
// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
1413
if isCondJmp[inst.Op] || isLoop[inst.Op] || inst.Op == JCXZ || inst.Op == JECXZ || inst.Op == JRCXZ {
1415
for i := nprefix - 1; i >= 0; i-- {
1419
inst.Prefix[i] = PrefixPN
1422
inst.Prefix[i] = PrefixPT
1428
// The BND prefix is part of the Intel Memory Protection Extensions (MPX).
1429
// A REPN applied to certain control transfers is a BND prefix to bound
1430
// the range of possible destinations. There's surprisingly little documentation
1431
// about this, so we just do what libopcodes and xed agree on.
1432
// In particular, it's unclear why a REPN applied to LOOP or JCXZ instructions
1433
// does not turn into a BND.
1434
// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
1435
if isCondJmp[inst.Op] || inst.Op == JMP || inst.Op == CALL || inst.Op == RET {
1436
for i := nprefix - 1; i >= 0; i-- {
1438
if p&^PrefixIgnored == PrefixREPN {
1439
inst.Prefix[i] = PrefixBND
1445
// The LOCK prefix only applies to certain instructions, and then only
1446
// to instances of the instruction with a memory destination.
1447
// Other uses of LOCK are invalid and cause a processor exception,
1448
// in contrast to the "just ignore it" spirit applied to all other prefixes.
1449
// Mark invalid lock prefixes.
1451
if lockIndex >= 0 && inst.Prefix[lockIndex]&PrefixImplicit == 0 {
1453
// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
1454
case ADD, ADC, AND, BTC, BTR, BTS, CMPXCHG, CMPXCHG8B, CMPXCHG16B, DEC, INC, NEG, NOT, OR, SBB, SUB, XOR, XADD, XCHG:
1455
if isMem(inst.Args[0]) {
1461
inst.Prefix[lockIndex] |= PrefixInvalid
1465
// In certain cases, all of which require a memory destination,
1466
// the REPN and REP prefixes are interpreted as XACQUIRE and XRELEASE
1467
// from the Intel Transactional Synchronization Extensions (TSX).
1469
// The specific rules are:
1470
// (1) Any instruction with a valid LOCK prefix can have XACQUIRE or XRELEASE.
1471
// (2) Any XCHG, which always has an implicit LOCK, can have XACQUIRE or XRELEASE.
1472
// (3) Any 0x88-, 0x89-, 0xC6-, or 0xC7-opcode MOV can have XRELEASE.
1473
if isMem(inst.Args[0]) {
1474
if inst.Op == XCHG {
1478
for i := len(inst.Prefix) - 1; i >= 0; i-- {
1479
p := inst.Prefix[i] &^ PrefixIgnored
1483
inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXACQUIRE
1488
inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
1492
op := (inst.Opcode >> 24) &^ 1
1493
if op == 0x88 || op == 0xC6 {
1494
inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
1501
// If REP is used on a non-REP-able instruction, mark the prefix as ignored.
1503
switch inst.Prefix[repIndex] {
1504
case PrefixREP, PrefixREPN:
1506
// According to the manuals, the REP/REPE prefix applies to all of these,
1507
// while the REPN applies only to some of them. However, both libopcodes
1508
// and xed show both prefixes explicitly for all instructions, so we do the same.
1509
// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
1510
case INSB, INSW, INSD,
1511
MOVSB, MOVSW, MOVSD, MOVSQ,
1512
OUTSB, OUTSW, OUTSD,
1513
LODSB, LODSW, LODSD, LODSQ,
1514
CMPSB, CMPSW, CMPSD, CMPSQ,
1515
SCASB, SCASW, SCASD, SCASQ,
1516
STOSB, STOSW, STOSD, STOSQ:
1519
inst.Prefix[repIndex] |= PrefixIgnored
1524
// If REX was present, mark implicit if all the 1 bits were consumed.
1527
rexUsed |= PrefixREX
1529
if rex&^rexUsed == 0 {
1530
inst.Prefix[rexIndex] |= PrefixImplicit
1534
inst.DataSize = dataMode
1535
inst.AddrSize = addrMode
1541
var errInternal = errors.New("internal error")
1543
// addr16 records the eight 16-bit addressing modes.
1545
{Base: BX, Scale: 1, Index: SI},
1546
{Base: BX, Scale: 1, Index: DI},
1547
{Base: BP, Scale: 1, Index: SI},
1548
{Base: BP, Scale: 1, Index: DI},
1555
// baseRegForBits returns the base register for a given register size in bits.
1556
func baseRegForBits(bits int) Reg {
1570
// baseReg records the base register for argument types that specify
1571
// a range of registers indexed by op, regop, or rm.
1572
var baseReg = [...]Reg{
1573
xArgDR0dashDR7: DR0,
1599
xArgTR0dashTR7: TR0,
1614
// prefixToSegment returns the segment register
1615
// corresponding to a particular segment prefix.
1616
func prefixToSegment(p Prefix) Reg {
1617
switch p &^ PrefixImplicit {
1634
// fixedArg records the fixed arguments corresponding to the given bytecodes.
1635
var fixedArg = [...]Arg{
1656
// memBytes records the size of the memory pointed at
1657
// by a memory argument of the given form.
1658
var memBytes = [...]int8{
1662
xArgM16and16: (16 + 16) / 8,
1663
xArgM16colon16: (16 + 16) / 8,
1664
xArgM16colon32: (16 + 32) / 8,
1668
xArgM32and32: (32 + 32) / 8,
1677
xArgMoffs16: 16 / 8,
1678
xArgMoffs32: 32 / 8,
1679
xArgMoffs64: 64 / 8,
1688
xArgXmm2M128: 128 / 8,
1689
xArgYmm2M256: 256 / 8,
1690
xArgXmm2M16: 16 / 8,
1691
xArgXmm2M32: 32 / 8,
1692
xArgXmm2M64: 64 / 8,
1694
xArgXmmM128: 128 / 8,
1699
// isCondJmp records the conditional jumps.
1700
var isCondJmp = [maxOp + 1]bool{
1719
// isLoop records the loop operators.
1720
var isLoop = [maxOp + 1]bool{