podman
1261 строка · 33.4 Кб
1// Inferno utils/6l/pass.c
2// https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/pass.c
3//
4// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
5// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
6// Portions Copyright © 1997-1999 Vita Nuova Limited
7// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
8// Portions Copyright © 2004,2006 Bruce Ellis
9// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
10// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
11// Portions Copyright © 2009 The Go Authors. All rights reserved.
12//
13// Permission is hereby granted, free of charge, to any person obtaining a copy
14// of this software and associated documentation files (the "Software"), to deal
15// in the Software without restriction, including without limitation the rights
16// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17// copies of the Software, and to permit persons to whom the Software is
18// furnished to do so, subject to the following conditions:
19//
20// The above copyright notice and this permission notice shall be included in
21// all copies or substantial portions of the Software.
22//
23// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29// THE SOFTWARE.
30
31package x86
32
33import (
34"github.com/twitchyliquid64/golang-asm/obj"
35"github.com/twitchyliquid64/golang-asm/objabi"
36"github.com/twitchyliquid64/golang-asm/src"
37"github.com/twitchyliquid64/golang-asm/sys"
38"math"
39"strings"
40)
41
42func CanUse1InsnTLS(ctxt *obj.Link) bool {
43if isAndroid {
44// Android uses a global variable for the tls offset.
45return false
46}
47
48if ctxt.Arch.Family == sys.I386 {
49switch ctxt.Headtype {
50case objabi.Hlinux,
51objabi.Hplan9,
52objabi.Hwindows:
53return false
54}
55
56return true
57}
58
59switch ctxt.Headtype {
60case objabi.Hplan9, objabi.Hwindows:
61return false
62case objabi.Hlinux, objabi.Hfreebsd:
63return !ctxt.Flag_shared
64}
65
66return true
67}
68
// progedit applies the x86 per-instruction rewrites to p in place, appending
// follow-up instructions allocated with newprog where one Prog must become
// several. In order, it:
//
//   - converts TLS accesses between the 1- and 2-instruction forms, depending
//     on CanUse1InsnTLS;
//   - on Android, loads the TLS base from runtime.tls_g instead of TLS;
//   - bumps a TLS index scale of 1 to 2 on windows/amd64 and plan9;
//   - rewrites the trailing 0 operand of CMPPD/CMPPS/CMPSD/CMPSS to $0;
//   - marks CALL/JMP/RET to an extern/static symbol as TYPE_BRANCH;
//   - turns MOVL/MOVQ of an address operand into LEAL/LEAQ (on 386 only when
//     the address is not an extern/static symbol);
//   - replaces floating-point constants by references to constant symbols
//     (or by XORPS for a +0 MOVSS/MOVSD into an X register);
//   - applies rewriteToUseGot under -dynlink and rewriteToPcrel for shared
//     386 code.
func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
	// Thread-local storage references use the TLS pseudo-register.
	// As a register, TLS refers to the thread-local storage base, and it
	// can only be loaded into another register:
	//
	//	MOVQ TLS, AX
	//
	// An offset from the thread-local storage base is written off(reg)(TLS*1).
	// Semantically it is off(reg), but the (TLS*1) annotation marks this as
	// indexing from the loaded TLS base. This emits a relocation so that
	// if the linker needs to adjust the offset, it can. For example:
	//
	//	MOVQ TLS, AX
	//	MOVQ 0(AX)(TLS*1), CX // load g into CX
	//
	// On systems that support direct access to the TLS memory, this
	// pair of instructions can be reduced to a direct TLS memory reference:
	//
	//	MOVQ 0(TLS), CX // load g into CX
	//
	// The 2-instruction and 1-instruction forms correspond to the two code
	// sequences for loading a TLS variable in the local exec model given in "ELF
	// Handling For Thread-Local Storage".
	//
	// We apply this rewrite on systems that support the 1-instruction form.
	// The decision is made using only the operating system and the -shared flag,
	// not the link mode. If some link modes on a particular operating system
	// require the 2-instruction form, then all builds for that operating system
	// will use the 2-instruction form, so that the link mode decision can be
	// delayed to link time.
	//
	// In this way, all supported systems use identical instructions to
	// access TLS, and they are rewritten appropriately first here in
	// liblink and then finally using relocations in the linker.
	//
	// When -shared is passed, we leave the code in the 2-instruction form but
	// assemble (and relocate) them in different ways to generate the initial
	// exec code sequence. It's a bit of a fluke that this is possible without
	// rewriting the instructions more comprehensively, and it only does because
	// we only support a single TLS variable (g).

	if CanUse1InsnTLS(ctxt) {
		// Reduce 2-instruction sequence to 1-instruction sequence.
		// Sequences like
		//	MOVQ TLS, BX
		//	... off(BX)(TLS*1) ...
		// become
		//	NOP
		//	... off(TLS) ...
		//
		// TODO(rsc): Remove the Hsolaris special case. It exists only to
		// guarantee we are producing byte-identical binaries as before this code.
		// But it should be unnecessary.
		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 && ctxt.Headtype != objabi.Hsolaris {
			obj.Nopout(p)
		}
		// off(reg)(TLS*1) as a source becomes off(TLS).
		if p.From.Type == obj.TYPE_MEM && p.From.Index == REG_TLS && REG_AX <= p.From.Reg && p.From.Reg <= REG_R15 {
			p.From.Reg = REG_TLS
			p.From.Scale = 0
			p.From.Index = REG_NONE
		}

		// Same rewrite for a destination operand.
		if p.To.Type == obj.TYPE_MEM && p.To.Index == REG_TLS && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
			p.To.Reg = REG_TLS
			p.To.Scale = 0
			p.To.Index = REG_NONE
		}
	} else {
		// load_g_cx, below, always inserts the 1-instruction sequence. Rewrite it
		// as the 2-instruction sequence if necessary.
		//	MOVQ 0(TLS), BX
		// becomes
		//	MOVQ TLS, BX
		//	MOVQ 0(BX)(TLS*1), BX
		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
			// q is the second instruction (the indexed load); p itself is
			// then turned into the register load of the TLS base.
			q := obj.Appendp(p, newprog)
			q.As = p.As
			q.From = p.From
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = p.To.Reg
			q.From.Index = REG_TLS
			q.From.Scale = 2 // TODO: use 1
			q.To = p.To
			p.From.Type = obj.TYPE_REG
			p.From.Reg = REG_TLS
			p.From.Index = REG_NONE
			p.From.Offset = 0
		}
	}

	// Android uses a tls offset determined at runtime. Rewrite
	//	MOVQ TLS, BX
	// to
	//	MOVQ runtime.tls_g(SB), BX
	if isAndroid && (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_EXTERN
		p.From.Reg = REG_NONE
		p.From.Sym = ctxt.Lookup("runtime.tls_g")
		p.From.Index = REG_NONE
	}

	// TODO: Remove.
	// On windows/amd64 and on plan9, a (TLS*1) index is rewritten to (TLS*2).
	if ctxt.Headtype == objabi.Hwindows && ctxt.Arch.Family == sys.AMD64 || ctxt.Headtype == objabi.Hplan9 {
		if p.From.Scale == 1 && p.From.Index == REG_TLS {
			p.From.Scale = 2
		}
		if p.To.Scale == 1 && p.To.Index == REG_TLS {
			p.To.Scale = 2
		}
	}

	// Rewrite 0 to $0 in 3rd argument to CMPPS etc.
	// That's what the tables expect.
	switch p.As {
	case ACMPPD, ACMPPS, ACMPSD, ACMPSS:
		if p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_NONE && p.To.Reg == REG_NONE && p.To.Index == REG_NONE && p.To.Sym == nil {
			p.To.Type = obj.TYPE_CONST
		}
	}

	// Rewrite CALL/JMP/RET to symbol as TYPE_BRANCH.
	switch p.As {
	case obj.ACALL, obj.AJMP, obj.ARET:
		if p.To.Type == obj.TYPE_MEM && (p.To.Name == obj.NAME_EXTERN || p.To.Name == obj.NAME_STATIC) && p.To.Sym != nil {
			p.To.Type = obj.TYPE_BRANCH
		}
	}

	// Rewrite MOVL/MOVQ $XXX(FP/SP) as LEAL/LEAQ.
	// On AMD64 this applies to every address operand; on 386 only to
	// addresses that are not extern/static symbols.
	if p.From.Type == obj.TYPE_ADDR && (ctxt.Arch.Family == sys.AMD64 || p.From.Name != obj.NAME_EXTERN && p.From.Name != obj.NAME_STATIC) {
		switch p.As {
		case AMOVL:
			p.As = ALEAL
			p.From.Type = obj.TYPE_MEM
		case AMOVQ:
			p.As = ALEAQ
			p.From.Type = obj.TYPE_MEM
		}
	}

	// Rewrite float constants to values stored in memory.
	switch p.As {
	// Convert AMOVSS $(0), Xx to AXORPS Xx, Xx
	case AMOVSS:
		if p.From.Type == obj.TYPE_FCONST {
			//  f == 0 can't be used here due to -0, so use Float64bits
			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
					p.As = AXORPS
					p.From = p.To
					break
				}
			}
		}
		// Not a +0 load into an X register: treat like the other
		// single-precision instructions below.
		fallthrough

	case AFMOVF,
		AFADDF,
		AFSUBF,
		AFSUBRF,
		AFMULF,
		AFDIVF,
		AFDIVRF,
		AFCOMF,
		AFCOMFP,
		AADDSS,
		ASUBSS,
		AMULSS,
		ADIVSS,
		ACOMISS,
		AUCOMISS:
		if p.From.Type == obj.TYPE_FCONST {
			// Replace the constant by a reference to its float32 symbol.
			f32 := float32(p.From.Val.(float64))
			p.From.Type = obj.TYPE_MEM
			p.From.Name = obj.NAME_EXTERN
			p.From.Sym = ctxt.Float32Sym(f32)
			p.From.Offset = 0
		}

	case AMOVSD:
		// Convert AMOVSD $(0), Xx to AXORPS Xx, Xx
		if p.From.Type == obj.TYPE_FCONST {
			//  f == 0 can't be used here due to -0, so use Float64bits
			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
					p.As = AXORPS
					p.From = p.To
					break
				}
			}
		}
		// Not a +0 load into an X register: treat like the other
		// double-precision instructions below.
		fallthrough

	case AFMOVD,
		AFADDD,
		AFSUBD,
		AFSUBRD,
		AFMULD,
		AFDIVD,
		AFDIVRD,
		AFCOMD,
		AFCOMDP,
		AADDSD,
		ASUBSD,
		AMULSD,
		ADIVSD,
		ACOMISD,
		AUCOMISD:
		if p.From.Type == obj.TYPE_FCONST {
			// Replace the constant by a reference to its float64 symbol.
			f64 := p.From.Val.(float64)
			p.From.Type = obj.TYPE_MEM
			p.From.Name = obj.NAME_EXTERN
			p.From.Sym = ctxt.Float64Sym(f64)
			p.From.Offset = 0
		}
	}

	if ctxt.Flag_dynlink {
		rewriteToUseGot(ctxt, p, newprog)
	}

	if ctxt.Flag_shared && ctxt.Arch.Family == sys.I386 {
		rewriteToPcrel(ctxt, p, newprog)
	}
}
295
// rewriteToUseGot rewrites p, if necessary, to access global data via the
// global offset table, appending any extra instructions with newprog.
//
// The scratch register is R15 on AMD64 and CX on 386, except that a 386 LEAL
// whose destination differs from its source base and index registers uses
// the destination register itself.
func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
	var lea, mov obj.As
	var reg int16
	if ctxt.Arch.Family == sys.AMD64 {
		lea = ALEAQ
		mov = AMOVQ
		reg = REG_R15
	} else {
		lea = ALEAL
		mov = AMOVL
		reg = REG_CX
		if p.As == ALEAL && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
			// Special case: clobber the destination register with
			// the PC so we don't have to clobber CX.
			// The SSA backend depends on CX not being clobbered across LEAL.
			// See cmd/compile/internal/ssa/gen/386.rules (search for Flag_shared).
			reg = p.To.Reg
		}
	}

	if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO {
		//     ADUFFxxx $offset
		// becomes
		//     $MOV runtime.duffxxx@GOT, $reg
		//     $LEA $offset($reg), $reg
		//     CALL $reg
		// (we use LEAx rather than ADDx because ADDx clobbers
		// flags and duffzero on 386 does not otherwise do so).
		var sym *obj.LSym
		if p.As == obj.ADUFFZERO {
			sym = ctxt.Lookup("runtime.duffzero")
		} else {
			sym = ctxt.Lookup("runtime.duffcopy")
		}
		// Save the offset before p.To is overwritten below.
		offset := p.To.Offset
		p.As = mov
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_GOTREF
		p.From.Sym = sym
		p.To.Type = obj.TYPE_REG
		p.To.Reg = reg
		p.To.Offset = 0
		p.To.Sym = nil
		p1 := obj.Appendp(p, newprog)
		p1.As = lea
		p1.From.Type = obj.TYPE_MEM
		p1.From.Offset = offset
		p1.From.Reg = reg
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = reg
		p2 := obj.Appendp(p1, newprog)
		p2.As = obj.ACALL
		p2.To.Type = obj.TYPE_REG
		p2.To.Reg = reg
	}

	// We only care about global data: NAME_EXTERN means a global
	// symbol in the Go sense, and p.Sym.Local is true for a few
	// internally defined symbols.
	if p.As == lea && p.From.Type == obj.TYPE_MEM && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
		// $LEA sym, Rx becomes $MOV $sym, Rx which will be rewritten below
		p.As = mov
		p.From.Type = obj.TYPE_ADDR
	}
	if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
		// $MOV $sym, Rx becomes $MOV sym@GOT, Rx
		// $MOV $sym+<off>, Rx becomes $MOV sym@GOT, Rx; $LEA <off>(Rx), Rx
		// On 386 only, more complicated things like PUSHL $sym become $MOV sym@GOT, CX; PUSHL CX
		cmplxdest := false
		pAs := p.As
		var dest obj.Addr
		if p.To.Type != obj.TYPE_REG || pAs != mov {
			if ctxt.Arch.Family == sys.AMD64 {
				ctxt.Diag("do not know how to handle LEA-type insn to non-register in %v with -dynlink", p)
			}
			// Remember the original opcode and destination; p becomes a
			// plain $MOV into the scratch register.
			cmplxdest = true
			dest = p.To
			p.As = mov
			p.To.Type = obj.TYPE_REG
			p.To.Reg = reg
			p.To.Sym = nil
			p.To.Name = obj.NAME_NONE
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_GOTREF
		// q is the last instruction of the replacement sequence so far.
		q := p
		if p.From.Offset != 0 {
			q = obj.Appendp(p, newprog)
			q.As = lea
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = p.To.Reg
			q.From.Offset = p.From.Offset
			q.To = p.To
			p.From.Offset = 0
		}
		if cmplxdest {
			// Re-issue the original instruction with the scratch register
			// as its source.
			q = obj.Appendp(q, newprog)
			q.As = pAs
			q.To = dest
			q.From.Type = obj.TYPE_REG
			q.From.Reg = reg
		}
	}
	if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN {
		ctxt.Diag("don't know how to handle %v with -dynlink", p)
	}
	var source *obj.Addr
	// MOVx sym, Ry becomes $MOV sym@GOT, R15; MOVx (R15), Ry
	// MOVx Ry, sym becomes $MOV sym@GOT, R15; MOVx Ry, (R15)
	// An addition may be inserted between the two MOVs if there is an offset.
	if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
		if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
			ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p)
		}
		source = &p.From
	} else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
		source = &p.To
	} else {
		// No non-local extern operand: nothing more to rewrite.
		return
	}
	if p.As == obj.ACALL {
		// When dynlinking on 386, almost any call might end up being a call
		// to a PLT, so make sure the GOT pointer is loaded into BX.
		// RegTo2 is set on the replacement call insn to stop it being
		// processed when it is in turn passed to progedit.
		//
		// We disable open-coded defers in buildssa() on 386 ONLY with shared
		// libraries because of this extra code added before deferreturn calls.
		if ctxt.Arch.Family == sys.AMD64 || (p.To.Sym != nil && p.To.Sym.Local()) || p.RegTo2 != 0 {
			return
		}
		p1 := obj.Appendp(p, newprog)
		p2 := obj.Appendp(p1, newprog)

		// p1: LEAL _GLOBAL_OFFSET_TABLE_, BX
		p1.As = ALEAL
		p1.From.Type = obj.TYPE_MEM
		p1.From.Name = obj.NAME_STATIC
		p1.From.Sym = ctxt.Lookup("_GLOBAL_OFFSET_TABLE_")
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = REG_BX

		// p2: copy of the original call.
		p2.As = p.As
		p2.Scond = p.Scond
		p2.From = p.From
		if p.RestArgs != nil {
			p2.RestArgs = append(p2.RestArgs, p.RestArgs...)
		}
		p2.Reg = p.Reg
		p2.To = p.To
		// p.To.Type was set to TYPE_BRANCH above, but that makes checkaddr
		// in ../pass.go complain, so set it back to TYPE_MEM here, until p2
		// itself gets passed to progedit.
		p2.To.Type = obj.TYPE_MEM
		p2.RegTo2 = 1

		// The original call has been replaced by p1/p2.
		obj.Nopout(p)
		return

	}
	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP {
		return
	}
	if source.Type != obj.TYPE_MEM {
		ctxt.Diag("don't know how to handle %v with -dynlink", p)
	}
	p1 := obj.Appendp(p, newprog)
	p2 := obj.Appendp(p1, newprog)

	// p1: $MOV sym@GOT, reg
	p1.As = mov
	p1.From.Type = obj.TYPE_MEM
	p1.From.Sym = source.Sym
	p1.From.Name = obj.NAME_GOTREF
	p1.To.Type = obj.TYPE_REG
	p1.To.Reg = reg

	// p2: the original instruction with the symbol operand replaced by an
	// access based at reg.
	p2.As = p.As
	p2.From = p.From
	p2.To = p.To
	if p.From.Name == obj.NAME_EXTERN {
		p2.From.Reg = reg
		p2.From.Name = obj.NAME_NONE
		p2.From.Sym = nil
	} else if p.To.Name == obj.NAME_EXTERN {
		p2.To.Reg = reg
		p2.To.Name = obj.NAME_NONE
		p2.To.Sym = nil
	} else {
		return
	}
	// The original instruction has been replaced by p1/p2.
	obj.Nopout(p)
}
488
// rewriteToPcrel rewrites p, if one of its operands names a symbol, so that
// the symbol is addressed relative to a register loaded by a call to
// __x86.get_pc_thunk.<reg>. The original p is turned into a no-op and is
// followed by the thunk CALL and a copy of p whose symbol operands use that
// register as their base. Instructions inserted here carry RegTo2 != 0 so
// they are skipped if passed through again. progedit invokes this only for
// shared 386 code.
func rewriteToPcrel(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
	// RegTo2 is set on the instructions we insert here so they don't get
	// processed twice.
	if p.RegTo2 != 0 {
		return
	}
	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP {
		return
	}
	// Any Prog (aside from the above special cases) with an Addr with Name ==
	// NAME_EXTERN, NAME_STATIC or NAME_GOTREF has a CALL __x86.get_pc_thunk.XX
	// inserted before it.
	//
	// isName reports whether a is a register-less memory or address operand
	// naming such a symbol; STLSBSS symbols are excluded.
	isName := func(a *obj.Addr) bool {
		if a.Sym == nil || (a.Type != obj.TYPE_MEM && a.Type != obj.TYPE_ADDR) || a.Reg != 0 {
			return false
		}
		if a.Sym.Type == objabi.STLSBSS {
			return false
		}
		return a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_STATIC || a.Name == obj.NAME_GOTREF
	}

	if isName(&p.From) && p.From.Type == obj.TYPE_ADDR {
		// Handle things like "MOVL $sym, (SP)" or "PUSHL $sym" by rewriting
		// to "MOVL $sym, CX; MOVL CX, (SP)" or "MOVL $sym, CX; PUSHL CX"
		// respectively.
		if p.To.Type != obj.TYPE_REG {
			q := obj.Appendp(p, newprog)
			q.As = p.As
			q.From.Type = obj.TYPE_REG
			q.From.Reg = REG_CX
			q.To = p.To
			p.As = AMOVL
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_CX
			p.To.Sym = nil
			p.To.Name = obj.NAME_NONE
		}
	}

	// Nothing to do unless some operand names a symbol.
	if !isName(&p.From) && !isName(&p.To) && (p.GetFrom3() == nil || !isName(p.GetFrom3())) {
		return
	}
	// dst is the register that receives the PC from the thunk.
	var dst int16 = REG_CX
	if (p.As == ALEAL || p.As == AMOVL) && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
		dst = p.To.Reg
		// Why? See the comment near the top of rewriteToUseGot above.
		// AMOVLs might be introduced by the GOT rewrites.
	}
	// q is the thunk call, r the replacement for p.
	q := obj.Appendp(p, newprog)
	q.RegTo2 = 1
	r := obj.Appendp(q, newprog)
	r.RegTo2 = 1
	q.As = obj.ACALL
	thunkname := "__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst)))
	q.To.Sym = ctxt.LookupInit(thunkname, func(s *obj.LSym) { s.Set(obj.AttrLocal, true) })
	q.To.Type = obj.TYPE_MEM
	q.To.Name = obj.NAME_EXTERN
	r.As = p.As
	r.Scond = p.Scond
	r.From = p.From
	// Note: r shares p's RestArgs slice; p is cleared by Nopout below.
	r.RestArgs = p.RestArgs
	r.Reg = p.Reg
	r.To = p.To
	if isName(&p.From) {
		r.From.Reg = dst
	}
	if isName(&p.To) {
		r.To.Reg = dst
	}
	if p.GetFrom3() != nil && isName(p.GetFrom3()) {
		r.GetFrom3().Reg = dst
	}
	obj.Nopout(p)
}
564
// preprocess lays out the stack frame of the function cursym and inserts its
// prologue and epilogue, allocating new instructions with newprog. It:
//
//   - decides whether to reserve a slot for the base pointer (AMD64 only);
//   - records cursym.Func.Args and cursym.Func.Locals;
//   - on AMD64, marks functions NOSPLIT when their frame is smaller than
//     StackSmall and the leaf search below finds no disqualifying call;
//   - emits the load of g into CX, the stack-split check, the SP adjustment,
//     the BP save and, for wrappers, the panic.argp fixup;
//   - adjusts NAME_AUTO/NAME_PARAM offsets by the running SP delta while
//     tracking PUSH/POP/ADJSP;
//   - expands each RET into the frame teardown, turning a RET with a target
//     symbol into a JMP.
//
// It returns immediately if the function has no TEXT instruction or nothing
// after it.
func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
	if cursym.Func.Text == nil || cursym.Func.Text.Link == nil {
		return
	}

	p := cursym.Func.Text
	// The declared frame size comes from the TEXT instruction; negative
	// values are treated as zero here.
	autoffset := int32(p.To.Offset)
	if autoffset < 0 {
		autoffset = 0
	}

	hasCall := false
	for q := p; q != nil; q = q.Link {
		if q.As == obj.ACALL || q.As == obj.ADUFFCOPY || q.As == obj.ADUFFZERO {
			hasCall = true
			break
		}
	}

	var bpsize int
	if ctxt.Arch.Family == sys.AMD64 &&
		!p.From.Sym.NoFrame() && // (1) below
		!(autoffset == 0 && p.From.Sym.NoSplit()) && // (2) below
		!(autoffset == 0 && !hasCall) { // (3) below
		// Make room to save a base pointer.
		// There are 2 cases we must avoid:
		// 1) If noframe is set (which we do for functions which tail call).
		// 2) Scary runtime internals which would be all messed up by frame pointers.
		//    We detect these using a heuristic: frameless nosplit functions.
		//    TODO: Maybe someday we label them all with NOFRAME and get rid of this heuristic.
		// For performance, we also want to avoid:
		// 3) Frameless leaf functions
		bpsize = ctxt.Arch.PtrSize
		autoffset += int32(bpsize)
		p.To.Offset += int64(bpsize)
	} else {
		bpsize = 0
	}

	textarg := int64(p.To.Val.(int32))
	cursym.Func.Args = int32(textarg)
	cursym.Func.Locals = int32(p.To.Offset)

	// TODO(rsc): Remove.
	if ctxt.Arch.Family == sys.I386 && cursym.Func.Locals < 0 {
		cursym.Func.Locals = 0
	}

	// TODO(rsc): Remove 'ctxt.Arch.Family == sys.AMD64 &&'.
	if ctxt.Arch.Family == sys.AMD64 && autoffset < objabi.StackSmall && !p.From.Sym.NoSplit() {
		leaf := true
	LeafSearch:
		for q := p; q != nil; q = q.Link {
			switch q.As {
			case obj.ACALL:
				// Treat common runtime calls that take no arguments
				// the same as duffcopy and duffzero.
				if !isZeroArgRuntimeCall(q.To.Sym) {
					leaf = false
					break LeafSearch
				}
				fallthrough
			case obj.ADUFFCOPY, obj.ADUFFZERO:
				if autoffset >= objabi.StackSmall-8 {
					leaf = false
					break LeafSearch
				}
			}
		}

		if leaf {
			p.From.Sym.Set(obj.AttrNoSplit, true)
		}
	}

	// g is needed in CX by the split check and by the wrapper code below.
	if !p.From.Sym.NoSplit() || p.From.Sym.Wrapper() {
		p = obj.Appendp(p, newprog)
		p = load_g_cx(ctxt, p, newprog) // load g into CX
	}

	if !cursym.Func.Text.From.Sym.NoSplit() {
		p = stacksplit(ctxt, cursym, p, newprog, autoffset, int32(textarg)) // emit split check
	}

	// Delve debugger would like the next instruction to be noted as the end of the function prologue.
	// TODO: are there other cases (e.g., wrapper functions) that need marking?
	markedPrologue := false

	if autoffset != 0 {
		if autoffset%int32(ctxt.Arch.RegSize) != 0 {
			ctxt.Diag("unaligned stack size %d", autoffset)
		}
		// ADJSP $autoffset
		p = obj.Appendp(p, newprog)
		p.As = AADJSP
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(autoffset)
		p.Spadj = autoffset
		p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd)
		markedPrologue = true
	}

	if bpsize > 0 {
		// Save caller's BP
		p = obj.Appendp(p, newprog)

		p.As = AMOVQ
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_BP
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = REG_SP
		p.To.Scale = 1
		p.To.Offset = int64(autoffset) - int64(bpsize)
		if !markedPrologue {
			p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd)
		}

		// Move current frame to BP
		p = obj.Appendp(p, newprog)

		p.As = ALEAQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Scale = 1
		p.From.Offset = int64(autoffset) - int64(bpsize)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_BP
	}

	if cursym.Func.Text.From.Sym.Wrapper() {
		// if g._panic != nil && g._panic.argp == FP {
		//   g._panic.argp = bottom-of-frame
		// }
		//
		//	MOVQ g_panic(CX), BX
		//	TESTQ BX, BX
		//	JNE checkargp
		// end:
		//	NOP
		//  ... rest of function ...
		// checkargp:
		//	LEAQ (autoffset+8)(SP), DI
		//	CMPQ panic_argp(BX), DI
		//	JNE end
		//  MOVQ SP, panic_argp(BX)
		//  JMP end
		//
		// The NOP is needed to give the jumps somewhere to land.
		// It is a liblink NOP, not an x86 NOP: it encodes to 0 instruction bytes.
		//
		// The layout is chosen to help static branch prediction:
		// Both conditional jumps are unlikely, so they are arranged to be forward jumps.

		// MOVQ g_panic(CX), BX
		p = obj.Appendp(p, newprog)
		p.As = AMOVQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_CX
		p.From.Offset = 4 * int64(ctxt.Arch.PtrSize) // g_panic
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_BX
		if ctxt.Arch.Family == sys.I386 {
			p.As = AMOVL
		}

		// TESTQ BX, BX
		p = obj.Appendp(p, newprog)
		p.As = ATESTQ
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_BX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_BX
		if ctxt.Arch.Family == sys.I386 {
			p.As = ATESTL
		}

		// JNE checkargp (checkargp to be resolved later)
		jne := obj.Appendp(p, newprog)
		jne.As = AJNE
		jne.To.Type = obj.TYPE_BRANCH

		// end:
		//  NOP
		end := obj.Appendp(jne, newprog)
		end.As = obj.ANOP

		// Fast forward to end of function.
		var last *obj.Prog
		for last = end; last.Link != nil; last = last.Link {
		}

		// LEAQ (autoffset+8)(SP), DI
		p = obj.Appendp(last, newprog)
		p.As = ALEAQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = int64(autoffset) + int64(ctxt.Arch.RegSize)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_DI
		if ctxt.Arch.Family == sys.I386 {
			p.As = ALEAL
		}

		// Set jne branch target.
		jne.To.SetTarget(p)

		// CMPQ panic_argp(BX), DI
		p = obj.Appendp(p, newprog)
		p.As = ACMPQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_BX
		p.From.Offset = 0 // Panic.argp
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_DI
		if ctxt.Arch.Family == sys.I386 {
			p.As = ACMPL
		}

		// JNE end
		p = obj.Appendp(p, newprog)
		p.As = AJNE
		p.To.Type = obj.TYPE_BRANCH
		p.To.SetTarget(end)

		// MOVQ SP, panic_argp(BX)
		p = obj.Appendp(p, newprog)
		p.As = AMOVQ
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = REG_BX
		p.To.Offset = 0 // Panic.argp
		if ctxt.Arch.Family == sys.I386 {
			p.As = AMOVL
		}

		// JMP end
		p = obj.Appendp(p, newprog)
		p.As = obj.AJMP
		p.To.Type = obj.TYPE_BRANCH
		p.To.SetTarget(end)

		// Reset p for following code.
		p = end
	}

	// Walk the whole function: fix up auto/param offsets using the running
	// SP delta, record Spadj on stack-adjusting instructions, and expand
	// each RET into the epilogue.
	var deltasp int32
	for p = cursym.Func.Text; p != nil; p = p.Link {
		pcsize := ctxt.Arch.RegSize
		switch p.From.Name {
		case obj.NAME_AUTO:
			p.From.Offset += int64(deltasp) - int64(bpsize)
		case obj.NAME_PARAM:
			p.From.Offset += int64(deltasp) + int64(pcsize)
		}
		if p.GetFrom3() != nil {
			switch p.GetFrom3().Name {
			case obj.NAME_AUTO:
				p.GetFrom3().Offset += int64(deltasp) - int64(bpsize)
			case obj.NAME_PARAM:
				p.GetFrom3().Offset += int64(deltasp) + int64(pcsize)
			}
		}
		switch p.To.Name {
		case obj.NAME_AUTO:
			p.To.Offset += int64(deltasp) - int64(bpsize)
		case obj.NAME_PARAM:
			p.To.Offset += int64(deltasp) + int64(pcsize)
		}

		switch p.As {
		default:
			continue

		case APUSHL, APUSHFL:
			deltasp += 4
			p.Spadj = 4
			continue

		case APUSHQ, APUSHFQ:
			deltasp += 8
			p.Spadj = 8
			continue

		case APUSHW, APUSHFW:
			deltasp += 2
			p.Spadj = 2
			continue

		case APOPL, APOPFL:
			deltasp -= 4
			p.Spadj = -4
			continue

		case APOPQ, APOPFQ:
			deltasp -= 8
			p.Spadj = -8
			continue

		case APOPW, APOPFW:
			deltasp -= 2
			p.Spadj = -2
			continue

		case AADJSP:
			p.Spadj = int32(p.From.Offset)
			deltasp += int32(p.From.Offset)
			continue

		case obj.ARET:
			// do nothing
		}

		// Only RET reaches this point.
		if autoffset != deltasp {
			ctxt.Diag("unbalanced PUSH/POP")
		}

		if autoffset != 0 {
			to := p.To // Keep To attached to RET for retjmp below
			p.To = obj.Addr{}
			if bpsize > 0 {
				// Restore caller's BP
				p.As = AMOVQ

				p.From.Type = obj.TYPE_MEM
				p.From.Reg = REG_SP
				p.From.Scale = 1
				p.From.Offset = int64(autoffset) - int64(bpsize)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = REG_BP
				p = obj.Appendp(p, newprog)
			}

			// ADJSP $-autoffset, then the RET itself.
			p.As = AADJSP
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = int64(-autoffset)
			p.Spadj = -autoffset
			p = obj.Appendp(p, newprog)
			p.As = obj.ARET
			p.To = to

			// If there are instructions following
			// this ARET, they come from a branch
			// with the same stackframe, so undo
			// the cleanup.
			p.Spadj = +autoffset
		}

		if p.To.Sym != nil { // retjmp
			p.As = obj.AJMP
		}
	}
}
917
918func isZeroArgRuntimeCall(s *obj.LSym) bool {
919if s == nil {
920return false
921}
922switch s.Name {
923case "runtime.panicdivide", "runtime.panicwrap", "runtime.panicshift":
924return true
925}
926if strings.HasPrefix(s.Name, "runtime.panicIndex") || strings.HasPrefix(s.Name, "runtime.panicSlice") {
927// These functions do take arguments (in registers),
928// but use no stack before they do a stack check. We
929// should include them. See issue 31219.
930return true
931}
932return false
933}
934
935func indir_cx(ctxt *obj.Link, a *obj.Addr) {
936a.Type = obj.TYPE_MEM
937a.Reg = REG_CX
938}
939
940// Append code to p to load g into cx.
941// Overwrites p with the first instruction (no first appendp).
942// Overwriting p is unusual but it lets use this in both the
943// prologue (caller must call appendp first) and in the epilogue.
944// Returns last new instruction.
945func load_g_cx(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) *obj.Prog {
946p.As = AMOVQ
947if ctxt.Arch.PtrSize == 4 {
948p.As = AMOVL
949}
950p.From.Type = obj.TYPE_MEM
951p.From.Reg = REG_TLS
952p.From.Offset = 0
953p.To.Type = obj.TYPE_REG
954p.To.Reg = REG_CX
955
956next := p.Link
957progedit(ctxt, p, newprog)
958for p.Link != next {
959p = p.Link
960progedit(ctxt, p, newprog)
961}
962
963if p.From.Index == REG_TLS {
964p.From.Scale = 2
965}
966
967return p
968}
969
// stacksplit appends code to p to check for stack split.
// Appends to (does not overwrite) p.
// Assumes g is in CX.
//
// framesize selects one of three comparison sequences (small, large, or
// wraparound-safe). The call to the morestack routine and the jump back to
// the start of the function are appended at the very end of the function.
// textarg is not used in this function body.
//
// Returns last new instruction of the inline check (the instruction
// returned by EndUnsafePoint after the conditional jump).
func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc, framesize int32, textarg int32) *obj.Prog {
	cmp := ACMPQ
	lea := ALEAQ
	mov := AMOVQ
	sub := ASUBQ

	// Use the 32-bit opcodes on 386.
	if ctxt.Arch.Family == sys.I386 {
		cmp = ACMPL
		lea = ALEAL
		mov = AMOVL
		sub = ASUBL
	}

	// q1 is the extra JEQ emitted only in the huge-frame case; it is
	// pointed at the morestack call at the end.
	var q1 *obj.Prog
	if framesize <= objabi.StackSmall {
		// small stack: SP <= stackguard
		//	CMPQ SP, stackguard
		p = obj.Appendp(p, newprog)

		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		indir_cx(ctxt, &p.To)
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}

		// Mark the stack bound check and morestack call async nonpreemptible.
		// If we get preempted here, when resumed the preemption request is
		// cleared, but we'll still call morestack, which will double the stack
		// unnecessarily. See issue #35470.
		p = ctxt.StartUnsafePoint(p, newprog)
	} else if framesize <= objabi.StackBig {
		// large stack: SP-framesize <= stackguard-StackSmall
		//	LEAQ -xxx(SP), AX
		//	CMPQ AX, stackguard
		p = obj.Appendp(p, newprog)

		p.As = lea
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = -(int64(framesize) - objabi.StackSmall)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_AX

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_AX
		indir_cx(ctxt, &p.To)
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}

		p = ctxt.StartUnsafePoint(p, newprog) // see the comment above
	} else {
		// Such a large stack we need to protect against wraparound.
		// If SP is close to zero:
		//	SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall)
		// The +StackGuard on both sides is required to keep the left side positive:
		// SP is allowed to be slightly below stackguard. See stack.h.
		//
		// Preemption sets stackguard to StackPreempt, a very large value.
		// That breaks the math above, so we have to check for that explicitly.
		//	MOVQ	stackguard, SI
		//	CMPQ	SI, $StackPreempt
		//	JEQ	label-of-call-to-morestack
		//	LEAQ	StackGuard(SP), AX
		//	SUBQ	SI, AX
		//	CMPQ	AX, $(framesize+(StackGuard-StackSmall))

		p = obj.Appendp(p, newprog)

		p.As = mov
		indir_cx(ctxt, &p.From)
		p.From.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.From.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_SI

		p = ctxt.StartUnsafePoint(p, newprog) // see the comment above

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SI
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = objabi.StackPreempt
		if ctxt.Arch.Family == sys.I386 {
			// Keep only the low 32 bits of the constant on 386.
			p.To.Offset = int64(uint32(objabi.StackPreempt & (1<<32 - 1)))
		}

		p = obj.Appendp(p, newprog)
		p.As = AJEQ
		p.To.Type = obj.TYPE_BRANCH
		q1 = p

		p = obj.Appendp(p, newprog)
		p.As = lea
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = int64(objabi.StackGuard)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_AX

		p = obj.Appendp(p, newprog)
		p.As = sub
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SI
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_AX

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_AX
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = int64(framesize) + (int64(objabi.StackGuard) - objabi.StackSmall)
	}

	// common
	// JLS to the morestack call; its target is set once the call exists.
	jls := obj.Appendp(p, newprog)
	jls.As = AJLS
	jls.To.Type = obj.TYPE_BRANCH

	end := ctxt.EndUnsafePoint(jls, newprog, -1)

	// Find the current last instruction of the function.
	var last *obj.Prog
	for last = cursym.Func.Text; last.Link != nil; last = last.Link {
	}

	// Now we are at the end of the function, but logically
	// we are still in function prologue. We need to fix the
	// SP data and PCDATA.
	spfix := obj.Appendp(last, newprog)
	spfix.As = obj.ANOP
	spfix.Spadj = -framesize

	pcdata := ctxt.EmitEntryStackMap(cursym, spfix, newprog)
	pcdata = ctxt.StartUnsafePoint(pcdata, newprog)

	call := obj.Appendp(pcdata, newprog)
	call.Pos = cursym.Func.Text.Pos
	call.As = obj.ACALL
	call.To.Type = obj.TYPE_BRANCH
	call.To.Name = obj.NAME_EXTERN
	// Pick the morestack variant: morestackc for C functions,
	// morestack_noctxt when the function does not need a context.
	morestack := "runtime.morestack"
	switch {
	case cursym.CFunc():
		morestack = "runtime.morestackc"
	case !cursym.Func.Text.From.Sym.NeedCtxt():
		morestack = "runtime.morestack_noctxt"
	}
	call.To.Sym = ctxt.Lookup(morestack)
	// When compiling 386 code for dynamic linking, the call needs to be adjusted
	// to follow PIC rules. This in turn can insert more instructions, so we need
	// to keep track of the start of the call (where the jump will be to) and the
	// end (which following instructions are appended to).
	callend := call
	progedit(ctxt, callend, newprog)
	for ; callend.Link != nil; callend = callend.Link {
		progedit(ctxt, callend.Link, newprog)
	}

	pcdata = ctxt.EndUnsafePoint(callend, newprog, -1)

	// After the morestack call returns, jump back to the instruction that
	// follows TEXT to re-run the prologue.
	jmp := obj.Appendp(pcdata, newprog)
	jmp.As = obj.AJMP
	jmp.To.Type = obj.TYPE_BRANCH
	jmp.To.SetTarget(cursym.Func.Text.Link)
	jmp.Spadj = +framesize

	jls.To.SetTarget(call)
	if q1 != nil {
		q1.To.SetTarget(call)
	}

	return end
}
1157
// unaryDst is the set of x86 opcodes installed as the UnaryDst table of
// Linkamd64 and Link386. Presumably these are the instructions whose single
// operand is a destination rather than a source — confirm against the
// obj.LinkArch documentation.
var unaryDst = map[obj.As]bool{
	ABSWAPL:     true,
	ABSWAPQ:     true,
	ACLDEMOTE:   true,
	ACLFLUSH:    true,
	ACLFLUSHOPT: true,
	ACLWB:       true,
	ACMPXCHG16B: true,
	ACMPXCHG8B:  true,
	ADECB:       true,
	ADECL:       true,
	ADECQ:       true,
	ADECW:       true,
	AFBSTP:      true,
	AFFREE:      true,
	AFLDENV:     true,
	AFSAVE:      true,
	AFSTCW:      true,
	AFSTENV:     true,
	AFSTSW:      true,
	AFXSAVE64:   true,
	AFXSAVE:     true,
	AINCB:       true,
	AINCL:       true,
	AINCQ:       true,
	AINCW:       true,
	ANEGB:       true,
	ANEGL:       true,
	ANEGQ:       true,
	ANEGW:       true,
	ANOTB:       true,
	ANOTL:       true,
	ANOTQ:       true,
	ANOTW:       true,
	APOPL:       true,
	APOPQ:       true,
	APOPW:       true,
	ARDFSBASEL:  true,
	ARDFSBASEQ:  true,
	ARDGSBASEL:  true,
	ARDGSBASEQ:  true,
	ARDRANDL:    true,
	ARDRANDQ:    true,
	ARDRANDW:    true,
	ARDSEEDL:    true,
	ARDSEEDQ:    true,
	ARDSEEDW:    true,
	ASETCC:      true,
	ASETCS:      true,
	ASETEQ:      true,
	ASETGE:      true,
	ASETGT:      true,
	ASETHI:      true,
	ASETLE:      true,
	ASETLS:      true,
	ASETLT:      true,
	ASETMI:      true,
	ASETNE:      true,
	ASETOC:      true,
	ASETOS:      true,
	ASETPC:      true,
	ASETPL:      true,
	ASETPS:      true,
	ASGDT:       true,
	ASIDT:       true,
	ASLDTL:      true,
	ASLDTQ:      true,
	ASLDTW:      true,
	ASMSWL:      true,
	ASMSWQ:      true,
	ASMSWW:      true,
	ASTMXCSR:    true,
	ASTRL:       true,
	ASTRQ:       true,
	ASTRW:       true,
	AXSAVE64:    true,
	AXSAVE:      true,
	AXSAVEC64:   true,
	AXSAVEC:     true,
	AXSAVEOPT64: true,
	AXSAVEOPT:   true,
	AXSAVES64:   true,
	AXSAVES:     true,
}
1242
// Linkamd64 is the obj.LinkArch description for AMD64. It wires the hooks
// defined for x86 (instinit, preprocess, span6, progedit), the unaryDst
// table and the AMD64 DWARF register mapping to sys.ArchAMD64.
var Linkamd64 = obj.LinkArch{
	Arch:           sys.ArchAMD64,
	Init:           instinit,
	Preprocess:     preprocess,
	Assemble:       span6,
	Progedit:       progedit,
	UnaryDst:       unaryDst,
	DWARFRegisters: AMD64DWARFRegisters,
}
1252
// Link386 is the obj.LinkArch description for 386. It uses the same hooks
// and unaryDst table as the AMD64 description, but targets sys.Arch386 and
// uses the X86 DWARF register mapping.
var Link386 = obj.LinkArch{
	Arch:           sys.Arch386,
	Init:           instinit,
	Preprocess:     preprocess,
	Assemble:       span6,
	Progedit:       progedit,
	UnaryDst:       unaryDst,
	DWARFRegisters: X86DWARFRegisters,
}
1262