podman
1457 строк · 28.7 Кб
// cmd/9l/noop.c, cmd/9l/pass.c, cmd/9l/span.c from Vita Nuova.
//
//	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
//	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
//	Portions Copyright © 1997-1999 Vita Nuova Limited
//	Portions Copyright © 2000-2008 Vita Nuova Holdings Limited (www.vitanuova.com)
//	Portions Copyright © 2004,2006 Bruce Ellis
//	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
//	Revisions Copyright © 2000-2008 Lucent Technologies Inc. and others
//	Portions Copyright © 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
29
30package mips31
32import (33"github.com/twitchyliquid64/golang-asm/obj"34"github.com/twitchyliquid64/golang-asm/objabi"35"github.com/twitchyliquid64/golang-asm/sys"36"encoding/binary"37"fmt"38"math"39)
40
41func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {42c := ctxt0{ctxt: ctxt, newprog: newprog}43
44p.From.Class = 045p.To.Class = 046
47// Rewrite JMP/JAL to symbol as TYPE_BRANCH.48switch p.As {49case AJMP,50AJAL,51ARET,52obj.ADUFFZERO,53obj.ADUFFCOPY:54if p.To.Sym != nil {55p.To.Type = obj.TYPE_BRANCH56}57}58
59// Rewrite float constants to values stored in memory.60switch p.As {61case AMOVF:62if p.From.Type == obj.TYPE_FCONST {63f32 := float32(p.From.Val.(float64))64if math.Float32bits(f32) == 0 {65p.As = AMOVW66p.From.Type = obj.TYPE_REG67p.From.Reg = REGZERO68break69}70p.From.Type = obj.TYPE_MEM71p.From.Sym = ctxt.Float32Sym(f32)72p.From.Name = obj.NAME_EXTERN73p.From.Offset = 074}75
76case AMOVD:77if p.From.Type == obj.TYPE_FCONST {78f64 := p.From.Val.(float64)79if math.Float64bits(f64) == 0 && c.ctxt.Arch.Family == sys.MIPS64 {80p.As = AMOVV81p.From.Type = obj.TYPE_REG82p.From.Reg = REGZERO83break84}85p.From.Type = obj.TYPE_MEM86p.From.Sym = ctxt.Float64Sym(f64)87p.From.Name = obj.NAME_EXTERN88p.From.Offset = 089}90
91// Put >32-bit constants in memory and load them92case AMOVV:93if p.From.Type == obj.TYPE_CONST && p.From.Name == obj.NAME_NONE && p.From.Reg == 0 && int64(int32(p.From.Offset)) != p.From.Offset {94p.From.Type = obj.TYPE_MEM95p.From.Sym = ctxt.Int64Sym(p.From.Offset)96p.From.Name = obj.NAME_EXTERN97p.From.Offset = 098}99}100
101// Rewrite SUB constants into ADD.102switch p.As {103case ASUB:104if p.From.Type == obj.TYPE_CONST {105p.From.Offset = -p.From.Offset106p.As = AADD107}108
109case ASUBU:110if p.From.Type == obj.TYPE_CONST {111p.From.Offset = -p.From.Offset112p.As = AADDU113}114
115case ASUBV:116if p.From.Type == obj.TYPE_CONST {117p.From.Offset = -p.From.Offset118p.As = AADDV119}120
121case ASUBVU:122if p.From.Type == obj.TYPE_CONST {123p.From.Offset = -p.From.Offset124p.As = AADDVU125}126}127}
128
129func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {130// TODO(minux): add morestack short-cuts with small fixed frame-size.131c := ctxt0{ctxt: ctxt, newprog: newprog, cursym: cursym}132
133// a switch for enabling/disabling instruction scheduling134nosched := true135
136if c.cursym.Func.Text == nil || c.cursym.Func.Text.Link == nil {137return138}139
140p := c.cursym.Func.Text141textstksiz := p.To.Offset142if textstksiz == -ctxt.FixedFrameSize() {143// Historical way to mark NOFRAME.144p.From.Sym.Set(obj.AttrNoFrame, true)145textstksiz = 0146}147if textstksiz < 0 {148c.ctxt.Diag("negative frame size %d - did you mean NOFRAME?", textstksiz)149}150if p.From.Sym.NoFrame() {151if textstksiz != 0 {152c.ctxt.Diag("NOFRAME functions must have a frame size of 0, not %d", textstksiz)153}154}155
156c.cursym.Func.Args = p.To.Val.(int32)157c.cursym.Func.Locals = int32(textstksiz)158
159/*160* find leaf subroutines
161* expand RET
162* expand BECOME pseudo
163*/
164
165for p := c.cursym.Func.Text; p != nil; p = p.Link {166switch p.As {167/* too hard, just leave alone */168case obj.ATEXT:169p.Mark |= LABEL | LEAF | SYNC170if p.Link != nil {171p.Link.Mark |= LABEL172}173
174/* too hard, just leave alone */175case AMOVW,176AMOVV:177if p.To.Type == obj.TYPE_REG && p.To.Reg >= REG_SPECIAL {178p.Mark |= LABEL | SYNC179break180}181if p.From.Type == obj.TYPE_REG && p.From.Reg >= REG_SPECIAL {182p.Mark |= LABEL | SYNC183}184
185/* too hard, just leave alone */186case ASYSCALL,187AWORD,188ATLBWR,189ATLBWI,190ATLBP,191ATLBR:192p.Mark |= LABEL | SYNC193
194case ANOR:195if p.To.Type == obj.TYPE_REG {196if p.To.Reg == REGZERO {197p.Mark |= LABEL | SYNC198}199}200
201case ABGEZAL,202ABLTZAL,203AJAL,204obj.ADUFFZERO,205obj.ADUFFCOPY:206c.cursym.Func.Text.Mark &^= LEAF207fallthrough208
209case AJMP,210ABEQ,211ABGEZ,212ABGTZ,213ABLEZ,214ABLTZ,215ABNE,216ABFPT, ABFPF:217if p.As == ABFPT || p.As == ABFPF {218// We don't treat ABFPT and ABFPF as branches here,219// so that we will always fill nop (0x0) in their220// delay slot during assembly.221// This is to workaround a kernel FPU emulator bug222// where it uses the user stack to simulate the223// instruction in the delay slot if it's not 0x0,224// and somehow that leads to SIGSEGV when the kernel225// jump to the stack.226p.Mark |= SYNC227} else {228p.Mark |= BRANCH229}230q1 := p.To.Target()231if q1 != nil {232for q1.As == obj.ANOP {233q1 = q1.Link234p.To.SetTarget(q1)235}236
237if q1.Mark&LEAF == 0 {238q1.Mark |= LABEL239}240}241//else {242// p.Mark |= LABEL243//}244q1 = p.Link245if q1 != nil {246q1.Mark |= LABEL247}248
249case ARET:250if p.Link != nil {251p.Link.Mark |= LABEL252}253}254}255
256var mov, add obj.As257if c.ctxt.Arch.Family == sys.MIPS64 {258add = AADDV259mov = AMOVV260} else {261add = AADDU262mov = AMOVW263}264
265var q *obj.Prog266var q1 *obj.Prog267autosize := int32(0)268var p1 *obj.Prog269var p2 *obj.Prog270for p := c.cursym.Func.Text; p != nil; p = p.Link {271o := p.As272switch o {273case obj.ATEXT:274autosize = int32(textstksiz)275
276if p.Mark&LEAF != 0 && autosize == 0 {277// A leaf function with no locals has no frame.278p.From.Sym.Set(obj.AttrNoFrame, true)279}280
281if !p.From.Sym.NoFrame() {282// If there is a stack frame at all, it includes283// space to save the LR.284autosize += int32(c.ctxt.FixedFrameSize())285}286
287if autosize&4 != 0 && c.ctxt.Arch.Family == sys.MIPS64 {288autosize += 4289}290
291if autosize == 0 && c.cursym.Func.Text.Mark&LEAF == 0 {292if c.cursym.Func.Text.From.Sym.NoSplit() {293if ctxt.Debugvlog {294ctxt.Logf("save suppressed in: %s\n", c.cursym.Name)295}296
297c.cursym.Func.Text.Mark |= LEAF298}299}300
301p.To.Offset = int64(autosize) - ctxt.FixedFrameSize()302
303if c.cursym.Func.Text.Mark&LEAF != 0 {304c.cursym.Set(obj.AttrLeaf, true)305if p.From.Sym.NoFrame() {306break307}308}309
310if !p.From.Sym.NoSplit() {311p = c.stacksplit(p, autosize) // emit split check312}313
314q = p315
316if autosize != 0 {317// Make sure to save link register for non-empty frame, even if318// it is a leaf function, so that traceback works.319// Store link register before decrement SP, so if a signal comes320// during the execution of the function prologue, the traceback321// code will not see a half-updated stack frame.322// This sequence is not async preemptible, as if we open a frame323// at the current SP, it will clobber the saved LR.324q = c.ctxt.StartUnsafePoint(q, c.newprog)325
326q = obj.Appendp(q, newprog)327q.As = mov328q.Pos = p.Pos329q.From.Type = obj.TYPE_REG330q.From.Reg = REGLINK331q.To.Type = obj.TYPE_MEM332q.To.Offset = int64(-autosize)333q.To.Reg = REGSP334
335q = obj.Appendp(q, newprog)336q.As = add337q.Pos = p.Pos338q.From.Type = obj.TYPE_CONST339q.From.Offset = int64(-autosize)340q.To.Type = obj.TYPE_REG341q.To.Reg = REGSP342q.Spadj = +autosize343
344q = c.ctxt.EndUnsafePoint(q, c.newprog, -1)345}346
347if c.cursym.Func.Text.From.Sym.Wrapper() && c.cursym.Func.Text.Mark&LEAF == 0 {348// if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame349//350// MOV g_panic(g), R1351// BEQ R1, end352// MOV panic_argp(R1), R2353// ADD $(autosize+FIXED_FRAME), R29, R3354// BNE R2, R3, end355// ADD $FIXED_FRAME, R29, R2356// MOV R2, panic_argp(R1)357// end:358// NOP359//360// The NOP is needed to give the jumps somewhere to land.361// It is a liblink NOP, not an mips NOP: it encodes to 0 instruction bytes.362//363// We don't generate this for leafs because that means the wrapped364// function was inlined into the wrapper.365
366q = obj.Appendp(q, newprog)367
368q.As = mov369q.From.Type = obj.TYPE_MEM370q.From.Reg = REGG371q.From.Offset = 4 * int64(c.ctxt.Arch.PtrSize) // G.panic372q.To.Type = obj.TYPE_REG373q.To.Reg = REG_R1374
375q = obj.Appendp(q, newprog)376q.As = ABEQ377q.From.Type = obj.TYPE_REG378q.From.Reg = REG_R1379q.To.Type = obj.TYPE_BRANCH380q.Mark |= BRANCH381p1 = q382
383q = obj.Appendp(q, newprog)384q.As = mov385q.From.Type = obj.TYPE_MEM386q.From.Reg = REG_R1387q.From.Offset = 0 // Panic.argp388q.To.Type = obj.TYPE_REG389q.To.Reg = REG_R2390
391q = obj.Appendp(q, newprog)392q.As = add393q.From.Type = obj.TYPE_CONST394q.From.Offset = int64(autosize) + ctxt.FixedFrameSize()395q.Reg = REGSP396q.To.Type = obj.TYPE_REG397q.To.Reg = REG_R3398
399q = obj.Appendp(q, newprog)400q.As = ABNE401q.From.Type = obj.TYPE_REG402q.From.Reg = REG_R2403q.Reg = REG_R3404q.To.Type = obj.TYPE_BRANCH405q.Mark |= BRANCH406p2 = q407
408q = obj.Appendp(q, newprog)409q.As = add410q.From.Type = obj.TYPE_CONST411q.From.Offset = ctxt.FixedFrameSize()412q.Reg = REGSP413q.To.Type = obj.TYPE_REG414q.To.Reg = REG_R2415
416q = obj.Appendp(q, newprog)417q.As = mov418q.From.Type = obj.TYPE_REG419q.From.Reg = REG_R2420q.To.Type = obj.TYPE_MEM421q.To.Reg = REG_R1422q.To.Offset = 0 // Panic.argp423
424q = obj.Appendp(q, newprog)425
426q.As = obj.ANOP427p1.To.SetTarget(q)428p2.To.SetTarget(q)429}430
431case ARET:432if p.From.Type == obj.TYPE_CONST {433ctxt.Diag("using BECOME (%v) is not supported!", p)434break435}436
437retSym := p.To.Sym438p.To.Name = obj.NAME_NONE // clear fields as we may modify p to other instruction439p.To.Sym = nil440
441if c.cursym.Func.Text.Mark&LEAF != 0 {442if autosize == 0 {443p.As = AJMP444p.From = obj.Addr{}445if retSym != nil { // retjmp446p.To.Type = obj.TYPE_BRANCH447p.To.Name = obj.NAME_EXTERN448p.To.Sym = retSym449} else {450p.To.Type = obj.TYPE_MEM451p.To.Reg = REGLINK452p.To.Offset = 0453}454p.Mark |= BRANCH455break456}457
458p.As = add459p.From.Type = obj.TYPE_CONST460p.From.Offset = int64(autosize)461p.To.Type = obj.TYPE_REG462p.To.Reg = REGSP463p.Spadj = -autosize464
465q = c.newprog()466q.As = AJMP467q.Pos = p.Pos468q.To.Type = obj.TYPE_MEM469q.To.Offset = 0470q.To.Reg = REGLINK471q.Mark |= BRANCH472q.Spadj = +autosize473
474q.Link = p.Link475p.Link = q476break477}478
479p.As = mov480p.From.Type = obj.TYPE_MEM481p.From.Offset = 0482p.From.Reg = REGSP483p.To.Type = obj.TYPE_REG484p.To.Reg = REGLINK485
486if autosize != 0 {487q = c.newprog()488q.As = add489q.Pos = p.Pos490q.From.Type = obj.TYPE_CONST491q.From.Offset = int64(autosize)492q.To.Type = obj.TYPE_REG493q.To.Reg = REGSP494q.Spadj = -autosize495
496q.Link = p.Link497p.Link = q498}499
500q1 = c.newprog()501q1.As = AJMP502q1.Pos = p.Pos503if retSym != nil { // retjmp504q1.To.Type = obj.TYPE_BRANCH505q1.To.Name = obj.NAME_EXTERN506q1.To.Sym = retSym507} else {508q1.To.Type = obj.TYPE_MEM509q1.To.Offset = 0510q1.To.Reg = REGLINK511}512q1.Mark |= BRANCH513q1.Spadj = +autosize514
515q1.Link = q.Link516q.Link = q1517
518case AADD,519AADDU,520AADDV,521AADDVU:522if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.From.Type == obj.TYPE_CONST {523p.Spadj = int32(-p.From.Offset)524}525
526case obj.AGETCALLERPC:527if cursym.Leaf() {528/* MOV LR, Rd */529p.As = mov530p.From.Type = obj.TYPE_REG531p.From.Reg = REGLINK532} else {533/* MOV (RSP), Rd */534p.As = mov535p.From.Type = obj.TYPE_MEM536p.From.Reg = REGSP537}538}539}540
541if c.ctxt.Arch.Family == sys.MIPS {542// rewrite MOVD into two MOVF in 32-bit mode to avoid unaligned memory access543for p = c.cursym.Func.Text; p != nil; p = p1 {544p1 = p.Link545
546if p.As != AMOVD {547continue548}549if p.From.Type != obj.TYPE_MEM && p.To.Type != obj.TYPE_MEM {550continue551}552
553p.As = AMOVF554q = c.newprog()555*q = *p556q.Link = p.Link557p.Link = q558p1 = q.Link559
560var addrOff int64561if c.ctxt.Arch.ByteOrder == binary.BigEndian {562addrOff = 4 // swap load/save order563}564if p.From.Type == obj.TYPE_MEM {565reg := REG_F0 + (p.To.Reg-REG_F0)&^1566p.To.Reg = reg567q.To.Reg = reg + 1568p.From.Offset += addrOff569q.From.Offset += 4 - addrOff570} else if p.To.Type == obj.TYPE_MEM {571reg := REG_F0 + (p.From.Reg-REG_F0)&^1572p.From.Reg = reg573q.From.Reg = reg + 1574p.To.Offset += addrOff575q.To.Offset += 4 - addrOff576}577}578}579
580if nosched {581// if we don't do instruction scheduling, simply add582// NOP after each branch instruction.583for p = c.cursym.Func.Text; p != nil; p = p.Link {584if p.Mark&BRANCH != 0 {585c.addnop(p)586}587}588return589}590
591// instruction scheduling592q = nil // p - 1593q1 = c.cursym.Func.Text // top of block594o := 0 // count of instructions595for p = c.cursym.Func.Text; p != nil; p = p1 {596p1 = p.Link597o++598if p.Mark&NOSCHED != 0 {599if q1 != p {600c.sched(q1, q)601}602for ; p != nil; p = p.Link {603if p.Mark&NOSCHED == 0 {604break605}606q = p607}608p1 = p609q1 = p610o = 0611continue612}613if p.Mark&(LABEL|SYNC) != 0 {614if q1 != p {615c.sched(q1, q)616}617q1 = p618o = 1619}620if p.Mark&(BRANCH|SYNC) != 0 {621c.sched(q1, p)622q1 = p1623o = 0624}625if o >= NSCHED {626c.sched(q1, p)627q1 = p1628o = 0629}630q = p631}632}
633
634func (c *ctxt0) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {635var mov, add, sub obj.As636
637if c.ctxt.Arch.Family == sys.MIPS64 {638add = AADDV639mov = AMOVV640sub = ASUBVU641} else {642add = AADDU643mov = AMOVW644sub = ASUBU645}646
647// MOV g_stackguard(g), R1648p = obj.Appendp(p, c.newprog)649
650p.As = mov651p.From.Type = obj.TYPE_MEM652p.From.Reg = REGG653p.From.Offset = 2 * int64(c.ctxt.Arch.PtrSize) // G.stackguard0654if c.cursym.CFunc() {655p.From.Offset = 3 * int64(c.ctxt.Arch.PtrSize) // G.stackguard1656}657p.To.Type = obj.TYPE_REG658p.To.Reg = REG_R1659
660// Mark the stack bound check and morestack call async nonpreemptible.661// If we get preempted here, when resumed the preemption request is662// cleared, but we'll still call morestack, which will double the stack663// unnecessarily. See issue #35470.664p = c.ctxt.StartUnsafePoint(p, c.newprog)665
666var q *obj.Prog667if framesize <= objabi.StackSmall {668// small stack: SP < stackguard669// AGTU SP, stackguard, R1670p = obj.Appendp(p, c.newprog)671
672p.As = ASGTU673p.From.Type = obj.TYPE_REG674p.From.Reg = REGSP675p.Reg = REG_R1676p.To.Type = obj.TYPE_REG677p.To.Reg = REG_R1678} else if framesize <= objabi.StackBig {679// large stack: SP-framesize < stackguard-StackSmall680// ADD $-(framesize-StackSmall), SP, R2681// SGTU R2, stackguard, R1682p = obj.Appendp(p, c.newprog)683
684p.As = add685p.From.Type = obj.TYPE_CONST686p.From.Offset = -(int64(framesize) - objabi.StackSmall)687p.Reg = REGSP688p.To.Type = obj.TYPE_REG689p.To.Reg = REG_R2690
691p = obj.Appendp(p, c.newprog)692p.As = ASGTU693p.From.Type = obj.TYPE_REG694p.From.Reg = REG_R2695p.Reg = REG_R1696p.To.Type = obj.TYPE_REG697p.To.Reg = REG_R1698} else {699// Such a large stack we need to protect against wraparound.700// If SP is close to zero:701// SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall)702// The +StackGuard on both sides is required to keep the left side positive:703// SP is allowed to be slightly below stackguard. See stack.h.704//705// Preemption sets stackguard to StackPreempt, a very large value.706// That breaks the math above, so we have to check for that explicitly.707// // stackguard is R1708// MOV $StackPreempt, R2709// BEQ R1, R2, label-of-call-to-morestack710// ADD $StackGuard, SP, R2711// SUB R1, R2712// MOV $(framesize+(StackGuard-StackSmall)), R1713// SGTU R2, R1, R1714p = obj.Appendp(p, c.newprog)715
716p.As = mov717p.From.Type = obj.TYPE_CONST718p.From.Offset = objabi.StackPreempt719p.To.Type = obj.TYPE_REG720p.To.Reg = REG_R2721
722p = obj.Appendp(p, c.newprog)723q = p724p.As = ABEQ725p.From.Type = obj.TYPE_REG726p.From.Reg = REG_R1727p.Reg = REG_R2728p.To.Type = obj.TYPE_BRANCH729p.Mark |= BRANCH730
731p = obj.Appendp(p, c.newprog)732p.As = add733p.From.Type = obj.TYPE_CONST734p.From.Offset = int64(objabi.StackGuard)735p.Reg = REGSP736p.To.Type = obj.TYPE_REG737p.To.Reg = REG_R2738
739p = obj.Appendp(p, c.newprog)740p.As = sub741p.From.Type = obj.TYPE_REG742p.From.Reg = REG_R1743p.To.Type = obj.TYPE_REG744p.To.Reg = REG_R2745
746p = obj.Appendp(p, c.newprog)747p.As = mov748p.From.Type = obj.TYPE_CONST749p.From.Offset = int64(framesize) + int64(objabi.StackGuard) - objabi.StackSmall750p.To.Type = obj.TYPE_REG751p.To.Reg = REG_R1752
753p = obj.Appendp(p, c.newprog)754p.As = ASGTU755p.From.Type = obj.TYPE_REG756p.From.Reg = REG_R2757p.Reg = REG_R1758p.To.Type = obj.TYPE_REG759p.To.Reg = REG_R1760}761
762// q1: BNE R1, done763p = obj.Appendp(p, c.newprog)764q1 := p765
766p.As = ABNE767p.From.Type = obj.TYPE_REG768p.From.Reg = REG_R1769p.To.Type = obj.TYPE_BRANCH770p.Mark |= BRANCH771
772// MOV LINK, R3773p = obj.Appendp(p, c.newprog)774
775p.As = mov776p.From.Type = obj.TYPE_REG777p.From.Reg = REGLINK778p.To.Type = obj.TYPE_REG779p.To.Reg = REG_R3780if q != nil {781q.To.SetTarget(p)782p.Mark |= LABEL783}784
785p = c.ctxt.EmitEntryStackMap(c.cursym, p, c.newprog)786
787// JAL runtime.morestack(SB)788p = obj.Appendp(p, c.newprog)789
790p.As = AJAL791p.To.Type = obj.TYPE_BRANCH792if c.cursym.CFunc() {793p.To.Sym = c.ctxt.Lookup("runtime.morestackc")794} else if !c.cursym.Func.Text.From.Sym.NeedCtxt() {795p.To.Sym = c.ctxt.Lookup("runtime.morestack_noctxt")796} else {797p.To.Sym = c.ctxt.Lookup("runtime.morestack")798}799p.Mark |= BRANCH800
801p = c.ctxt.EndUnsafePoint(p, c.newprog, -1)802
803// JMP start804p = obj.Appendp(p, c.newprog)805
806p.As = AJMP807p.To.Type = obj.TYPE_BRANCH808p.To.SetTarget(c.cursym.Func.Text.Link)809p.Mark |= BRANCH810
811// placeholder for q1's jump target812p = obj.Appendp(p, c.newprog)813
814p.As = obj.ANOP // zero-width place holder815q1.To.SetTarget(p)816
817return p818}
819
820func (c *ctxt0) addnop(p *obj.Prog) {821q := c.newprog()822q.As = ANOOP823q.Pos = p.Pos824q.Link = p.Link825p.Link = q826}
827
828const (829E_HILO = 1 << 0830E_FCR = 1 << 1831E_MCR = 1 << 2832E_MEM = 1 << 3833E_MEMSP = 1 << 4 /* uses offset and size */834E_MEMSB = 1 << 5 /* uses offset and size */835ANYMEM = E_MEM | E_MEMSP | E_MEMSB836//DELAY = LOAD|BRANCH|FCMP837DELAY = BRANCH /* only schedule branch */838)
839
// Dep is a set of resources touched by one instruction, as bit masks.
type Dep struct {
	ireg uint32 // integer registers, bit (reg - REG_R0)
	freg uint32 // floating-point registers, bit (reg - REG_F0)
	cc   uint32 // E_* resource bits
}
845
846type Sch struct {847p obj.Prog848set Dep
849used Dep
850soffset int32851size uint8852nop uint8853comp bool854}
855
856func (c *ctxt0) sched(p0, pe *obj.Prog) {857var sch [NSCHED]Sch858
859/*860* build side structure
861*/
862s := sch[:]863for p := p0; ; p = p.Link {864s[0].p = *p865c.markregused(&s[0])866if p == pe {867break868}869s = s[1:]870}871se := s872
873for i := cap(sch) - cap(se); i >= 0; i-- {874s = sch[i:]875if s[0].p.Mark&DELAY == 0 {876continue877}878if -cap(s) < -cap(se) {879if !conflict(&s[0], &s[1]) {880continue881}882}883
884var t []Sch885var j int886for j = cap(sch) - cap(s) - 1; j >= 0; j-- {887t = sch[j:]888if t[0].comp {889if s[0].p.Mark&BRANCH != 0 {890continue891}892}893if t[0].p.Mark&DELAY != 0 {894if -cap(s) >= -cap(se) || conflict(&t[0], &s[1]) {895continue896}897}898for u := t[1:]; -cap(u) <= -cap(s); u = u[1:] {899if c.depend(&u[0], &t[0]) {900continue901}902}903goto out2904}905
906if s[0].p.Mark&BRANCH != 0 {907s[0].nop = 1908}909continue910
911out2:912// t[0] is the instruction being moved to fill the delay913stmp := t[0]914copy(t[:i-j], t[1:i-j+1])915s[0] = stmp916
917if t[i-j-1].p.Mark&BRANCH != 0 {918// t[i-j] is being put into a branch delay slot919// combine its Spadj with the branch instruction920t[i-j-1].p.Spadj += t[i-j].p.Spadj921t[i-j].p.Spadj = 0922}923
924i--925}926
927/*928* put it all back
929*/
930var p *obj.Prog931var q *obj.Prog932for s, p = sch[:], p0; -cap(s) <= -cap(se); s, p = s[1:], q {933q = p.Link934if q != s[0].p.Link {935*p = s[0].p936p.Link = q937}938for s[0].nop != 0 {939s[0].nop--940c.addnop(p)941}942}943}
944
945func (c *ctxt0) markregused(s *Sch) {946p := &s.p947s.comp = c.compound(p)948s.nop = 0949if s.comp {950s.set.ireg |= 1 << (REGTMP - REG_R0)951s.used.ireg |= 1 << (REGTMP - REG_R0)952}953
954ar := 0 /* dest is really reference */955ad := 0 /* source/dest is really address */956ld := 0 /* opcode is load instruction */957sz := 20 /* size of load/store for overlap computation */958
959/*960* flags based on opcode
961*/
962switch p.As {963case obj.ATEXT:964c.autosize = int32(p.To.Offset + 8)965ad = 1966
967case AJAL:968r := p.Reg969if r == 0 {970r = REGLINK971}972s.set.ireg |= 1 << uint(r-REG_R0)973ar = 1974ad = 1975
976case ABGEZAL,977ABLTZAL:978s.set.ireg |= 1 << (REGLINK - REG_R0)979fallthrough980case ABEQ,981ABGEZ,982ABGTZ,983ABLEZ,984ABLTZ,985ABNE:986ar = 1987ad = 1988
989case ABFPT,990ABFPF:991ad = 1992s.used.cc |= E_FCR993
994case ACMPEQD,995ACMPEQF,996ACMPGED,997ACMPGEF,998ACMPGTD,999ACMPGTF:1000ar = 11001s.set.cc |= E_FCR1002p.Mark |= FCMP1003
1004case AJMP:1005ar = 11006ad = 11007
1008case AMOVB,1009AMOVBU:1010sz = 11011ld = 11012
1013case AMOVH,1014AMOVHU:1015sz = 21016ld = 11017
1018case AMOVF,1019AMOVW,1020AMOVWL,1021AMOVWR:1022sz = 41023ld = 11024
1025case AMOVD,1026AMOVV,1027AMOVVL,1028AMOVVR:1029sz = 81030ld = 11031
1032case ADIV,1033ADIVU,1034AMUL,1035AMULU,1036AREM,1037AREMU,1038ADIVV,1039ADIVVU,1040AMULV,1041AMULVU,1042AREMV,1043AREMVU:1044s.set.cc = E_HILO1045fallthrough1046case AADD,1047AADDU,1048AADDV,1049AADDVU,1050AAND,1051ANOR,1052AOR,1053ASGT,1054ASGTU,1055ASLL,1056ASRA,1057ASRL,1058ASLLV,1059ASRAV,1060ASRLV,1061ASUB,1062ASUBU,1063ASUBV,1064ASUBVU,1065AXOR,1066
1067AADDD,1068AADDF,1069AADDW,1070ASUBD,1071ASUBF,1072ASUBW,1073AMULF,1074AMULD,1075AMULW,1076ADIVF,1077ADIVD,1078ADIVW:1079if p.Reg == 0 {1080if p.To.Type == obj.TYPE_REG {1081p.Reg = p.To.Reg1082}1083//if(p->reg == NREG)1084// print("botch %P\n", p);1085}1086}1087
1088/*1089* flags based on 'to' field
1090*/
1091cls := int(p.To.Class)1092if cls == 0 {1093cls = c.aclass(&p.To) + 11094p.To.Class = int8(cls)1095}1096cls--1097switch cls {1098default:1099fmt.Printf("unknown class %d %v\n", cls, p)1100
1101case C_ZCON,1102C_SCON,1103C_ADD0CON,1104C_AND0CON,1105C_ADDCON,1106C_ANDCON,1107C_UCON,1108C_LCON,1109C_NONE,1110C_SBRA,1111C_LBRA,1112C_ADDR,1113C_TEXTSIZE:1114break1115
1116case C_HI,1117C_LO:1118s.set.cc |= E_HILO1119
1120case C_FCREG:1121s.set.cc |= E_FCR1122
1123case C_MREG:1124s.set.cc |= E_MCR1125
1126case C_ZOREG,1127C_SOREG,1128C_LOREG:1129cls = int(p.To.Reg)1130s.used.ireg |= 1 << uint(cls-REG_R0)1131if ad != 0 {1132break1133}1134s.size = uint8(sz)1135s.soffset = c.regoff(&p.To)1136
1137m := uint32(ANYMEM)1138if cls == REGSB {1139m = E_MEMSB1140}1141if cls == REGSP {1142m = E_MEMSP1143}1144
1145if ar != 0 {1146s.used.cc |= m1147} else {1148s.set.cc |= m1149}1150
1151case C_SACON,1152C_LACON:1153s.used.ireg |= 1 << (REGSP - REG_R0)1154
1155case C_SECON,1156C_LECON:1157s.used.ireg |= 1 << (REGSB - REG_R0)1158
1159case C_REG:1160if ar != 0 {1161s.used.ireg |= 1 << uint(p.To.Reg-REG_R0)1162} else {1163s.set.ireg |= 1 << uint(p.To.Reg-REG_R0)1164}1165
1166case C_FREG:1167if ar != 0 {1168s.used.freg |= 1 << uint(p.To.Reg-REG_F0)1169} else {1170s.set.freg |= 1 << uint(p.To.Reg-REG_F0)1171}1172if ld != 0 && p.From.Type == obj.TYPE_REG {1173p.Mark |= LOAD1174}1175
1176case C_SAUTO,1177C_LAUTO:1178s.used.ireg |= 1 << (REGSP - REG_R0)1179if ad != 0 {1180break1181}1182s.size = uint8(sz)1183s.soffset = c.regoff(&p.To)1184
1185if ar != 0 {1186s.used.cc |= E_MEMSP1187} else {1188s.set.cc |= E_MEMSP1189}1190
1191case C_SEXT,1192C_LEXT:1193s.used.ireg |= 1 << (REGSB - REG_R0)1194if ad != 0 {1195break1196}1197s.size = uint8(sz)1198s.soffset = c.regoff(&p.To)1199
1200if ar != 0 {1201s.used.cc |= E_MEMSB1202} else {1203s.set.cc |= E_MEMSB1204}1205}1206
1207/*1208* flags based on 'from' field
1209*/
1210cls = int(p.From.Class)1211if cls == 0 {1212cls = c.aclass(&p.From) + 11213p.From.Class = int8(cls)1214}1215cls--1216switch cls {1217default:1218fmt.Printf("unknown class %d %v\n", cls, p)1219
1220case C_ZCON,1221C_SCON,1222C_ADD0CON,1223C_AND0CON,1224C_ADDCON,1225C_ANDCON,1226C_UCON,1227C_LCON,1228C_NONE,1229C_SBRA,1230C_LBRA,1231C_ADDR,1232C_TEXTSIZE:1233break1234
1235case C_HI,1236C_LO:1237s.used.cc |= E_HILO1238
1239case C_FCREG:1240s.used.cc |= E_FCR1241
1242case C_MREG:1243s.used.cc |= E_MCR1244
1245case C_ZOREG,1246C_SOREG,1247C_LOREG:1248cls = int(p.From.Reg)1249s.used.ireg |= 1 << uint(cls-REG_R0)1250if ld != 0 {1251p.Mark |= LOAD1252}1253s.size = uint8(sz)1254s.soffset = c.regoff(&p.From)1255
1256m := uint32(ANYMEM)1257if cls == REGSB {1258m = E_MEMSB1259}1260if cls == REGSP {1261m = E_MEMSP1262}1263
1264s.used.cc |= m1265
1266case C_SACON,1267C_LACON:1268cls = int(p.From.Reg)1269if cls == 0 {1270cls = REGSP1271}1272s.used.ireg |= 1 << uint(cls-REG_R0)1273
1274case C_SECON,1275C_LECON:1276s.used.ireg |= 1 << (REGSB - REG_R0)1277
1278case C_REG:1279s.used.ireg |= 1 << uint(p.From.Reg-REG_R0)1280
1281case C_FREG:1282s.used.freg |= 1 << uint(p.From.Reg-REG_F0)1283if ld != 0 && p.To.Type == obj.TYPE_REG {1284p.Mark |= LOAD1285}1286
1287case C_SAUTO,1288C_LAUTO:1289s.used.ireg |= 1 << (REGSP - REG_R0)1290if ld != 0 {1291p.Mark |= LOAD1292}1293if ad != 0 {1294break1295}1296s.size = uint8(sz)1297s.soffset = c.regoff(&p.From)1298
1299s.used.cc |= E_MEMSP1300
1301case C_SEXT:1302case C_LEXT:1303s.used.ireg |= 1 << (REGSB - REG_R0)1304if ld != 0 {1305p.Mark |= LOAD1306}1307if ad != 0 {1308break1309}1310s.size = uint8(sz)1311s.soffset = c.regoff(&p.From)1312
1313s.used.cc |= E_MEMSB1314}1315
1316cls = int(p.Reg)1317if cls != 0 {1318if REG_F0 <= cls && cls <= REG_F31 {1319s.used.freg |= 1 << uint(cls-REG_F0)1320} else {1321s.used.ireg |= 1 << uint(cls-REG_R0)1322}1323}1324s.set.ireg &^= (1 << (REGZERO - REG_R0)) /* R0 can't be set */1325}
1326
1327/*
1328* test to see if two instructions can be
1329* interchanged without changing semantics
1330*/
1331func (c *ctxt0) depend(sa, sb *Sch) bool {1332if sa.set.ireg&(sb.set.ireg|sb.used.ireg) != 0 {1333return true1334}1335if sb.set.ireg&sa.used.ireg != 0 {1336return true1337}1338
1339if sa.set.freg&(sb.set.freg|sb.used.freg) != 0 {1340return true1341}1342if sb.set.freg&sa.used.freg != 0 {1343return true1344}1345
1346/*1347* special case.
1348* loads from same address cannot pass.
1349* this is for hardware fifo's and the like
1350*/
1351if sa.used.cc&sb.used.cc&E_MEM != 0 {1352if sa.p.Reg == sb.p.Reg {1353if c.regoff(&sa.p.From) == c.regoff(&sb.p.From) {1354return true1355}1356}1357}1358
1359x := (sa.set.cc & (sb.set.cc | sb.used.cc)) | (sb.set.cc & sa.used.cc)1360if x != 0 {1361/*1362* allow SB and SP to pass each other.
1363* allow SB to pass SB iff doffsets are ok
1364* anything else conflicts
1365*/
1366if x != E_MEMSP && x != E_MEMSB {1367return true1368}1369x = sa.set.cc | sb.set.cc | sa.used.cc | sb.used.cc1370if x&E_MEM != 0 {1371return true1372}1373if offoverlap(sa, sb) {1374return true1375}1376}1377
1378return false1379}
1380
1381func offoverlap(sa, sb *Sch) bool {1382if sa.soffset < sb.soffset {1383if sa.soffset+int32(sa.size) > sb.soffset {1384return true1385}1386return false1387}1388if sb.soffset+int32(sb.size) > sa.soffset {1389return true1390}1391return false1392}
1393
1394/*
1395* test 2 adjacent instructions
1396* and find out if inserted instructions
1397* are desired to prevent stalls.
1398*/
1399func conflict(sa, sb *Sch) bool {1400if sa.set.ireg&sb.used.ireg != 0 {1401return true1402}1403if sa.set.freg&sb.used.freg != 0 {1404return true1405}1406if sa.set.cc&sb.used.cc != 0 {1407return true1408}1409return false1410}
1411
1412func (c *ctxt0) compound(p *obj.Prog) bool {1413o := c.oplook(p)1414if o.size != 4 {1415return true1416}1417if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSB {1418return true1419}1420return false1421}
1422
1423var Linkmips64 = obj.LinkArch{1424Arch: sys.ArchMIPS64,1425Init: buildop,1426Preprocess: preprocess,1427Assemble: span0,1428Progedit: progedit,1429DWARFRegisters: MIPSDWARFRegisters,1430}
1431
1432var Linkmips64le = obj.LinkArch{1433Arch: sys.ArchMIPS64LE,1434Init: buildop,1435Preprocess: preprocess,1436Assemble: span0,1437Progedit: progedit,1438DWARFRegisters: MIPSDWARFRegisters,1439}
1440
1441var Linkmips = obj.LinkArch{1442Arch: sys.ArchMIPS,1443Init: buildop,1444Preprocess: preprocess,1445Assemble: span0,1446Progedit: progedit,1447DWARFRegisters: MIPSDWARFRegisters,1448}
1449
1450var Linkmipsle = obj.LinkArch{1451Arch: sys.ArchMIPSLE,1452Init: buildop,1453Preprocess: preprocess,1454Assemble: span0,1455Progedit: progedit,1456DWARFRegisters: MIPSDWARFRegisters,1457}
1458