qemu / tcg.c (6564 lines · 207.3 KB)

/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

typedef struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
} TCGLabelQemuLdst;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
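
/*
 * For illustration (not part of the original source): on a host whose
 * tcg_insn_unit is one byte (e.g. x86), tcg_out32() takes the memcpy
 * branch and advances s->code_ptr by four units; on a host with 4-byte
 * units (e.g. aarch64), it stores the value directly and advances by
 * one.  The tcg_patchN() variants rewrite an already-emitted unit in
 * place, which supports the usual forward-branch idiom:
 *
 *     tcg_insn_unit *fixup = s->code_ptr;
 *     tcg_out32(s, 0);                  placeholder, patched later
 *     ...
 *     tcg_patch32(fixup, real_value);
 */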

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
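
/*
 * For illustration (not part of the original source): a forward branch
 * is emitted before its target address is known.  The backend queues a
 * fixup with tcg_out_reloc(); once the target is reached, tcg_out_label()
 * records the address, and tcg_resolve_relocs() replays every queued
 * fixup through the backend's patch_reloc().  Roughly:
 *
 *     TCGLabel *l = gen_new_label();
 *     tcg_out_reloc(s, s->code_ptr, R_BRANCH, l, 0);   R_BRANCH is a
 *                                                      hypothetical
 *                                                      backend reloc type
 *     ...emit more code...
 *     tcg_out_label(s, l);        bind the label here
 *     tcg_resolve_relocs(s);      patch all pending branches
 */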

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
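
/*
 * For illustration (not part of the original source): with
 * i1 = {dst=R0, src=R1} and i2 = {dst=R1, src=R0} the two moves form a
 * cycle.  tcg_out_movext2 first tries a register exchange; if the
 * backend has no xchg, it breaks the cycle through @scratch:
 *
 *     mov  scratch, R1          save i1->src
 *     <extend R0 into R1>       i2 is now safe to perform
 *     <extend scratch into R0>  complete i1 from the saved copy
 */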

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { .args_ct_str = { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { .args_ct_str = { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};


#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
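
/*
 * For illustration (not part of the original source): this is a classic
 * X-macro pattern.  "tcg-target-con-set.h" is included twice above with
 * different definitions of the same macros, so a line such as
 *
 *     C_O1_I2(r, r, ri)
 *
 * first expands to the enumerator c_o1_i2_r_r_ri inside
 * TCGConstraintSetIndex, then to the array element
 *
 *     { .args_ct_str = { "r", "r", "ri" } },
 *
 * in constraint_sets[].  With the final definitions it becomes the value
 * a backend's tcg_target_op_def() returns for an opcode that produces
 * one register output from a register and a register-or-immediate input.
 */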

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */
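
/*
 * For illustration (not part of the original source): the "Relink
 * mem_base" loop above fixes up intra-struct pointers after the shallow
 * copy "*s = tcg_init_ctx".  If global temp 3 of the parent context uses
 * temp 0 (typically the env register) as its memory base, the copied
 * pointer would still aim into tcg_init_ctx.temps[]; recomputing the
 * index redirects it into the clone's own array:
 *
 *     b = &tcg_init_ctx.temps[0] - tcg_init_ctx.temps;   b == 0
 *     s->temps[3].mem_base = &s->temps[0];
 */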

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
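
/*
 * For illustration (not part of the original source): this is the slow
 * path of a bump allocator.  The common case lives in the tcg_malloc()
 * inline in the headers, which carves @size bytes out of
 * [pool_cur, pool_end) and only falls back here when the current chunk
 * is exhausted; a minimal sketch of that fast path:
 *
 *     void *tcg_malloc(int size)
 *     {
 *         ...align size...
 *         if (pool_cur + size > pool_end) {
 *             return tcg_malloc_internal(tcg_ctx, size);
 *         }
 *         void *ptr = pool_cur;
 *         pool_cur += size;
 *         return ptr;
 *     }
 *
 * Everything allocated this way is released wholesale by
 * tcg_pool_reset() at the start of each translation.
 */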

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}
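
/*
 * For illustration (not part of the original source): the typemask packs
 * one 3-bit typecode per position, the return type in bits [2:0] and
 * argument N in bits [3N+2:3N].  For info_helper_ld32_mmu above, four
 * argument fields (env, i64, i32, ptr) are populated, so the highest
 * non-zero bit of typemask >> 3 falls within the fourth field and
 *
 *     nargs = DIV_ROUND_UP(32 - clz32(typemask >> 3), 3) == 4
 *
 * giving an ffi_cif describing (env, addr, oi, ra).
 */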

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}
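
/*
 * For illustration (not part of the original source): argument slots
 * number the call registers first, then the stack.  Assuming a host with
 * six integer argument registers and TCG_TARGET_CALL_STACK_OFFSET == 0,
 * slot 4 is the fifth register while slot 7 is the second stack word:
 *
 *     arg_slot_reg_p(4)    -> true
 *     arg_slot_reg_p(7)    -> false
 *     arg_slot_stk_ofs(7)  -> (7 - 6) * sizeof(tcg_target_long)
 */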

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * a structure passed by reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}
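
/*
 * For illustration (not part of the original source): on a 64-bit host
 * an Int128 argument passed TCG_CALL_ARG_BY_REF occupies n = 2 words.
 * One regular argument slot receives the pointer, while ref_slot
 * reserves two stack words for a private copy of the value; the pointer
 * stored in the argument slot will point at that copy, so the callee may
 * clobber it freely without corrupting the caller's data.
 */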

static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
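
/*
 * For illustration (not part of the original source): tracing
 * init_call_layout() for info_helper_st64_mmu on a hypothetical 64-bit
 * host that uses TCG_CALL_ARG_NORMAL throughout, the void return sets
 * nr_out = 0 and the five inputs land in consecutive slots:
 *
 *     env  (ptr)  -> arg_slot 0
 *     addr (i64)  -> arg_slot 1
 *     data (i64)  -> arg_slot 2
 *     oi   (i32)  -> arg_slot 3
 *     ra   (ptr)  -> arg_slot 4
 *
 * With six or more call registers, every slot is a register and no
 * stack space is consumed.
 */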

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for the
     * reasoning behind this.
     * In system-mode we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    tcg_env = temp_tcgv_ptr(ts);
}

void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

void tcg_prologue_init(void)
{
    TCGContext *s = tcg_ctx;
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    s->emit_before_op = NULL;
    QSIMPLEQ_INIT(&s->labels);

    tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
                     s->addr_type == TCG_TYPE_I64);

    tcg_debug_assert(s->insn_start_words > 0);
}

static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        tcg_raise_tb_overflow(s);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
                                            const char *name, TCGType type)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
1629

1630
TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1631
{
1632
    TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1633
    return temp_tcgv_i32(ts);
1634
}
1635

1636
TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1637
{
1638
    TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1639
    return temp_tcgv_i64(ts);
1640
}
1641

1642
TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1643
{
1644
    TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1645
    return temp_tcgv_ptr(ts);
1646
}
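
/*
 * Usage sketch (hypothetical CPU state; the struct and field names
 * here are illustrative only):
 *
 *     typedef struct CPUFooState { uint32_t pc; } CPUFooState;
 *     TCGv_i32 cpu_pc = tcg_global_mem_new_i32(tcg_env,
 *                           offsetof(CPUFooState, pc), "pc");
 *
 * The resulting global is backed by memory at base + offset and
 * stays valid across translation blocks, unlike ordinary temps.
 */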

TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type.  */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            return ts;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        ts->type = TCG_TYPE_REG;

        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }
    return ts;
}
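
/*
 * Worked example of the split above: with TCG_TARGET_REG_BITS == 32,
 * a TCG_TYPE_I64 temp takes n = 64 / 32 = 2 consecutive TCGTemps of
 * TCG_TYPE_REG with temp_subindex 0 and 1; a TCG_TYPE_I128 temp on a
 * 64-bit host likewise takes 128 / 64 = 2 parts.  Callers hold a
 * pointer to the first part and the rest are found via base_type.
 */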

TCGv_i32 tcg_temp_new_i32(void)
{
    return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
}

TCGv_i32 tcg_temp_ebb_new_i32(void)
{
    return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
}

TCGv_i64 tcg_temp_new_i64(void)
{
    return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
}

TCGv_i64 tcg_temp_ebb_new_i64(void)
{
    return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
}

TCGv_ptr tcg_temp_new_ptr(void)
{
    return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
}

TCGv_ptr tcg_temp_ebb_new_ptr(void)
{
    return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
}

TCGv_i128 tcg_temp_new_i128(void)
{
    return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
}

TCGv_i128 tcg_temp_ebb_new_i128(void)
{
    return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
}

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, TEMP_EBB);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;

    switch (ts->kind) {
    case TEMP_CONST:
    case TEMP_TB:
        /* Silently ignore free. */
        break;
    case TEMP_EBB:
        tcg_debug_assert(ts->temp_allocated != 0);
        ts->temp_allocated = 0;
        set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
        break;
    default:
        /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
        g_assert_not_reached();
    }
}
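
/*
 * Lifecycle note: only TEMP_EBB temps are recycled here; the bit set
 * in free_temps lets tcg_temp_new_internal() hand the same slot out
 * again within this TB.  TEMP_TB temps are reclaimed wholesale by
 * the next tcg_func_start(), which is why freeing them is a no-op.
 */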

void tcg_temp_free_i32(TCGv_i32 arg)
{
    tcg_temp_free_internal(tcgv_i32_temp(arg));
}

void tcg_temp_free_i64(TCGv_i64 arg)
{
    tcg_temp_free_internal(tcgv_i64_temp(arg));
}

void tcg_temp_free_i128(TCGv_i128 arg)
{
    tcg_temp_free_internal(tcgv_i128_temp(arg));
}

void tcg_temp_free_ptr(TCGv_ptr arg)
{
    tcg_temp_free_internal(tcgv_ptr_temp(arg));
}

void tcg_temp_free_vec(TCGv_vec arg)
{
    tcg_temp_free_internal(tcgv_vec_temp(arg));
}

TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}
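
/*
 * Interning sketch: asking for the same (type, value) twice returns
 * the same TCGTemp, so, illustratively,
 *
 *     TCGv_i32 a = tcg_constant_i32(42);
 *     TCGv_i32 b = tcg_constant_i32(42);
 *
 * leaves a == b.  TEMP_CONST temps are never freed and need no
 * matching tcg_temp_free_*() call.
 */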

TCGv_i32 tcg_constant_i32(int32_t val)
{
    return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
}

TCGv_i64 tcg_constant_i64(int64_t val)
{
    return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
}

TCGv_ptr tcg_constant_ptr_int(intptr_t val)
{
    return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
}

TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
{
    val = dup_const(vece, val);
    return temp_tcgv_vec(tcg_constant_internal(type, val));
}

TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);
    return tcg_constant_vec(t->base_type, vece, val);
}

#ifdef CONFIG_DEBUG_TCG
size_t temp_idx(TCGTemp *ts)
{
    ptrdiff_t n = ts - tcg_ctx->temps;
    assert(n >= 0 && n < tcg_ctx->nb_temps);
    return n;
}

TCGTemp *tcgv_i32_temp(TCGv_i32 v)
{
    uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);

    assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
    assert(o % sizeof(TCGTemp) == 0);

    return (void *)tcg_ctx + (uintptr_t)v;
}
#endif /* CONFIG_DEBUG_TCG */

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_a32_i32:
    case INDEX_op_qemu_ld_a64_i32:
    case INDEX_op_qemu_st_a32_i32:
    case INDEX_op_qemu_st_a64_i32:
    case INDEX_op_qemu_ld_a32_i64:
    case INDEX_op_qemu_ld_a64_i64:
    case INDEX_op_qemu_st_a32_i64:
    case INDEX_op_qemu_st_a64_i64:
        return true;

    case INDEX_op_qemu_st8_a32_i32:
    case INDEX_op_qemu_st8_a64_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_qemu_ld_a32_i128:
    case INDEX_op_qemu_ld_a64_i128:
    case INDEX_op_qemu_st_a32_i128:
    case INDEX_op_qemu_st_a64_i128:
        return TCG_TARGET_HAS_qemu_ldst_i128;

    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_movcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_neg_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_negsetcond_i32:
        return TCG_TARGET_HAS_negsetcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_movcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_neg_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_negsetcond_i64:
        return TCG_TARGET_HAS_negsetcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extr_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
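
/*
 * Usage sketch: expansion code can probe an opcode before emitting
 * it and fall back to an equivalent sequence otherwise, e.g.
 *
 *     if (tcg_op_supported(INDEX_op_ctpop_i32)) {
 *         ... emit ctpop directly ...
 *     } else {
 *         ... expand with shifts and masks, or call a helper ...
 *     }
 */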

static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);

static void tcg_gen_callN(void *func, TCGHelperInfo *info,
                          TCGTemp *ret, TCGTemp **args)
{
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
        init_call_layout(info);
        g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
    }

    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }

    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_BY_REF:
        case TCG_CALL_ARG_BY_REF_N:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            {
                TCGv_i64 temp = tcg_temp_ebb_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    if (tcg_ctx->emit_before_op) {
        QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
    } else {
        QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
    }

    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}
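
/*
 * Layout note on the loop above: when the host ABI wants 32-bit
 * arguments widened (TCG_CALL_ARG_EXTEND_U/_S), a scratch i64 temp
 * is materialized per such argument and recorded in extend_free[];
 * all of the scratch temps are released once the call op has been
 * queued, so the callee sees properly extended 64-bit values.
 */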

void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
{
    tcg_gen_callN(func, info, ret, NULL);
}

void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
{
    tcg_gen_callN(func, info, ret, &t1);
}

void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
                   TCGTemp *t1, TCGTemp *t2)
{
    TCGTemp *args[2] = { t1, t2 };
    tcg_gen_callN(func, info, ret, args);
}

void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
                   TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
{
    TCGTemp *args[3] = { t1, t2, t3 };
    tcg_gen_callN(func, info, ret, args);
}

void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
                   TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
{
    TCGTemp *args[4] = { t1, t2, t3, t4 };
    tcg_gen_callN(func, info, ret, args);
}

void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
                   TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
{
    TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
    tcg_gen_callN(func, info, ret, args);
}

void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
                   TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
                   TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
{
    TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
    tcg_gen_callN(func, info, ret, args);
}

void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
                   TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
                   TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
{
    TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
    tcg_gen_callN(func, info, ret, args);
}

static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;

    for (i = 0, n = s->nb_temps; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        TCGTempVal val = TEMP_VAL_MEM;

        switch (ts->kind) {
        case TEMP_CONST:
            val = TEMP_VAL_CONST;
            break;
        case TEMP_FIXED:
            val = TEMP_VAL_REG;
            break;
        case TEMP_GLOBAL:
            break;
        case TEMP_EBB:
            val = TEMP_VAL_DEAD;
            /* fall through */
        case TEMP_TB:
            ts->mem_allocated = 0;
            break;
        default:
            g_assert_not_reached();
        }
        ts->val_type = val;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}

static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_TB:
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_EBB:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}

static char *tcg_get_arg_str(TCGContext *s, char *buf,
                             int buf_size, TCGArg arg)
{
    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
}

static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu",
    [TCG_COND_TSTEQ] = "tsteq",
    [TCG_COND_TSTNE] = "tstne",
};

static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
    [MO_128 + MO_BE] = "beo",
    [MO_128 + MO_LE] = "leo",
};

static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};

static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
    [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
    [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
    [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
    [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
    [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
    [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
};

static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};

#ifdef CONFIG_PLUGIN
static const char * const plugin_from_name[] = {
    "from-tb",
    "from-insn",
    "after-insn",
    "after-tb",
};
#endif

static inline bool tcg_regset_single(TCGRegSet d)
{
    return (d & (d - 1)) == 0;
}

static inline TCGReg tcg_regset_first(TCGRegSet d)
{
    if (TCG_TARGET_NB_REGS <= 32) {
        return ctz32(d);
    } else {
        return ctz64(d);
    }
}
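
/*
 * Bit tricks above, illustrated: for d = 0b101000, d & (d - 1) is
 * 0b100000 != 0, so the set holds more than one register; ctz32/
 * ctz64 of d is 3, the index of the lowest set bit and hence the
 * first register in the set.
 */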

/* Return only the number of characters output -- no error return. */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })

void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            nb_oargs = 0;
            col += ne_fprintf(f, "\n ----");

            for (i = 0, k = s->insn_start_words; i < k; ++i) {
                col += ne_fprintf(f, " %016" PRIx64,
                                  tcg_get_insn_start_param(op, i));
            }
        } else if (c == INDEX_op_call) {
            const TCGHelperInfo *info = tcg_call_info(op);
            void *func = tcg_call_func(op);

            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            col += ne_fprintf(f, " %s ", def->name);

            /*
             * Print the function name from TCGHelperInfo, if available.
             * Note that plugins have a template function for the info,
             * but the actual function pointer comes from the plugin.
             */
            if (func == info->func) {
                col += ne_fprintf(f, "%s", info->name);
            } else {
                col += ne_fprintf(f, "plugin(%p)", func);
            }

            col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                            op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                col += ne_fprintf(f, ",%s", t);
            }
        } else {
            col += ne_fprintf(f, " %s ", def->name);

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            if (def->flags & TCG_OPF_VECTOR) {
                col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
                                  8 << TCGOP_VECE(op));
            }

            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_negsetcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_negsetcond_i64:
            case INDEX_op_movcond_i64:
            case INDEX_op_cmp_vec:
            case INDEX_op_cmpsel_vec:
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
                } else {
                    col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_a32_i32:
            case INDEX_op_qemu_ld_a64_i32:
            case INDEX_op_qemu_st_a32_i32:
            case INDEX_op_qemu_st_a64_i32:
            case INDEX_op_qemu_st8_a32_i32:
            case INDEX_op_qemu_st8_a64_i32:
            case INDEX_op_qemu_ld_a32_i64:
            case INDEX_op_qemu_ld_a64_i64:
            case INDEX_op_qemu_st_a32_i64:
            case INDEX_op_qemu_st_a64_i64:
            case INDEX_op_qemu_ld_a32_i128:
            case INDEX_op_qemu_ld_a64_i128:
            case INDEX_op_qemu_st_a32_i128:
            case INDEX_op_qemu_st_a64_i128:
                {
                    const char *s_al, *s_op, *s_at;
                    MemOpIdx oi = op->args[k++];
                    MemOp mop = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
                    s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
                    s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
                    mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);

                    /* If all fields are accounted for, print symbolically. */
                    if (!mop && s_al && s_op && s_at) {
                        col += ne_fprintf(f, ",%s%s%s,%u",
                                          s_at, s_al, s_op, ix);
                    } else {
                        mop = get_memop(oi);
                        col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
                    }
                    i = 1;
                }
                break;
            case INDEX_op_bswap16_i32:
            case INDEX_op_bswap16_i64:
            case INDEX_op_bswap32_i32:
            case INDEX_op_bswap32_i64:
            case INDEX_op_bswap64_i64:
                {
                    TCGArg flags = op->args[k];
                    const char *name = NULL;

                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
                        name = bswap_flag_name[flags];
                    }
                    if (name) {
                        col += ne_fprintf(f, ",%s", name);
                    } else {
                        col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
                    }
                    i = k = 1;
                }
                break;
#ifdef CONFIG_PLUGIN
            case INDEX_op_plugin_cb:
                {
                    TCGArg from = op->args[k++];
                    const char *name = NULL;

                    if (from < ARRAY_SIZE(plugin_from_name)) {
                        name = plugin_from_name[from];
                    }
                    if (name) {
                        col += ne_fprintf(f, "%s", name);
                    } else {
                        col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
                    }
                    i = 1;
                }
                break;
#endif
            default:
                i = 0;
                break;
            }
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                col += ne_fprintf(f, "%s$L%d", k ? "," : "",
                                  arg_label(op->args[k])->id);
                i++, k++;
                break;
            case INDEX_op_mb:
                {
                    TCGBar membar = op->args[k];
                    const char *b_op, *m_op;

                    switch (membar & TCG_BAR_SC) {
                    case 0:
                        b_op = "none";
                        break;
                    case TCG_BAR_LDAQ:
                        b_op = "acq";
                        break;
                    case TCG_BAR_STRL:
                        b_op = "rel";
                        break;
                    case TCG_BAR_SC:
                        b_op = "seq";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    switch (membar & TCG_MO_ALL) {
                    case 0:
                        m_op = "none";
                        break;
                    case TCG_MO_LD_LD:
                        m_op = "rr";
                        break;
                    case TCG_MO_LD_ST:
                        m_op = "rw";
                        break;
                    case TCG_MO_ST_LD:
                        m_op = "wr";
                        break;
                    case TCG_MO_ST_ST:
                        m_op = "ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST:
                        m_op = "rr+rw";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD:
                        m_op = "rr+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_ST:
                        m_op = "rr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rw+wr";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rw+ww";
                        break;
                    case TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "wr+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rr+rw+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rr+rw+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rr+wr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rw+wr+ww";
                        break;
                    case TCG_MO_ALL:
                        m_op = "all";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
                    i++, k++;
                }
                break;
            default:
                break;
            }
            for (; i < nb_cargs; i++, k++) {
                col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
                                  op->args[k]);
            }
        }

        if (have_prefs || op->life) {
            for (; col < 40; ++col) {
                putc(' ', f);
            }
        }

        if (op->life) {
            unsigned life = op->life;

            if (life & (SYNC_ARG * 3)) {
                ne_fprintf(f, "  sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
            life /= DEAD_ARG;
            if (life) {
                ne_fprintf(f, "  dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
        }

        if (have_prefs) {
            for (i = 0; i < nb_oargs; ++i) {
                TCGRegSet set = output_pref(op, i);

                if (i == 0) {
                    ne_fprintf(f, "  pref=");
                } else {
                    ne_fprintf(f, ",");
                }
                if (set == 0) {
                    ne_fprintf(f, "none");
                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
                    ne_fprintf(f, "all");
#ifdef CONFIG_DEBUG_TCG
                } else if (tcg_regset_single(set)) {
                    TCGReg reg = tcg_regset_first(set);
                    ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
#endif
                } else if (TCG_TARGET_NB_REGS <= 32) {
                    ne_fprintf(f, "0x%x", (uint32_t)set);
                } else {
                    ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
                }
            }
        }

        putc('\n', f);
    }
}

/* we give more priority to constraints with fewer registers */
static int get_constraint_priority(const TCGOpDef *def, int k)
{
    const TCGArgConstraint *arg_ct = &def->args_ct[k];
    int n = ctpop64(arg_ct->regs);

    /*
     * Sort constraints of a single register first, which includes output
     * aliases (which must exactly match the input already allocated).
     */
    if (n == 1 || arg_ct->oalias) {
        return INT_MAX;
    }

    /*
     * Sort register pairs next, first then second immediately after.
     * Arbitrarily sort multiple pairs by the index of the first reg;
     * there shouldn't be many pairs.
     */
    switch (arg_ct->pair) {
    case 1:
    case 3:
        return (k + 1) * 2;
    case 2:
        return (arg_ct->pair_index + 1) * 2 - 1;
    }

    /* Finally, sort by decreasing register count. */
    assert(n > 1);
    return -n;
}
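
/*
 * Illustrative priorities: a single-register (or output-alias)
 * constraint returns INT_MAX; the first member of a pair at index
 * k = 1 returns (1 + 1) * 2 = 4 and its second member returns
 * (1 + 1) * 2 - 1 = 3; a plain 16-register set returns -16.  The
 * sort below therefore places the most constrained arguments first.
 */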
2923

2924
/* sort from highest priority to lowest */
2925
static void sort_constraints(TCGOpDef *def, int start, int n)
2926
{
2927
    int i, j;
2928
    TCGArgConstraint *a = def->args_ct;
2929

2930
    for (i = 0; i < n; i++) {
2931
        a[start + i].sort_index = start + i;
2932
    }
2933
    if (n <= 1) {
2934
        return;
2935
    }
2936
    for (i = 0; i < n - 1; i++) {
2937
        for (j = i + 1; j < n; j++) {
2938
            int p1 = get_constraint_priority(def, a[start + i].sort_index);
2939
            int p2 = get_constraint_priority(def, a[start + j].sort_index);
2940
            if (p1 < p2) {
2941
                int tmp = a[start + i].sort_index;
2942
                a[start + i].sort_index = a[start + j].sort_index;
2943
                a[start + j].sort_index = tmp;
2944
            }
2945
        }
2946
    }
2947
}
2948

2949
static void process_op_defs(TCGContext *s)
2950
{
2951
    TCGOpcode op;
2952

2953
    for (op = 0; op < NB_OPS; op++) {
2954
        TCGOpDef *def = &tcg_op_defs[op];
2955
        const TCGTargetOpDef *tdefs;
2956
        bool saw_alias_pair = false;
2957
        int i, o, i2, o2, nb_args;
2958

2959
        if (def->flags & TCG_OPF_NOT_PRESENT) {
2960
            continue;
2961
        }
2962

2963
        nb_args = def->nb_iargs + def->nb_oargs;
2964
        if (nb_args == 0) {
2965
            continue;
2966
        }
2967

2968
        /*
2969
         * Macro magic should make it impossible, but double-check that
2970
         * the array index is in range.  Since the signness of an enum
2971
         * is implementation defined, force the result to unsigned.
2972
         */
2973
        unsigned con_set = tcg_target_op_def(op);
2974
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2975
        tdefs = &constraint_sets[con_set];
2976

2977
        for (i = 0; i < nb_args; i++) {
2978
            const char *ct_str = tdefs->args_ct_str[i];
2979
            bool input_p = i >= def->nb_oargs;
2980

2981
            /* Incomplete TCGTargetOpDef entry. */
2982
            tcg_debug_assert(ct_str != NULL);
2983

2984
            switch (*ct_str) {
2985
            case '0' ... '9':
2986
                o = *ct_str - '0';
2987
                tcg_debug_assert(input_p);
2988
                tcg_debug_assert(o < def->nb_oargs);
2989
                tcg_debug_assert(def->args_ct[o].regs != 0);
2990
                tcg_debug_assert(!def->args_ct[o].oalias);
2991
                def->args_ct[i] = def->args_ct[o];
2992
                /* The output sets oalias.  */
2993
                def->args_ct[o].oalias = 1;
2994
                def->args_ct[o].alias_index = i;
2995
                /* The input sets ialias. */
2996
                def->args_ct[i].ialias = 1;
2997
                def->args_ct[i].alias_index = o;
2998
                if (def->args_ct[i].pair) {
2999
                    saw_alias_pair = true;
3000
                }
3001
                tcg_debug_assert(ct_str[1] == '\0');
3002
                continue;
3003

3004
            case '&':
3005
                tcg_debug_assert(!input_p);
3006
                def->args_ct[i].newreg = true;
3007
                ct_str++;
3008
                break;
3009

3010
            case 'p': /* plus */
3011
                /* Allocate to the register after the previous. */
3012
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
3013
                o = i - 1;
3014
                tcg_debug_assert(!def->args_ct[o].pair);
3015
                tcg_debug_assert(!def->args_ct[o].ct);
3016
                def->args_ct[i] = (TCGArgConstraint){
3017
                    .pair = 2,
3018
                    .pair_index = o,
3019
                    .regs = def->args_ct[o].regs << 1,
3020
                    .newreg = def->args_ct[o].newreg,
3021
                };
3022
                def->args_ct[o].pair = 1;
3023
                def->args_ct[o].pair_index = i;
3024
                tcg_debug_assert(ct_str[1] == '\0');
3025
                continue;
3026

3027
            case 'm': /* minus */
3028
                /* Allocate to the register before the previous. */
3029
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
3030
                o = i - 1;
3031
                tcg_debug_assert(!def->args_ct[o].pair);
3032
                tcg_debug_assert(!def->args_ct[o].ct);
3033
                def->args_ct[i] = (TCGArgConstraint){
3034
                    .pair = 1,
3035
                    .pair_index = o,
3036
                    .regs = def->args_ct[o].regs >> 1,
3037
                    .newreg = def->args_ct[o].newreg,
3038
                };
3039
                def->args_ct[o].pair = 2;
3040
                def->args_ct[o].pair_index = i;
3041
                tcg_debug_assert(ct_str[1] == '\0');
3042
                continue;
3043
            }
3044

3045
            do {
3046
                switch (*ct_str) {
3047
                case 'i':
3048
                    def->args_ct[i].ct |= TCG_CT_CONST;
3049
                    break;
3050

3051
                /* Include all of the target-specific constraints. */
3052

3053
#undef CONST
3054
#define CONST(CASE, MASK) \
3055
    case CASE: def->args_ct[i].ct |= MASK; break;
3056
#define REGS(CASE, MASK) \
3057
    case CASE: def->args_ct[i].regs |= MASK; break;
3058

3059
#include "tcg-target-con-str.h"
3060

3061
#undef REGS
3062
#undef CONST
3063
                default:
3064
                case '0' ... '9':
3065
                case '&':
3066
                case 'p':
3067
                case 'm':
3068
                    /* Typo in TCGTargetOpDef constraint. */
3069
                    g_assert_not_reached();
3070
                }
3071
            } while (*++ct_str != '\0');
3072
        }
3073

3074
        /* TCGTargetOpDef entry with too much information? */
3075
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
3076

3077
        /*
3078
         * Fix up output pairs that are aliased with inputs.
3079
         * When we created the alias, we copied pair from the output.
3080
         * There are three cases:
3081
         *    (1a) Pairs of inputs alias pairs of outputs.
3082
         *    (1b) One input aliases the first of a pair of outputs.
3083
         *    (2)  One input aliases the second of a pair of outputs.
3084
         *
3085
         * Case 1a is handled by making sure that the pair_index'es are
3086
         * properly updated so that they appear the same as a pair of inputs.
3087
         *
3088
         * Case 1b is handled by setting the pair_index of the input to
3089
         * itself, simply so it doesn't point to an unrelated argument.
3090
         * Since we don't encounter the "second" during the input allocation
3091
         * phase, nothing happens with the second half of the input pair.
3092
         *
3093
         * Case 2 is handled by setting the second input to pair=3, the
3094
         * first output to pair=3, and the pair_index'es to match.
3095
         */
3096
        if (saw_alias_pair) {
3097
            for (i = def->nb_oargs; i < nb_args; i++) {
3098
                /*
3099
                 * Since [0-9pm] must be alone in the constraint string,
3100
                 * the only way they can both be set is if the pair comes
3101
                 * from the output alias.
3102
                 */
3103
                if (!def->args_ct[i].ialias) {
3104
                    continue;
3105
                }
3106
                switch (def->args_ct[i].pair) {
3107
                case 0:
3108
                    break;
3109
                case 1:
3110
                    o = def->args_ct[i].alias_index;
3111
                    o2 = def->args_ct[o].pair_index;
3112
                    tcg_debug_assert(def->args_ct[o].pair == 1);
3113
                    tcg_debug_assert(def->args_ct[o2].pair == 2);
3114
                    if (def->args_ct[o2].oalias) {
3115
                        /* Case 1a */
3116
                        i2 = def->args_ct[o2].alias_index;
3117
                        tcg_debug_assert(def->args_ct[i2].pair == 2);
3118
                        def->args_ct[i2].pair_index = i;
3119
                        def->args_ct[i].pair_index = i2;
3120
                    } else {
3121
                        /* Case 1b */
3122
                        def->args_ct[i].pair_index = i;
3123
                    }
3124
                    break;
3125
                case 2:
3126
                    o = def->args_ct[i].alias_index;
3127
                    o2 = def->args_ct[o].pair_index;
3128
                    tcg_debug_assert(def->args_ct[o].pair == 2);
3129
                    tcg_debug_assert(def->args_ct[o2].pair == 1);
3130
                    if (def->args_ct[o2].oalias) {
3131
                        /* Case 1a */
3132
                        i2 = def->args_ct[o2].alias_index;
3133
                        tcg_debug_assert(def->args_ct[i2].pair == 1);
3134
                        def->args_ct[i2].pair_index = i;
3135
                        def->args_ct[i].pair_index = i2;
3136
                    } else {
3137
                        /* Case 2 */
3138
                        def->args_ct[i].pair = 3;
3139
                        def->args_ct[o2].pair = 3;
3140
                        def->args_ct[i].pair_index = o2;
3141
                        def->args_ct[o2].pair_index = i;
3142
                    }
3143
                    break;
3144
                default:
3145
                    g_assert_not_reached();
3146
                }
3147
            }
3148
        }
3149

3150
        /* sort the constraints (XXX: this is just an heuristic) */
3151
        sort_constraints(def, 0, def->nb_oargs);
3152
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
3153
    }
3154
}
3155

3156
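/*
 * Remove the record of @op's use of the label referenced by
 * op->args[@idx] from that label's list of branches.  The op is
 * required to be a current user of the label; otherwise this asserts.
 */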
static void remove_label_use(TCGOp *op, int idx)
{
    TCGLabel *label = arg_label(op->args[idx]);
    TCGLabelUse *use;

    QSIMPLEQ_FOREACH(use, &label->branches, next) {
        if (use->op == op) {
            QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
            return;
        }
    }
    g_assert_not_reached();
}

void tcg_op_remove(TCGContext *s, TCGOp *op)
{
    switch (op->opc) {
    case INDEX_op_br:
        remove_label_use(op, 0);
        break;
    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        remove_label_use(op, 3);
        break;
    case INDEX_op_brcond2_i32:
        remove_label_use(op, 5);
        break;
    default:
        break;
    }

    QTAILQ_REMOVE(&s->ops, op, link);
    QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
    s->nb_ops--;
}

void tcg_remove_ops_after(TCGOp *op)
{
    TCGContext *s = tcg_ctx;

    while (true) {
        TCGOp *last = tcg_last_op();
        if (last == op) {
            return;
        }
        tcg_op_remove(s, last);
    }
}

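/*
 * Allocate a TCGOp with space for at least @nargs arguments.  Prefer
 * recycling an entry from free_ops that has enough argument slots;
 * otherwise allocate afresh, rounding @nargs up to 4 so that most ops
 * (which take 3 or 4 operands) can later be reused for one another.
 */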
static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
{
    TCGContext *s = tcg_ctx;
    TCGOp *op = NULL;

    if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
        QTAILQ_FOREACH(op, &s->free_ops, link) {
            if (nargs <= op->nargs) {
                QTAILQ_REMOVE(&s->free_ops, op, link);
                nargs = op->nargs;
                goto found;
            }
        }
    }

    /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
    nargs = MAX(4, nargs);
    op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);

 found:
    memset(op, 0, offsetof(TCGOp, link));
    op->opc = opc;
    op->nargs = nargs;

    /* Check for bitfield overflow. */
    tcg_debug_assert(op->nargs == nargs);

    s->nb_ops++;
    return op;
}

TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
{
    TCGOp *op = tcg_op_alloc(opc, nargs);

    if (tcg_ctx->emit_before_op) {
        QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
    } else {
        QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
    }
    return op;
}

TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
                            TCGOpcode opc, unsigned nargs)
{
    TCGOp *new_op = tcg_op_alloc(opc, nargs);
    QTAILQ_INSERT_BEFORE(old_op, new_op, link);
    return new_op;
}

TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
                           TCGOpcode opc, unsigned nargs)
{
    TCGOp *new_op = tcg_op_alloc(opc, nargs);
    QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
    return new_op;
}

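/*
 * Retarget every branch op that currently references @from so that it
 * references @to instead, then splice @from's list of uses onto @to's.
 * Used by reachable_code_pass below to merge adjacent labels.
 */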
static void move_label_uses(TCGLabel *to, TCGLabel *from)
{
    TCGLabelUse *u;

    QSIMPLEQ_FOREACH(u, &from->branches, next) {
        TCGOp *op = u->op;
        switch (op->opc) {
        case INDEX_op_br:
            op->args[0] = label_arg(to);
            break;
        case INDEX_op_brcond_i32:
        case INDEX_op_brcond_i64:
            op->args[3] = label_arg(to);
            break;
        case INDEX_op_brcond2_i32:
            op->args[5] = label_arg(to);
            break;
        default:
            g_assert_not_reached();
        }
    }

    QSIMPLEQ_CONCAT(&to->branches, &from->branches);
}

/* Reachability analysis: remove unreachable code.  */
static void __attribute__((noinline))
reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next, *op_prev;
    bool dead = false;

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);

            /*
             * Note that the first op in the TB is always a load,
             * so there is always something before a label.
             */
            op_prev = QTAILQ_PREV(op, link);

            /*
             * If we find two sequential labels, move all branches to
             * reference the second label and remove the first label.
             * Do this before branch to next optimization, so that the
             * middle label is out of the way.
             */
            if (op_prev->opc == INDEX_op_set_label) {
                move_label_uses(label, arg_label(op_prev->args[0]));
                tcg_op_remove(s, op_prev);
                op_prev = QTAILQ_PREV(op, link);
            }

            /*
             * Optimization can fold conditional branches to unconditional.
             * If we find a label which is preceded by an unconditional
             * branch to next, remove the branch.  We couldn't do this when
             * processing the branch because any dead code between the branch
             * and label had not yet been removed.
             */
            if (op_prev->opc == INDEX_op_br &&
                label == arg_label(op_prev->args[0])) {
                tcg_op_remove(s, op_prev);
                /* Fall through means insns become live again.  */
                dead = false;
            }

            if (QSIMPLEQ_EMPTY(&label->branches)) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all the references to the label that there will be, and
                 * there is little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}

#define TS_DEAD  1
#define TS_MEM   2
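
/*
 * Temp state for liveness is a small bitmask: TS_DEAD means the value
 * is not needed by any later op; TS_MEM means an up-to-date copy of
 * the value exists in the temp's canonical memory slot.  A global that
 * has been synced and killed is TS_DEAD | TS_MEM.
 */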

#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))

/* For liveness_pass_1, the register preferences for a given temp.  */
static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
{
    return ts->state_ptr;
}

/* For liveness_pass_1, reset the preferences for a given temp to the
 * maximal regset for its type.
 */
static inline void la_reset_pref(TCGTemp *ts)
{
    *la_temp_pref(ts)
        = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
}

/* liveness analysis: end of function: all temps are dead, and globals
   should be in memory. */
static void la_func_end(TCGContext *s, int ng, int nt)
{
    int i;

    for (i = 0; i < ng; ++i) {
        s->temps[i].state = TS_DEAD | TS_MEM;
        la_reset_pref(&s->temps[i]);
    }
    for (i = ng; i < nt; ++i) {
        s->temps[i].state = TS_DEAD;
        la_reset_pref(&s->temps[i]);
    }
}

/* liveness analysis: end of basic block: all temps are dead, globals
   and local temps should be in memory. */
static void la_bb_end(TCGContext *s, int ng, int nt)
{
    int i;

    for (i = 0; i < nt; ++i) {
        TCGTemp *ts = &s->temps[i];
        int state;

        switch (ts->kind) {
        case TEMP_FIXED:
        case TEMP_GLOBAL:
        case TEMP_TB:
            state = TS_DEAD | TS_MEM;
            break;
        case TEMP_EBB:
        case TEMP_CONST:
            state = TS_DEAD;
            break;
        default:
            g_assert_not_reached();
        }
        ts->state = state;
        la_reset_pref(ts);
    }
}

/* liveness analysis: sync globals back to memory.  */
static void la_global_sync(TCGContext *s, int ng)
{
    int i;

    for (i = 0; i < ng; ++i) {
        int state = s->temps[i].state;
        s->temps[i].state = state | TS_MEM;
        if (state == TS_DEAD) {
            /* If the global was previously dead, reset prefs.  */
            la_reset_pref(&s->temps[i]);
        }
    }
}

/*
 * liveness analysis: conditional branch: all temps are dead unless
 * explicitly live-across-conditional-branch; globals and local temps
 * should be synced.
 */
static void la_bb_sync(TCGContext *s, int ng, int nt)
{
    la_global_sync(s, ng);

    for (int i = ng; i < nt; ++i) {
        TCGTemp *ts = &s->temps[i];
        int state;

        switch (ts->kind) {
        case TEMP_TB:
            state = ts->state;
            ts->state = state | TS_MEM;
            if (state != TS_DEAD) {
                continue;
            }
            break;
        case TEMP_EBB:
        case TEMP_CONST:
            continue;
        default:
            g_assert_not_reached();
        }
        la_reset_pref(&s->temps[i]);
    }
}

/* liveness analysis: sync globals back to memory and kill.  */
static void la_global_kill(TCGContext *s, int ng)
{
    int i;

    for (i = 0; i < ng; i++) {
        s->temps[i].state = TS_DEAD | TS_MEM;
        la_reset_pref(&s->temps[i]);
    }
}

/* liveness analysis: note live globals crossing calls.  */
static void la_cross_call(TCGContext *s, int nt)
{
    TCGRegSet mask = ~tcg_target_call_clobber_regs;
    int i;

    for (i = 0; i < nt; i++) {
        TCGTemp *ts = &s->temps[i];
        if (!(ts->state & TS_DEAD)) {
            TCGRegSet *pset = la_temp_pref(ts);
            TCGRegSet set = *pset;

            set &= mask;
            /* If the combination is not possible, restart.  */
            if (set == 0) {
                set = tcg_target_available_regs[ts->type] & mask;
            }
            *pset = set;
        }
    }
}

/*
 * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
 * to TEMP_EBB, if possible.
 */
static void __attribute__((noinline))
liveness_pass_0(TCGContext *s)
{
    void * const multiple_ebb = (void *)(uintptr_t)-1;
    int nb_temps = s->nb_temps;
    TCGOp *op, *ebb;

    for (int i = s->nb_globals; i < nb_temps; ++i) {
        s->temps[i].state_ptr = NULL;
    }

    /*
     * Represent each EBB by the op at which it begins.  In the case of
     * the first EBB, this is the first op, otherwise it is a label.
     * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
     * within a single EBB, else MULTIPLE_EBB.
     */
    ebb = QTAILQ_FIRST(&s->ops);
    QTAILQ_FOREACH(op, &s->ops, link) {
        const TCGOpDef *def;
        int nb_oargs, nb_iargs;

        switch (op->opc) {
        case INDEX_op_set_label:
            ebb = op;
            continue;
        case INDEX_op_discard:
            continue;
        case INDEX_op_call:
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            break;
        default:
            def = &tcg_op_defs[op->opc];
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            break;
        }

        for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
            TCGTemp *ts = arg_temp(op->args[i]);

            if (ts->kind != TEMP_TB) {
                continue;
            }
            if (ts->state_ptr == NULL) {
                ts->state_ptr = ebb;
            } else if (ts->state_ptr != ebb) {
                ts->state_ptr = multiple_ebb;
            }
        }
    }

    /*
     * For TEMP_TB that turned out not to be used beyond one EBB,
     * reduce the liveness to TEMP_EBB.
     */
    for (int i = s->nb_globals; i < nb_temps; ++i) {
        TCGTemp *ts = &s->temps[i];
        if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
            ts->kind = TEMP_EBB;
        }
    }
}

/* Liveness analysis: update the opc_arg_life array to tell if a
   given input argument is dead. Instructions updating dead
   temporaries are removed. */
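/*
 * A sketch of the encoding, as consumed by the register allocator
 * below: ops are visited in reverse; for each op, op->life receives a
 * DEAD_ARG bit for every argument whose temp dies at this op, and a
 * SYNC_ARG bit for every output that must be flushed back to its
 * memory slot.
 */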
static void __attribute__((noinline))
liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.  */
    la_func_end(s, nb_globals, nb_temps);

    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        bool have_opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        switch (opc) {
        case INDEX_op_call:
            {
                const TCGHelperInfo *info = tcg_call_info(op);
                int call_flags = tcg_call_flags(op);

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead.  */
                for (i = 0; i < nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);
                }

                /* Not used -- it will be tcg_target_call_oarg_reg().  */
                memset(op->output_pref, 0, sizeof(op->output_pref));

                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                    TCG_CALL_NO_READ_GLOBALS))) {
                    la_global_kill(s, nb_globals);
                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                    la_global_sync(s, nb_globals);
                }

                /* Record arguments that die in this helper.  */
                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }

                /* For all live registers, remove call-clobbered prefs.  */
                la_cross_call(s, nb_temps);

                /*
                 * Input arguments are live for preceding opcodes.
                 *
                 * For those arguments that die, and will be allocated in
                 * registers, clear the register set for that arg, to be
                 * filled in below.  For args that will be on the stack,
                 * reset to any available reg.  Process arguments in reverse
                 * order so that if a temp is used more than once, the stack
                 * reset to max happens before the register reset to 0.
                 */
                for (i = nb_iargs - 1; i >= 0; i--) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    if (ts->state & TS_DEAD) {
                        switch (loc->kind) {
                        case TCG_CALL_ARG_NORMAL:
                        case TCG_CALL_ARG_EXTEND_U:
                        case TCG_CALL_ARG_EXTEND_S:
                            if (arg_slot_reg_p(loc->arg_slot)) {
                                *la_temp_pref(ts) = 0;
                                break;
                            }
                            /* fall through */
                        default:
                            *la_temp_pref(ts) =
                                tcg_target_available_regs[ts->type];
                            break;
                        }
                        ts->state &= ~TS_DEAD;
                    }
                }

                /*
                 * For each input argument, add its input register to prefs.
                 * If a temp is used once, this produces a single set bit;
                 * if a temp is used multiple times, this produces a set.
                 */
                for (i = 0; i < nb_iargs; i++) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    switch (loc->kind) {
                    case TCG_CALL_ARG_NORMAL:
                    case TCG_CALL_ARG_EXTEND_U:
                    case TCG_CALL_ARG_EXTEND_S:
                        if (arg_slot_reg_p(loc->arg_slot)) {
                            tcg_regset_set_reg(*la_temp_pref(ts),
                                tcg_target_call_iarg_regs[loc->arg_slot]);
                        }
                        break;
                    default:
                        break;
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            ts = arg_temp(op->args[0]);
            ts->state = TS_DEAD;
            la_reset_pref(ts);
            break;

        case INDEX_op_add2_i32:
            opc_new = INDEX_op_add_i32;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
            opc_new = INDEX_op_sub_i32;
            goto do_addsub2;
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add_i64;
            goto do_addsub2;
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub_i64;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part.  The result can be optimized to a simple
               add or sub.  This happens often for an x86_64 guest when
               the cpu mode is set to 32 bit.  */
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end.  */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                /* Fall through and mark the single-word operation live.  */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        case INDEX_op_mulu2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_muluh_i32;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
            goto do_mul2;
        case INDEX_op_muls2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_mulsh_i32;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
            goto do_mul2;
        case INDEX_op_mulu2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_muluh_i64;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
            goto do_mul2;
        case INDEX_op_muls2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_mulsh_i64;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
            goto do_mul2;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live.  */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects. */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
                goto do_remove;
            }
            goto do_not_remove;

        do_remove:
            tcg_op_remove(s, op);
            break;

        do_not_remove:
            for (i = 0; i < nb_oargs; i++) {
                ts = arg_temp(op->args[i]);

                /* Remember the preference of the uses that followed.  */
                if (i < ARRAY_SIZE(op->output_pref)) {
                    op->output_pref[i] = *la_temp_pref(ts);
                }

                /* Output args are dead.  */
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
                if (ts->state & TS_MEM) {
                    arg_life |= SYNC_ARG << i;
                }
                ts->state = TS_DEAD;
                la_reset_pref(ts);
            }

            /* If end of basic block, update.  */
            if (def->flags & TCG_OPF_BB_EXIT) {
                la_func_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_COND_BRANCH) {
                la_bb_sync(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_BB_END) {
                la_bb_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                la_global_sync(s, nb_globals);
                if (def->flags & TCG_OPF_CALL_CLOBBER) {
                    la_cross_call(s, nb_temps);
                }
            }

            /* Record arguments that die in this opcode.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
            }

            /* Input arguments are live for preceding opcodes.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    /* For operands that were dead, initially allow
                       all regs for the type.  */
                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
                    ts->state &= ~TS_DEAD;
                }
            }

            /* Incorporate constraints for this operand.  */
            switch (opc) {
            case INDEX_op_mov_i32:
            case INDEX_op_mov_i64:
                /* Note that these are TCG_OPF_NOT_PRESENT and do not
                   have proper constraints.  That said, special case
                   moves to propagate preferences backward.  */
                if (IS_DEAD_ARG(1)) {
                    *la_temp_pref(arg_temp(op->args[0]))
                        = *la_temp_pref(arg_temp(op->args[1]));
                }
                break;

            default:
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    const TCGArgConstraint *ct = &def->args_ct[i];
                    TCGRegSet set, *pset;

                    ts = arg_temp(op->args[i]);
                    pset = la_temp_pref(ts);
                    set = *pset;

                    set &= ct->regs;
                    if (ct->ialias) {
                        set &= output_pref(op, ct->alias_index);
                    }
                    /* If the combination is not possible, restart.  */
                    if (set == 0) {
                        set = ct->regs;
                    }
                    *pset = set;
                }
                break;
            }
            break;
        }
        op->life = arg_life;
    }
}

/* Liveness analysis: Convert indirect regs to direct temporaries.  */
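/*
 * An illustrative sketch of the rewrite (hypothetical IR, not actual
 * output): for an indirect global G with memory slot [base + off],
 * a use such as
 *     add_i32 t0, G, t1
 * becomes
 *     ld_i32  G', base, off     -- inserted only while G' is dead
 *     add_i32 t0, G', t1
 * where G' is the direct TEMP_EBB temporary created below; a matching
 * st_i32/st_i64 is inserted after the last write when a sync is
 * required.
 */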
static bool __attribute__((noinline))
liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dts->temp_subindex = its->temp_subindex;
            dts->kind = TEMP_EBB;
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = tcg_call_flags(op);
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts && arg_ts->state == TS_DEAD) {
                TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                  ? INDEX_op_ld_i32
                                  : INDEX_op_ld_i64);
                TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);

                lop->args[0] = temp_arg(dir_ts);
                lop->args[1] = temp_arg(arg_ts->mem_base);
                lop->args[2] = arg_ts->mem_offset;

                /* Loaded, but synced with memory.  */
                arg_ts->state = TS_MEM;
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[i] = temp_arg(dir_ts);
                changes = true;
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}

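/*
 * Allocate a slot in the TCG stack frame for @ts, sized and aligned
 * according to its base type.  If the temp is one part of a larger
 * subdivided object, all parts receive adjacent slots.  If the frame
 * is exhausted, code generation restarts with a smaller TB.
 */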
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
{
    intptr_t off;
    int size, align;

    /* When allocating an object, look at the full type. */
    size = tcg_type_size(ts->base_type);
    switch (ts->base_type) {
    case TCG_TYPE_I32:
        align = 4;
        break;
    case TCG_TYPE_I64:
    case TCG_TYPE_V64:
        align = 8;
        break;
    case TCG_TYPE_I128:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /*
         * Note that we do not require aligned storage for V256,
         * and that we provide alignment for I128 to match V128,
         * even if that's above what the host ABI requires.
         */
        align = 16;
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Assume the stack is sufficiently aligned.
     * This affects e.g. ARM NEON, where we have 8 byte stack alignment
     * and do not require 16 byte vector alignment.  This seems slightly
     * easier than fully parameterizing the above switch statement.
     */
    align = MIN(TCG_TARGET_STACK_ALIGN, align);
    off = ROUND_UP(s->current_frame_offset, align);

    /* If we've exhausted the stack frame, restart with a smaller TB. */
    if (off + size > s->frame_end) {
        tcg_raise_tb_overflow(s);
    }
    s->current_frame_offset = off + size;
#if defined(__sparc__)
    off += TCG_TARGET_STACK_BIAS;
#endif

    /* If the object was subdivided, assign memory to all the parts. */
    if (ts->base_type != ts->type) {
        int part_size = tcg_type_size(ts->type);
        int part_count = size / part_size;

        /*
         * Each part is allocated sequentially in tcg_temp_new_internal.
         * Jump back to the first part by subtracting the current index.
         */
        ts -= ts->temp_subindex;
        for (int i = 0; i < part_count; ++i) {
            ts[i].mem_offset = off + i * part_size;
            ts[i].mem_base = s->frame_temp;
            ts[i].mem_allocated = 1;
        }
    } else {
        ts->mem_offset = off;
        ts->mem_base = s->frame_temp;
        ts->mem_allocated = 1;
    }
}

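/*
 * The two helpers below maintain the invariant that reg_to_temp[] is
 * the inverse of the set of temps whose val_type is TEMP_VAL_REG:
 * each such temp occupies exactly one register, and every non-NULL
 * reg_to_temp[r] names a temp with ts->reg == r.
 */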
/* Assign @reg to @ts, and update reg_to_temp[]. */
static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
{
    if (ts->val_type == TEMP_VAL_REG) {
        TCGReg old = ts->reg;
        tcg_debug_assert(s->reg_to_temp[old] == ts);
        if (old == reg) {
            return;
        }
        s->reg_to_temp[old] = NULL;
    }
    tcg_debug_assert(s->reg_to_temp[reg] == NULL);
    s->reg_to_temp[reg] = ts;
    ts->val_type = TEMP_VAL_REG;
    ts->reg = reg;
}

/* Assign a non-register value type to @ts, and update reg_to_temp[]. */
static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
{
    tcg_debug_assert(type != TEMP_VAL_REG);
    if (ts->val_type == TEMP_VAL_REG) {
        TCGReg reg = ts->reg;
        tcg_debug_assert(s->reg_to_temp[reg] == ts);
        s->reg_to_temp[reg] = NULL;
    }
    ts->val_type = type;
}

static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);

/* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
   mark it free; otherwise mark it dead.  */
static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
{
    TCGTempVal new_type;

    switch (ts->kind) {
    case TEMP_FIXED:
        return;
    case TEMP_GLOBAL:
    case TEMP_TB:
        new_type = TEMP_VAL_MEM;
        break;
    case TEMP_EBB:
        new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
        break;
    case TEMP_CONST:
        new_type = TEMP_VAL_CONST;
        break;
    default:
        g_assert_not_reached();
    }
    set_temp_val_nonreg(s, ts, new_type);
}

/* Mark a temporary as dead.  */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
{
    temp_free_or_dead(s, ts, 1);
}

/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
   register needs to be allocated to store a constant.  If 'free_or_dead'
   is non-zero, subsequently release the temporary; if it is positive, the
   temp is dead; if it is negative, the temp is free.  */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            break;

        case TEMP_VAL_DEAD:
        default:
            g_assert_not_reached();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}

/* free register 'reg' by spilling the corresponding temporary if necessary */
static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
{
    TCGTemp *ts = s->reg_to_temp[reg];
    if (ts != NULL) {
        temp_sync(s, ts, allocated_regs, 0, -1);
    }
}

/**
 * tcg_reg_alloc:
 * @required_regs: Set of registers in which we must allocate.
 * @allocated_regs: Set of registers which must be avoided.
 * @preferred_regs: Set of registers we should prefer.
 * @rev: True if we search the registers in "indirect" order.
 *
 * The allocated register must be in @required_regs & ~@allocated_regs,
 * but if we can put it in @preferred_regs we may save a move later.
 */
static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
                            TCGRegSet allocated_regs,
                            TCGRegSet preferred_regs, bool rev)
{
    int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    reg_ct[1] = required_regs & ~allocated_regs;
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    /* Skip the preferred_regs option if it cannot be satisfied,
       or if the preference made no difference.  */
    f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /* Try free registers, preferences first.  */
    for (j = f; j < 2; j++) {
        TCGRegSet set = reg_ct[j];

        if (tcg_regset_single(set)) {
            /* One register in the set.  */
            TCGReg reg = tcg_regset_first(set);
            if (s->reg_to_temp[reg] == NULL) {
                return reg;
            }
        } else {
            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];
                if (s->reg_to_temp[reg] == NULL &&
                    tcg_regset_test_reg(set, reg)) {
                    return reg;
                }
            }
        }
    }

    /* We must spill something.  */
    for (j = f; j < 2; j++) {
        TCGRegSet set = reg_ct[j];

        if (tcg_regset_single(set)) {
            /* One register in the set.  */
            TCGReg reg = tcg_regset_first(set);
            tcg_reg_free(s, reg, allocated_regs);
            return reg;
        } else {
            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];
                if (tcg_regset_test_reg(set, reg)) {
                    tcg_reg_free(s, reg, allocated_regs);
                    return reg;
                }
            }
        }
    }

    g_assert_not_reached();
}

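/*
 * As tcg_reg_alloc, but allocate an aligned pair of consecutive
 * registers (reg, reg + 1) and return the lower index.  To minimize
 * spills, first look for a pair with both registers free, then with
 * one free, then with neither.
 */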
static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
                                 TCGRegSet allocated_regs,
                                 TCGRegSet preferred_regs, bool rev)
{
    int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* Ensure that if I is not in allocated_regs, I+1 is not either. */
    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /*
     * Skip the preferred_regs option if it cannot be satisfied,
     * or if the preference made no difference.
     */
    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    /*
     * Minimize the number of flushes by looking for 2 free registers first,
     * then a single flush, then two flushes.
     */
    for (fmin = 2; fmin >= 0; fmin--) {
        for (j = k; j < 2; j++) {
            TCGRegSet set = reg_ct[j];

            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];

                if (tcg_regset_test_reg(set, reg)) {
                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
                    if (f >= fmin) {
                        tcg_reg_free(s, reg, allocated_regs);
                        tcg_reg_free(s, reg + 1, allocated_regs);
                        return reg;
                    }
                }
            }
        }
    }
    g_assert_not_reached();
}

/* Make sure the temporary is in a register.  If needed, allocate the register
   from DESIRED while avoiding ALLOCATED.  */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I64) {
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * so do this generically.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        g_assert_not_reached();
    }
    set_temp_val_reg(s, ts, reg);
}

/* Save a temporary to memory. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant.  */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory. Keep a tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}

/* save globals to their canonical location and assume they can be
   modified by the following code. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        temp_save(s, &s->temps[i], allocated_regs);
    }
}

/* sync globals to their canonical location and assume they can be
   read by the following code. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        tcg_debug_assert(ts->val_type != TEMP_VAL_REG
                         || ts->kind == TEMP_FIXED
                         || ts->mem_coherent);
    }
}

/* at the end of a basic block, we assume all temporaries are dead and
   all globals are stored at their canonical location. */
static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
{
    int i;

    for (i = s->nb_globals; i < s->nb_temps; i++) {
        TCGTemp *ts = &s->temps[i];

        switch (ts->kind) {
        case TEMP_TB:
            temp_save(s, ts, allocated_regs);
            break;
        case TEMP_EBB:
            /* The liveness analysis already ensures that temps are dead.
               Keep a tcg_debug_assert for safety. */
            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
            break;
        case TEMP_CONST:
            /* Similarly, we should have freed any allocated register. */
            tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
            break;
        default:
            g_assert_not_reached();
        }
    }

    save_globals(s, allocated_regs);
}

/*
 * At a conditional branch, we assume all temporaries are dead unless
 * explicitly live-across-conditional-branch; all globals and local
 * temps are synced to their location.
 */
static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
{
    sync_globals(s, allocated_regs);

    for (int i = s->nb_globals; i < s->nb_temps; i++) {
        TCGTemp *ts = &s->temps[i];
        /*
         * The liveness analysis already ensures that temps are dead.
         * Keep tcg_debug_asserts for safety.
         */
        switch (ts->kind) {
        case TEMP_TB:
            tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
            break;
        case TEMP_EBB:
        case TEMP_CONST:
            break;
        default:
            g_assert_not_reached();
        }
    }
}

/*
4572
 * Specialized code generation for INDEX_op_mov_* with a constant.
4573
 */
4574
static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4575
                                  tcg_target_ulong val, TCGLifeData arg_life,
4576
                                  TCGRegSet preferred_regs)
4577
{
4578
    /* ENV should not be modified.  */
4579
    tcg_debug_assert(!temp_readonly(ots));
4580

4581
    /* The movi is not explicitly generated here.  */
4582
    set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4583
    ots->val = val;
4584
    ots->mem_coherent = 0;
4585
    if (NEED_SYNC_ARG(0)) {
4586
        temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4587
    } else if (IS_DEAD_ARG(0)) {
4588
        temp_dead(s, ots);
4589
    }
4590
}
4591

/*
 * Specialized code generation for INDEX_op_mov_*.
 */
static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGRegSet allocated_regs, preferred_regs;
    TCGTemp *ts, *ots;
    TCGType otype, itype;
    TCGReg oreg, ireg;

    allocated_regs = s->reserved_regs;
    preferred_regs = output_pref(op, 0);
    ots = arg_temp(op->args[0]);
    ts = arg_temp(op->args[1]);

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* Note that otype != itype for no-op truncation.  */
    otype = ots->type;
    itype = ts->type;

    if (ts->val_type == TEMP_VAL_CONST) {
        /* propagate constant or generate sti */
        tcg_target_ulong val = ts->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, ts);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
        return;
    }

    /* If the source value is in memory we're going to be forced
       to have it in a register in order to perform the copy.  Copy
       the SOURCE value into its own register first, that way we
       don't have to reload SOURCE the next time it is used. */
    if (ts->val_type == TEMP_VAL_MEM) {
        temp_load(s, ts, tcg_target_available_regs[itype],
                  allocated_regs, preferred_regs);
    }
    tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
    ireg = ts->reg;

    if (IS_DEAD_ARG(0)) {
        /* mov to a non-saved dead register makes no sense (even with
           liveness analysis disabled). */
        tcg_debug_assert(NEED_SYNC_ARG(0));
        if (!ots->mem_allocated) {
            temp_allocate_frame(s, ots);
        }
        tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, ts);
        }
        temp_dead(s, ots);
        return;
    }

    if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
        /*
         * The mov can be suppressed.  Kill input first, so that it
         * is unlinked from reg_to_temp, then set the output to the
         * reg that we saved from the input.
         */
        temp_dead(s, ts);
        oreg = ireg;
    } else {
        if (ots->val_type == TEMP_VAL_REG) {
            oreg = ots->reg;
        } else {
            /* Make sure to not spill the input register during allocation. */
            oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
                                 allocated_regs | ((TCGRegSet)1 << ireg),
                                 preferred_regs, ots->indirect_base);
        }
        if (!tcg_out_mov(s, otype, oreg, ireg)) {
            /*
             * Cross register class move not supported.
             * Store the source register into the destination slot
             * and leave the destination temp as TEMP_VAL_MEM.
             */
            assert(!temp_readonly(ots));
            if (!ots->mem_allocated) {
                temp_allocate_frame(s, ots);
            }
            tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
            set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
            ots->mem_coherent = 1;
            return;
        }
    }
    set_temp_val_reg(s, ots, oreg);
    ots->mem_coherent = 0;

    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, allocated_regs, 0, 0);
    }
}
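
/*
 * For example, "mov_i32 t1, t0" where t0 dies at this op emits no host
 * code at all: the input is killed and t1 simply inherits t0's register
 * via the IS_DEAD_ARG(1) path above.
 */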

/*
 * Specialized code generation for INDEX_op_dup_vec.
 */
static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGRegSet dup_out_regs, dup_in_regs;
    TCGTemp *its, *ots;
    TCGType itype, vtype;
    unsigned vece;
    int lowpart_ofs;
    bool ok;

    ots = arg_temp(op->args[0]);
    its = arg_temp(op->args[1]);

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    itype = its->type;
    vece = TCGOP_VECE(op);
    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    if (its->val_type == TEMP_VAL_CONST) {
        /* Propagate constant via movi -> dupi.  */
        tcg_target_ulong val = its->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, its);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
        return;
    }

    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGReg oreg;

        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
            /* Make sure to not spill the input register. */
            tcg_regset_set_reg(allocated_regs, its->reg);
        }
        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    switch (its->val_type) {
    case TEMP_VAL_REG:
        /*
         * The dup constraints must be broad, covering all possible VECE.
         * However, tcg_out_dup_vec() gets to see the VECE and we allow it
         * to fail, indicating that extra moves are required for that case.
         */
        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
                goto done;
            }
            /* Try again from memory or a vector input register.  */
        }
        if (!its->mem_coherent) {
            /*
             * The input register is not synced, and so an extra store
             * would be required to use memory.  Attempt an integer-vector
             * register move first.  We do not have a TCGRegSet for this.
             */
            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
                break;
            }
            /* Sync the temp back to its slot and load from there.  */
            temp_sync(s, its, s->reserved_regs, 0, 0);
        }
        /* fall through */

    case TEMP_VAL_MEM:
        lowpart_ofs = 0;
        if (HOST_BIG_ENDIAN) {
            lowpart_ofs = tcg_type_size(itype) - (1 << vece);
        }
        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
                             its->mem_offset + lowpart_ofs)) {
            goto done;
        }
        /* Load the input into the destination vector register. */
        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
        break;

    default:
        g_assert_not_reached();
    }

    /* We now have a vector input register, so dup must succeed. */
    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
    tcg_debug_assert(ok);

 done:
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, its);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, 0);
    }
    if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}
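
/*
 * The fallback chain above: dup directly from a suitable vector register;
 * else try an integer-to-vector register move; else sync the input to its
 * memory slot and try dupm; else a plain vector load followed by a
 * register-to-register dup, which must succeed.
 */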

static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];
    TCGCond op_cond;

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    switch (op->opc) {
    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        op_cond = op->args[2];
        break;
    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
    case INDEX_op_negsetcond_i32:
    case INDEX_op_negsetcond_i64:
    case INDEX_op_cmp_vec:
        op_cond = op->args[3];
        break;
    case INDEX_op_brcond2_i32:
        op_cond = op->args[4];
        break;
    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
    case INDEX_op_setcond2_i32:
    case INDEX_op_cmpsel_vec:
        op_cond = op->args[5];
        break;
    default:
        /* No condition within opcode. */
        op_cond = TCG_COND_ALWAYS;
        break;
    }

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, i_required_regs;
        bool allocate_new_reg, copyto_new_reg;
        TCGTemp *ts2;
        int i1, i2;

        i = def->args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
                                      op_cond, TCGOP_VECE(op))) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        reg = ts->reg;
        i_preferred_regs = 0;
        i_required_regs = arg_ct->regs;
        allocate_new_reg = false;
        copyto_new_reg = false;

        switch (arg_ct->pair) {
        case 0: /* not paired */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);

                /*
                 * If the input is readonly, then it cannot also be an
                 * output and aliased to itself.  If the input is not
                 * dead after the instruction, we must allocate a new
                 * register and move it.
                 */
                if (temp_readonly(ts) || !IS_DEAD_ARG(i)
                    || def->args_ct[arg_ct->alias_index].newreg) {
                    allocate_new_reg = true;
                } else if (ts->val_type == TEMP_VAL_REG) {
                    /*
                     * Check if the current register has already been
                     * allocated for another input.
                     */
                    allocate_new_reg =
                        tcg_regset_test_reg(i_allocated_regs, reg);
                }
            }
            if (!allocate_new_reg) {
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                reg = ts->reg;
                allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
            }
            if (allocate_new_reg) {
                /*
                 * Allocate a new register matching the constraint
                 * and move the temporary register into it.
                 */
                temp_load(s, ts, tcg_target_available_regs[ts->type],
                          i_allocated_regs, 0);
                reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
                                    i_preferred_regs, ts->indirect_base);
                copyto_new_reg = true;
            }
            break;

        case 1:
            /* First of an input pair; if i1 == i2, the second is an output. */
            i1 = i;
            i2 = arg_ct->pair_index;
            ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;

            /*
             * It is easier to default to allocating a new pair
             * and to identify a few cases where it's not required.
             */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);
                if (IS_DEAD_ARG(i1) &&
                    IS_DEAD_ARG(i2) &&
                    !temp_readonly(ts) &&
                    ts->val_type == TEMP_VAL_REG &&
                    ts->reg < TCG_TARGET_NB_REGS - 1 &&
                    tcg_regset_test_reg(i_required_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
                    (ts2
                     ? ts2->val_type == TEMP_VAL_REG &&
                       ts2->reg == reg + 1 &&
                       !temp_readonly(ts2)
                     : s->reg_to_temp[reg + 1] == NULL)) {
                    break;
                }
            } else {
                /* Without aliasing, the pair must also be an input. */
                tcg_debug_assert(ts2);
                if (ts->val_type == TEMP_VAL_REG &&
                    ts2->val_type == TEMP_VAL_REG &&
                    ts2->reg == reg + 1 &&
                    tcg_regset_test_reg(i_required_regs, reg)) {
                    break;
                }
            }
            reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
                                     0, ts->indirect_base);
            goto do_pair;

        case 2: /* pair second */
            reg = new_args[arg_ct->pair_index] + 1;
            goto do_pair;

        case 3: /* ialias with second output, no first input */
            tcg_debug_assert(arg_ct->ialias);
            i_preferred_regs = output_pref(op, arg_ct->alias_index);

            if (IS_DEAD_ARG(i) &&
                !temp_readonly(ts) &&
                ts->val_type == TEMP_VAL_REG &&
                reg > 0 &&
                s->reg_to_temp[reg - 1] == NULL &&
                tcg_regset_test_reg(i_required_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
                tcg_regset_set_reg(i_allocated_regs, reg - 1);
                break;
            }
            reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
                                     i_allocated_regs, 0,
                                     ts->indirect_base);
            tcg_regset_set_reg(i_allocated_regs, reg);
            reg += 1;
            goto do_pair;

        do_pair:
            /*
             * If an aliased input is not dead after the instruction,
             * we must allocate a new register and move it.
             */
            if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
                TCGRegSet t_allocated_regs = i_allocated_regs;

                /*
                 * Because of the alias, and the continued life, make sure
                 * that the temp is somewhere *other* than the reg pair,
                 * and we get a copy in reg.
                 */
                tcg_regset_set_reg(t_allocated_regs, reg);
                tcg_regset_set_reg(t_allocated_regs, reg + 1);
                if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
                    /* If ts was already in reg, copy it somewhere else. */
                    TCGReg nr;
                    bool ok;

                    tcg_debug_assert(ts->kind != TEMP_FIXED);
                    nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
                                       t_allocated_regs, 0, ts->indirect_base);
                    ok = tcg_out_mov(s, ts->type, nr, reg);
                    tcg_debug_assert(ok);

                    set_temp_val_reg(s, ts, nr);
                } else {
                    temp_load(s, ts, tcg_target_available_regs[ts->type],
                              t_allocated_regs, 0);
                    copyto_new_reg = true;
                }
            } else {
                /* Preferably allocate to reg, otherwise copy. */
                i_required_regs = (TCGRegSet)1 << reg;
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                copyto_new_reg = ts->reg != reg;
            }
            break;

        default:
            g_assert_not_reached();
        }

        if (copyto_new_reg) {
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for (k = 0; k < nb_oargs; k++) {
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            switch (arg_ct->pair) {
            case 0: /* not paired */
                if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                    reg = new_args[arg_ct->alias_index];
                } else if (arg_ct->newreg) {
                    reg = tcg_reg_alloc(s, arg_ct->regs,
                                        i_allocated_regs | o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                } else {
                    reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                }
                break;

            case 1: /* first of pair */
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                } else if (arg_ct->newreg) {
                    reg = tcg_reg_alloc_pair(s, arg_ct->regs,
                                             i_allocated_regs | o_allocated_regs,
                                             output_pref(op, k),
                                             ts->indirect_base);
                } else {
                    reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
                                             output_pref(op, k),
                                             ts->indirect_base);
                }
                break;

            case 2: /* second of pair */
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                } else {
                    reg = new_args[arg_ct->pair_index] + 1;
                }
                break;

            case 3: /* first of pair, aliasing with a second input */
                tcg_debug_assert(!arg_ct->newreg);
                reg = new_args[arg_ct->pair_index] - 1;
                break;

            default:
                g_assert_not_reached();
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    switch (op->opc) {
    case INDEX_op_ext8s_i32:
        tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext8s_i64:
        tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
        tcg_out_ext8u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16s_i32:
        tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16s_i64:
        tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext16u_i64:
        tcg_out_ext16u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext_i32_i64:
        tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_extu_i32_i64:
        tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_extrl_i64_i32:
        tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
        break;
    default:
        if (def->flags & TCG_OPF_VECTOR) {
            tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                           new_args, const_args);
        } else {
            tcg_out_op(s, op->opc, new_args, const_args);
        }
        break;
    }

    /* move the outputs in the correct register if needed */
    for (i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
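
/*
 * For example, on a two-address host such as x86_64, an op whose output
 * constraint aliases input 1 (the "0" constraint) reuses the input's
 * register directly when that input dies here; otherwise the input is
 * first copied into a fresh register so the live value is not clobbered.
 */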

static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);
    itsl = arg_temp(op->args[1]);
    itsh = arg_temp(op->args[2]);

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs =
            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
        TCGReg oreg;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }
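
    /*
     * dup_const replicates the low (1 << vece)-byte element across 64
     * bits, e.g. dup_const(MO_8, 0x3c) == 0x3c3c3c3c3c3c3c3cull, so the
     * tests above select the narrowest element size whose replication
     * reproduces the combined constant.
     */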

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
        itsh->temp_subindex == !HOST_BIG_ENDIAN &&
        itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
        TCGTemp *its = itsl - HOST_BIG_ENDIAN;

        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}

static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
                         TCGRegSet allocated_regs)
{
    if (ts->val_type == TEMP_VAL_REG) {
        if (ts->reg != reg) {
            tcg_reg_free(s, reg, allocated_regs);
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
    } else {
        TCGRegSet arg_set = 0;

        tcg_reg_free(s, reg, allocated_regs);
        tcg_regset_set_reg(arg_set, reg);
        temp_load(s, ts, arg_set, allocated_regs, 0);
    }
}

static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
                         TCGRegSet allocated_regs)
{
    /*
     * When the destination is on the stack, load up the temp and store.
     * If there are many call-saved registers, the temp might live to
     * see another use; otherwise it'll be discarded.
     */
    temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
    tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
               arg_slot_stk_ofs(arg_slot));
}

static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
                            TCGTemp *ts, TCGRegSet *allocated_regs)
{
    if (arg_slot_reg_p(l->arg_slot)) {
        TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
        load_arg_reg(s, reg, ts, *allocated_regs);
        tcg_regset_set_reg(*allocated_regs, reg);
    } else {
        load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
    }
}

static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
                         intptr_t ref_off, TCGRegSet *allocated_regs)
{
    TCGReg reg;

    if (arg_slot_reg_p(arg_slot)) {
        reg = tcg_target_call_iarg_regs[arg_slot];
        tcg_reg_free(s, reg, *allocated_regs);
        tcg_out_addi_ptr(s, reg, ref_base, ref_off);
        tcg_regset_set_reg(*allocated_regs, reg);
    } else {
        reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
                            *allocated_regs, 0, false);
        tcg_out_addi_ptr(s, reg, ref_base, ref_off);
        tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
                   arg_slot_stk_ofs(arg_slot));
    }
}

static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info = tcg_call_info(op);
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
     */
    for (i = nb_iargs - 1; i >= 0; --i) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            load_arg_normal(s, loc, ts, &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF:
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
                         arg_slot_stk_ofs(loc->ref_slot),
                         &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF_N:
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Mark dead temporaries and free the associated registers.  */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Clobber call registers.  */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /*
     * Save globals if they might be written by the helper,
     * sync them if they might be read.
     */
    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    /*
     * If the ABI passes a pointer to the returned struct as the first
     * argument, load that now.  Pass a pointer to the output home slot.
     */
    if (info->out_kind == TCG_CALL_RET_BY_REF) {
        TCGTemp *ts = arg_temp(op->args[0]);

        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
    }

    tcg_out_call(s, tcg_call_func(op), info);

    /* Assign output registers and emit moves if needed.  */
    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
        }
        break;

    case TCG_CALL_RET_BY_VEC:
        {
            TCGTemp *ts = arg_temp(op->args[0]);

            tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
            tcg_debug_assert(ts->temp_subindex == 0);
            if (!ts->mem_allocated) {
                temp_allocate_frame(s, ts);
            }
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       ts->mem_base->reg, ts->mem_offset);
        }
        /* fall through to mark all parts in memory */

    case TCG_CALL_RET_BY_REF:
        /* The callee has performed a write through the reference. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            ts->val_type = TEMP_VAL_MEM;
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Flush or discard output registers as needed. */
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
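
/*
 * For example, a helper registered with TCG_CALL_NO_WRITE_GLOBALS only
 * needs sync_globals(): globals stay register-resident and just have
 * their memory copies made coherent, while an unannotated helper forces
 * the full save_globals() spill before the call.
 */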

/**
 * atom_and_align_for_opc:
 * @s: tcg context
 * @opc: memory operation code
 * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
 * @allow_two_ops: true if we are prepared to issue two operations
 *
 * Return the alignment and atomicity to use for the inline fast path
 * for the given memory operation.  The alignment may be larger than
 * that specified in @opc, and the correct alignment will be diagnosed
 * by the slow path helper.
 *
 * If @allow_two_ops, the host is prepared to test for 2x alignment,
 * and issue two loads or stores for subalignment.
 */
static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
{
    MemOp align = get_alignment_bits(opc);
    MemOp size = opc & MO_SIZE;
    MemOp half = size ? size - 1 : 0;
    MemOp atom = opc & MO_ATOM_MASK;
    MemOp atmax;

    switch (atom) {
    case MO_ATOM_NONE:
        /* The operation requires no specific atomicity. */
        atmax = MO_8;
        break;

    case MO_ATOM_IFALIGN:
        atmax = size;
        break;

    case MO_ATOM_IFALIGN_PAIR:
        atmax = half;
        break;

    case MO_ATOM_WITHIN16:
        atmax = size;
        if (size == MO_128) {
            /* Misalignment implies !within16, and therefore no atomicity. */
        } else if (host_atom != MO_ATOM_WITHIN16) {
            /* The host does not implement within16, so require alignment. */
            align = MAX(align, size);
        }
        break;

    case MO_ATOM_WITHIN16_PAIR:
        atmax = size;
        /*
         * Misalignment implies !within16, and therefore half atomicity.
         * Any host prepared for two operations can implement this with
         * half alignment.
         */
        if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
            align = MAX(align, half);
        }
        break;

    case MO_ATOM_SUBALIGN:
        atmax = size;
        if (host_atom != MO_ATOM_SUBALIGN) {
            /* If unaligned but not odd, there are subobjects up to half. */
            if (allow_two_ops) {
                align = MAX(align, half);
            } else {
                align = MAX(align, size);
            }
        }
        break;

    default:
        g_assert_not_reached();
    }

    return (TCGAtomAlign){ .atom = atmax, .align = align };
}
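
/*
 * For example, an 8-byte access that requests MO_ATOM_WITHIN16 but no
 * alignment, on a host that is only atomic for aligned accesses
 * (MO_ATOM_IFALIGN), yields { .atom = MO_64, .align = MO_64 }: the fast
 * path then demands full alignment and misaligned accesses fall through
 * to the slow path helper.
 */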

/*
 * Similarly for qemu_ld/st slow path helpers.
 * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
 * using only the provided backend tcg_out_* functions.
 */

static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
{
    int ofs = arg_slot_stk_ofs(slot);

    /*
     * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
     * require extension to uint64_t, adjust the address for uint32_t.
     */
    if (HOST_BIG_ENDIAN &&
        TCG_TARGET_REG_BITS == 64 &&
        type == TCG_TYPE_I32) {
        ofs += 4;
    }
    return ofs;
}
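
/*
 * On a 64-bit big-endian host, an I32 argument occupies the high-address
 * half of its 8-byte slot: e.g. if arg_slot_stk_ofs(slot) were 16, the
 * 32-bit value would be stored at offset 20.
 */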

static void tcg_out_helper_load_slots(TCGContext *s,
                                      unsigned nmov, TCGMovExtend *mov,
                                      const TCGLdstHelperParam *parm)
{
    unsigned i;
    TCGReg dst3;

    /*
     * Start from the end, storing to the stack first.
     * This frees those registers, so we need not consider overlap.
     */
    for (i = nmov; i-- > 0; ) {
        unsigned slot = mov[i].dst;

        if (arg_slot_reg_p(slot)) {
            goto found_reg;
        }

        TCGReg src = mov[i].src;
        TCGType dst_type = mov[i].dst_type;
        MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;

        /* The argument is going onto the stack; extend into scratch. */
        if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
            tcg_debug_assert(parm->ntmp != 0);
            mov[i].dst = src = parm->tmp[0];
            tcg_out_movext1(s, &mov[i]);
        }

        tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
                   tcg_out_helper_stk_ofs(dst_type, slot));
    }
    return;

 found_reg:
    /*
     * The remaining arguments are in registers.
     * Convert slot numbers to argument registers.
     */
    nmov = i + 1;
    for (i = 0; i < nmov; ++i) {
        mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
    }

    switch (nmov) {
    case 4:
        /* The backend must have provided enough temps for the worst case. */
        tcg_debug_assert(parm->ntmp >= 2);

        dst3 = mov[3].dst;
        for (unsigned j = 0; j < 3; ++j) {
            if (dst3 == mov[j].src) {
                /*
                 * Conflict. Copy the source to a temporary, perform the
                 * remaining moves, then the extension from our scratch
                 * on the way out.
                 */
                TCGReg scratch = parm->tmp[1];

                tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
                tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
                tcg_out_movext1_new_src(s, &mov[3], scratch);
                return;
            }
        }

        /* No conflicts: perform this move and continue. */
        tcg_out_movext1(s, &mov[3]);
        /* fall through */

    case 3:
        tcg_out_movext3(s, mov, mov + 1, mov + 2,
                        parm->ntmp ? parm->tmp[0] : -1);
        break;
    case 2:
        tcg_out_movext2(s, mov, mov + 1,
                        parm->ntmp ? parm->tmp[0] : -1);
        break;
    case 1:
        tcg_out_movext1(s, mov);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
                                    TCGType type, tcg_target_long imm,
                                    const TCGLdstHelperParam *parm)
{
    if (arg_slot_reg_p(slot)) {
        tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
    } else {
        int ofs = tcg_out_helper_stk_ofs(type, slot);
        if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
            tcg_debug_assert(parm->ntmp != 0);
            tcg_out_movi(s, type, parm->tmp[0], imm);
            tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
        }
    }
}

static void tcg_out_helper_load_common_args(TCGContext *s,
                                            const TCGLabelQemuLdst *ldst,
                                            const TCGLdstHelperParam *parm,
                                            const TCGHelperInfo *info,
                                            unsigned next_arg)
{
    TCGMovExtend ptr_mov = {
        .dst_type = TCG_TYPE_PTR,
        .src_type = TCG_TYPE_PTR,
        .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
    };
    const TCGCallArgumentLoc *loc = &info->in[0];
    TCGType type;
    unsigned slot;
    tcg_target_ulong imm;

    /*
     * Handle env, which is always first.
     */
    ptr_mov.dst = loc->arg_slot;
    ptr_mov.src = TCG_AREG0;
    tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);

    /*
     * Handle oi.
     */
    imm = ldst->oi;
    loc = &info->in[next_arg];
    type = TCG_TYPE_I32;
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
        break;
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        /* No extension required for MemOpIdx. */
        tcg_debug_assert(imm <= INT32_MAX);
        type = TCG_TYPE_REG;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
    next_arg++;

    /*
     * Handle ra.
     */
    loc = &info->in[next_arg];
    slot = loc->arg_slot;
    if (parm->ra_gen) {
        int arg_reg = -1;
        TCGReg ra_reg;

        if (arg_slot_reg_p(slot)) {
            arg_reg = tcg_target_call_iarg_regs[slot];
        }
        ra_reg = parm->ra_gen(s, ldst, arg_reg);

        ptr_mov.dst = slot;
        ptr_mov.src = ra_reg;
        tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
    } else {
        imm = (uintptr_t)ldst->raddr;
        tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
    }
}

static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
                                       const TCGCallArgumentLoc *loc,
                                       TCGType dst_type, TCGType src_type,
                                       TCGReg lo, TCGReg hi)
{
    MemOp reg_mo;

    if (dst_type <= TCG_TYPE_REG) {
        MemOp src_ext;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
            src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
            break;
        case TCG_CALL_ARG_EXTEND_U:
            dst_type = TCG_TYPE_REG;
            src_ext = MO_UL;
            break;
        case TCG_CALL_ARG_EXTEND_S:
            dst_type = TCG_TYPE_REG;
            src_ext = MO_SL;
            break;
        default:
            g_assert_not_reached();
        }

        mov[0].dst = loc->arg_slot;
        mov[0].dst_type = dst_type;
        mov[0].src = lo;
        mov[0].src_type = src_type;
        mov[0].src_ext = src_ext;
        return 1;
    }

    if (TCG_TARGET_REG_BITS == 32) {
        assert(dst_type == TCG_TYPE_I64);
        reg_mo = MO_32;
    } else {
        assert(dst_type == TCG_TYPE_I128);
        reg_mo = MO_64;
    }

    mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
    mov[0].src = lo;
    mov[0].dst_type = TCG_TYPE_REG;
    mov[0].src_type = TCG_TYPE_REG;
    mov[0].src_ext = reg_mo;

    mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
    mov[1].src = hi;
    mov[1].dst_type = TCG_TYPE_REG;
    mov[1].src_type = TCG_TYPE_REG;
    mov[1].src_ext = reg_mo;

    return 2;
}
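
/*
 * When the value does not fit one host register (I64 on a 32-bit host,
 * I128 on a 64-bit host), two moves are described; loc[HOST_BIG_ENDIAN]
 * receives the low part, so the pair lands in the argument slots in
 * host word order.
 */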

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[2];
    unsigned next_arg, nmov;
    MemOp mop = get_memop(ldst->oi);

    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_ld32_mmu;
        break;
    case MO_64:
        info = &info_helper_ld64_mmu;
        break;
    case MO_128:
        info = &info_helper_ld128_mmu;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;

    loc = &info->in[next_arg];
    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /*
         * 32-bit host with 32-bit guest: zero-extend the guest address
         * to 64-bits for the helper by storing the low part, then
         * load a zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addrlo_reg, -1);
        tcg_out_helper_load_slots(s, 1, mov, parm);

        tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
                                TCG_TYPE_I32, 0, parm);
        next_arg += 2;
    } else {
        nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                      ldst->addrlo_reg, ldst->addrhi_reg);
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        next_arg += nmov;
    }

    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
    case TCG_CALL_RET_BY_VEC:
        break;
    case TCG_CALL_RET_BY_REF:
        /*
         * The return reference is in the first argument slot.
         * We need memory in which to return: re-use the top of stack.
         */
        {
            int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;

            if (arg_slot_reg_p(0)) {
                tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
            } else {
                tcg_debug_assert(parm->ntmp != 0);
                tcg_out_addi_ptr(s, parm->tmp[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
                tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                           TCG_REG_CALL_STACK, ofs_slot0);
            }
        }
        break;
    default:
        g_assert_not_reached();
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}

static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                  bool load_sign,
                                  const TCGLdstHelperParam *parm)
{
    MemOp mop = get_memop(ldst->oi);
    TCGMovExtend mov[2];
    int ofs_slot0;

    switch (ldst->type) {
    case TCG_TYPE_I64:
        if (TCG_TARGET_REG_BITS == 32) {
            break;
        }
        /* fall through */

    case TCG_TYPE_I32:
        mov[0].dst = ldst->datalo_reg;
        mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
        mov[0].dst_type = ldst->type;
        mov[0].src_type = TCG_TYPE_REG;

        /*
         * If load_sign, then we allowed the helper to perform the
         * appropriate sign extension to tcg_target_ulong, and all
         * we need now is a plain move.
         *
         * If not, then we expect the relevant extension instruction
         * to be no more expensive than a move, and we thus save the
         * icache etc. by only using one of two helper functions.
         */
        if (load_sign || !(mop & MO_SIGN)) {
            if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
                mov[0].src_ext = MO_32;
            } else {
                mov[0].src_ext = MO_64;
            }
        } else {
            mov[0].src_ext = mop & MO_SSIZE;
        }
        tcg_out_movext1(s, mov);
        return;

    case TCG_TYPE_I128:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            break;
        case TCG_CALL_RET_BY_VEC:
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       TCG_REG_CALL_STACK, ofs_slot0);
            /* fall through */
        case TCG_CALL_RET_BY_REF:
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
            return;
        default:
            g_assert_not_reached();
        }
        break;

    default:
        g_assert_not_reached();
    }

    mov[0].dst = ldst->datalo_reg;
    mov[0].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
    mov[0].dst_type = TCG_TYPE_REG;
    mov[0].src_type = TCG_TYPE_REG;
    mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    mov[1].dst = ldst->datahi_reg;
    mov[1].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
    mov[1].dst_type = TCG_TYPE_REG;
    mov[1].src_type = TCG_TYPE_REG;
    mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
}
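
/*
 * For example, a signed byte load (MO_SB) into an I32 temp with
 * load_sign == false sets src_ext = MO_SB above, folding the sign
 * extension into the single move from the return register.
 */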

static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[4];
    TCGType data_type;
    unsigned next_arg, nmov, n;
    MemOp mop = get_memop(ldst->oi);

    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_st32_mmu;
        data_type = TCG_TYPE_I32;
        break;
    case MO_64:
        info = &info_helper_st64_mmu;
        data_type = TCG_TYPE_I64;
        break;
    case MO_128:
        info = &info_helper_st128_mmu;
        data_type = TCG_TYPE_I128;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;
    nmov = 0;

    /* Handle addr argument. */
    loc = &info->in[next_arg];
    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /*
         * 32-bit host with 32-bit guest: zero-extend the guest address
         * to 64-bits for the helper by storing the low part.  Later,
         * after we have processed the register inputs, we will load a
         * zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addrlo_reg, -1);
        next_arg += 2;
        nmov += 1;
    } else {
        n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                   ldst->addrlo_reg, ldst->addrhi_reg);
        next_arg += n;
        nmov += n;
    }

    /* Handle data argument. */
    loc = &info->in[next_arg];
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
                                   ldst->datalo_reg, ldst->datahi_reg);
        next_arg += n;
        nmov += n;
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        break;

    case TCG_CALL_ARG_BY_REF:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_debug_assert(data_type == TCG_TYPE_I128);
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));

        tcg_out_helper_load_slots(s, nmov, mov, parm);

        if (arg_slot_reg_p(loc->arg_slot)) {
            tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
                             TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
        } else {
            tcg_debug_assert(parm->ntmp != 0);
            tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
            tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                       TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
        }
        next_arg += 2;
        break;

    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /* Zero extend the address by loading a zero for the high part. */
        loc = &info->in[1 + !HOST_BIG_ENDIAN];
        tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}

int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
{
    int i, start_words, num_insns;
    TCGOp *op;

    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

    tcg_optimize(s);

    reachable_code_pass(s);
    liveness_pass_0(s);
    liveness_pass_1(s);
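
    /*
     * The passes above: tcg_optimize folds and propagates constants,
     * reachable_code_pass drops ops that can never execute, and the
     * liveness passes attach the per-op dead/sync data consumed by the
     * register allocator.
     */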

6130
    if (s->nb_indirects > 0) {
6131
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6132
                     && qemu_log_in_addr_range(pc_start))) {
6133
            FILE *logfile = qemu_log_trylock();
6134
            if (logfile) {
6135
                fprintf(logfile, "OP before indirect lowering:\n");
6136
                tcg_dump_ops(s, logfile, false);
6137
                fprintf(logfile, "\n");
6138
                qemu_log_unlock(logfile);
6139
            }
6140
        }
6141

6142
        /* Replace indirect temps with direct temps.  */
6143
        if (liveness_pass_2(s)) {
6144
            /* If changes were made, re-run liveness.  */
6145
            liveness_pass_1(s);
6146
        }
6147
    }
6148

6149
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6150
                 && qemu_log_in_addr_range(pc_start))) {
6151
        FILE *logfile = qemu_log_trylock();
6152
        if (logfile) {
6153
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
6154
            tcg_dump_ops(s, logfile, true);
6155
            fprintf(logfile, "\n");
6156
            qemu_log_unlock(logfile);
6157
        }
6158
    }
6159

6160
    /* Initialize goto_tb jump offsets. */
6161
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6162
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6163
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6164
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6165

6166
    tcg_reg_alloc_start(s);
6167

6168
    /*
6169
     * Reset the buffer pointers when restarting after overflow.
6170
     * TODO: Move this into translate-all.c with the rest of the
6171
     * buffer management.  Having only this done here is confusing.
6172
     */
6173
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6174
    s->code_ptr = s->code_buf;
6175

6176
#ifdef TCG_TARGET_NEED_LDST_LABELS
6177
    QSIMPLEQ_INIT(&s->ldst_labels);
6178
#endif
6179
#ifdef TCG_TARGET_NEED_POOL_LABELS
6180
    s->pool_labels = NULL;
6181
#endif
6182

6183
    start_words = s->insn_start_words;
6184
    s->gen_insn_data =
6185
        tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6186

6187
    tcg_out_tb_start(s);
6188

6189
    num_insns = -1;
6190
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < start_words; ++i) {
                s->gen_insn_data[num_insns * start_words + i] =
                    tcg_get_insn_start_param(op, i);
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: specialized register allocator routines for the most
               common argument patterns would speed this up further.  */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of the block.  */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* Flush the instruction cache over both the execute (rx) and
       write (rw) views of the generated code.  */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}

#ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE, which both supplies the e_machine value
       placed in the ELF image and signals support for the feature.

   (2) Define tcg_register_jit.  This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int with the constructed .debug_frame.
*/
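
/*
 * A minimal sketch of steps (2) and (3), modeled loosely on existing
 * backends' tcg-target.c.inc; it is illustrative only and never compiled.
 * The fde_insns bytes and the return column are placeholders that a real
 * backend replaces with the DW_CFA program for its ABI.
 */
#if 0
typedef struct {
    DebugFrameHeader h;
    uint8_t fde_insns[4];   /* DW_CFA_* opcodes describing the prologue */
} DebugFrame;

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE) - 4,  /* length excludes .len itself */
    .h.cie.id = -1,                          /* -1 marks the CIE */
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,                /* sleb128 -8 */
    .h.cie.return_column = 16,               /* placeholder RA column */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
    .fde_insns = { 0, 0, 0, 0 },             /* placeholder CFA program */
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif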

/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};
};
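
/*
 * GDB plants a breakpoint inside __jit_debug_register_code; the noinline
 * attribute and the empty asm keep the compiler from inlining or eliding
 * the function.  Each call then gives an attached debugger the chance to
 * re-read __jit_debug_descriptor and see what changed.
 */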
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface.  */
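
/*
 * QEMU registers a single image below and never tears it down, but for
 * reference this is how an entry would be unregistered under the same
 * GDB JIT protocol -- a sketch only, with a hypothetical helper name,
 * never compiled:
 */
#if 0
static void jit_unregister(struct jit_code_entry *entry)
{
    /* Unlink ENTRY from the doubly linked list rooted at first_entry. */
    if (entry->prev_entry) {
        entry->prev_entry->next_entry = entry->next_entry;
    } else {
        __jit_debug_descriptor.first_entry = entry->next_entry;
    }
    if (entry->next_entry) {
        entry->next_entry->prev_entry = entry->prev_entry;
    }
    /* Point the debugger at the entry and announce its removal. */
    __jit_debug_descriptor.relevant_entry = entry;
    __jit_debug_descriptor.action_flag = JIT_UNREGISTER_FN;
    __jit_debug_register_code();
}
#endif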

/*
 * Return the offset of STR within STRTAB, scanning past the initial NUL.
 * STR must be present: the scan is unbounded and relies on the caller
 * asking only for strings known to be in the table.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        p += strlen(p) + 1;
    }
}

static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };
    };
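
    /*
     * The "di" and "da" members above form a minimal DWARF pair
     * (.debug_info plus .debug_abbrev): one compile unit containing one
     * DW_TAG_subprogram named code_gen_buffer that spans the whole buffer,
     * which is all a debugger needs to symbolize JIT frames.
     */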

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            .e_ehsize = sizeof(ElfW(Ehdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address.  */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

    /* We only need a single jit entry; statically allocate it.  */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities.  */
    {
        g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
        FILE *f = fopen(jit, "w+b");
        if (f) {
            if (fwrite(img, img_size, 1, f) != 1) {
                /* Consume the return value to silence warn_unused_result;
                   there is nothing useful to do on a short write here.  */
            }
            fclose(f);
        }
    }
#endif

    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
#else
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier.  */

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

void tcg_register_jit(const void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */

#if !TCG_TARGET_MAYBE_vec
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
#endif
