SandboXP

Форк
0
/
generate_interpreter.js 
492 строки · 14.0 Кб
1
#!/usr/bin/env node
2
"use strict";
3

4
const assert = require("assert").strict;
5
const fs = require("fs");
6
const path = require("path");
7
const x86_table = require("./x86_table");
8
const rust_ast = require("./rust_ast");
9
const { hex, mkdirpSync, get_switch_value, get_switch_exist, finalize_table_rust } = require("./util");
10

11
// Directory that receives the generated Rust sources (created if missing).
const OUT_DIR = path.join(__dirname, "..", "src/rust/gen/");

mkdirpSync(OUT_DIR);

// Command line selection: "--table <name>" picks a single table,
// "--all" picks every table.
const table_arg = get_switch_value("--table");
const gen_all = get_switch_exist("--all");

// Maps each table name to whether it was requested on the command line.
const to_generate = {};

for(const table_name of ["interpreter", "interpreter0f"])
{
    to_generate[table_name] = gen_all || table_arg === table_name;
}

// Refuse to run when nothing was selected.
assert(
    Object.values(to_generate).some(Boolean),
    "Pass --table [interpreter|interpreter0f] or --all to pick which tables to generate"
);

gen_table();
28

29
/**
 * Wrap a Rust expression producing a `Result` in a `match` that yields the
 * `Ok` payload and returns from the enclosing generated function on `Err`.
 *
 * @param {string} imm - Rust source text of the expression to wrap
 * @returns {string} Rust `match` expression
 */
function wrap_imm_call(imm)
{
    const ok_arm = "Ok(o) => o";
    const err_arm = "Err(()) => return";

    return `match ${imm} { ${ok_arm}, ${err_arm} }`;
}
33

34
/**
 * Build the Rust expression that reads the immediate operand of an encoding.
 *
 * @param {object} op - table entry; the imm8, imm8s, imm16, imm1632, imm32,
 *     immaddr, os and opcode properties are consulted
 * @param {number|undefined} size_variant - operand size (16 or 32) used when
 *     the entry has `os` set or an odd opcode
 * @returns {string|undefined} wrapped read call, or undefined when the entry
 *     declares no immediate
 */
function gen_read_imm_call(op, size_variant)
{
    const has_immediate =
        op.imm8 || op.imm8s || op.imm16 || op.imm1632 || op.imm32 || op.immaddr;

    if(!has_immediate)
    {
        return undefined;
    }

    if(op.imm8)
    {
        return wrap_imm_call("read_imm8()");
    }

    if(op.imm8s)
    {
        return wrap_imm_call("read_imm8s()");
    }

    if(op.immaddr)
    {
        // immaddr: depends on address size
        return wrap_imm_call("read_moffs()");
    }

    assert(op.imm1632 || op.imm16 || op.imm32);

    // Only entries with `os` or an odd opcode use the requested size variant;
    // everything else counts as 8 bit.
    const operand_size = (op.os || op.opcode % 2 === 1) ? size_variant : 8;

    if((op.imm1632 && operand_size === 16) || op.imm16)
    {
        return wrap_imm_call("read_imm16()");
    }

    assert((op.imm1632 && operand_size === 32) || op.imm32);
    return wrap_imm_call("read_imm32s()");
}
76

77
/**
 * Render a Rust call statement `name(arg, arg, ...);`.
 *
 * @param {string} name - callee, including any module path
 * @param {string[]} [args] - argument expressions; a falsy value means no arguments
 * @returns {string} the call statement, terminated by a semicolon
 */
function gen_call(name, args)
{
    const arg_list = (args || []).join(", ");

    return `${name}(${arg_list});`;
}
82

83
/*
 * Current naming scheme:
 * instr(16|32|)_(66|F2|F3)?0F?[0-9a-f]{2}(_[0-7])?(_mem|_reg|)
 *
 * Builds the module-qualified name of the Rust function implementing an
 * encoding. The `_mem` / `_reg` suffix is not added here.
 *
 * encoding: table entry; opcode, os and fixed_g are consulted
 * size: operand size, appended to "instr" only when encoding.os is set
 */
function make_instruction_name(encoding, size)
{
    const opcode = encoding.opcode;
    const low_byte = opcode & 0xFF;
    const mid_byte = opcode >> 8 & 0xFF;
    const high_byte = opcode >> 16 & 0xFF;

    const size_suffix = encoding.os ? String(size) : "";
    const opcode_hex = hex(low_byte, 2);
    const first_prefix = mid_byte === 0 ? "" : hex(mid_byte, 2);
    const second_prefix = high_byte === 0 ? "" : hex(high_byte, 2);

    let fixed_g_suffix = "";
    if(encoding.fixed_g !== undefined)
    {
        fixed_g_suffix = `_${encoding.fixed_g}`;
    }

    // Any encoding carrying a 0F byte is emitted into the instructions_0f module.
    const is_0f = first_prefix === "0F" || second_prefix === "0F";
    const rust_module = is_0f ? "instructions_0f" : "instructions";

    assert(["", "0F", "F2", "F3"].includes(first_prefix));
    assert(["", "66", "F2", "F3"].includes(second_prefix));

    return `${rust_module}::instr${size_suffix}_${second_prefix}${first_prefix}${opcode_hex}${fixed_g_suffix}`;
}
101

102
/**
 * Generate the body of one opcode case: an optional modrm byte read followed
 * by a dispatch on the 66 / F2 / F3 prefix when any encoding requires it.
 *
 * @param {object[]} encodings - all table entries sharing the opcode byte
 * @param {number|undefined} size - operand size variant (16, 32 or undefined)
 * @returns {Array} syntax tree nodes (strings and node objects)
 */
function gen_instruction_body(encodings, size)
{
    const first = encodings[0];

    // Bucket the encodings by the prefix byte stored above the opcode byte.
    const with_66 = [];
    const with_F2 = [];
    const with_F3 = [];
    const unprefixed = [];

    for(const e of encodings)
    {
        const high = e.opcode >>> 16;
        const mid = e.opcode >>> 8 & 0xFF;

        let bucket = unprefixed;

        if(high === 0x66)
        {
            bucket = with_66;
        }
        else if(mid === 0xF2 || high === 0xF2)
        {
            bucket = with_F2;
        }
        else if(mid === 0xF3 || high === 0xF3)
        {
            bucket = with_F3;
        }

        bucket.push(e);
    }

    if(with_F2.length > 0 || with_F3.length > 0)
    {
        assert((first.opcode & 0xFF0000) === 0 || (first.opcode & 0xFF00) === 0x0F00);
    }

    if(with_66.length > 0)
    {
        assert((first.opcode & 0xFF00) === 0x0F00);
    }

    const modrm_read = first.e ?
        [`let modrm_byte = ${wrap_imm_call("read_imm8()")};`] : [];

    const any_prefixed = with_66.length > 0 || with_F2.length > 0 || with_F3.length > 0;

    if(!any_prefixed)
    {
        return modrm_read.concat(gen_instruction_body_after_prefix(encodings, size));
    }

    // One if-branch per prefix that actually occurs, in the order 66, F2, F3.
    const prefix_groups = [
        ["prefixes_ & PREFIX_66 != 0", with_66],
        ["prefixes_ & PREFIX_F2 != 0", with_F2],
        ["prefixes_ & PREFIX_F3 != 0", with_F3],
    ];

    const if_blocks = [];

    for(const [condition, group] of prefix_groups)
    {
        if(group.length > 0)
        {
            if_blocks.push({
                condition,
                body: gen_instruction_body_after_prefix(group, size),
            });
        }
    }

    const checked_prefixes = first.sse ?
        "(PREFIX_66 | PREFIX_F2 | PREFIX_F3)" : "(PREFIX_F2 | PREFIX_F3)";

    // The fallback branch asserts that none of the checked prefixes is set.
    const else_block = {
        body: [`dbg_assert!((prefixes_ & ${checked_prefixes}) == 0);`].concat(
            gen_instruction_body_after_prefix(unprefixed, size)
        ),
    };

    return [
        "let prefixes_ = *prefixes as i32;",
        ...modrm_read,
        {
            type: "if-else",
            if_blocks,
            else_block,
        },
    ];
}
179

180
/**
 * Generate the body for a group of encodings that share opcode and prefix.
 * When the first entry has `fixed_g`, a switch on bits 3..5 of the modrm byte
 * is generated; otherwise the group must hold exactly one encoding.
 *
 * @param {object[]} encodings - encodings sharing opcode and prefix
 * @param {number|undefined} size - operand size variant
 * @returns {Array} syntax tree nodes
 */
function gen_instruction_body_after_prefix(encodings, size)
{
    const first = encodings[0];

    if(first.fixed_g === undefined)
    {
        assert(encodings.length === 1);
        return gen_instruction_body_after_fixed_g(first, size);
    }

    // instruction with modrm byte where the middle 3 bits encode the instruction
    assert(first.e);

    // group by opcode without prefix plus middle bits of modrm byte;
    // a later entry with the same key replaces an earlier one
    const by_key = Object.create(null);

    for(const candidate of encodings)
    {
        assert(typeof candidate.fixed_g === "number");
        const key = candidate.opcode & 0xFFFF | candidate.fixed_g << 16;
        by_key[key] = candidate;
    }

    const ordered = Object.values(by_key).sort((a, b) => a.fixed_g - b.fixed_g);

    const switch_cases = ordered.map(entry => ({
        conditions: [entry.fixed_g],
        body: gen_instruction_body_after_fixed_g(entry, size),
    }));

    // Values of the middle bits without a table entry raise #UD in the generated code.
    const default_case = {
        body: [
            `if DEBUG { panic!("Bad instruction at {:x}", *instruction_pointer); }`,
            "trigger_ud();",
        ],
    };

    return [
        {
            type: "switch",
            condition: "modrm_byte >> 3 & 7",
            cases: switch_cases,
            default_case,
        },
    ];
}
226

227
/**
 * Generate the statements that invoke the Rust implementation of a single
 * encoding: an optional task-switch guard, the instruction call (split into
 * `_mem` / `_reg` variants for modrm encodings) and an optional
 * `after_block_boundary();` call.
 *
 * @param {object} encoding - table entry describing the instruction
 * @param {number|undefined} size - operand size variant
 * @returns {Array} syntax tree nodes
 */
function gen_instruction_body_after_fixed_g(encoding, size)
{
    const instruction_prefix = [];

    const needs_block_boundary =
        (encoding.block_boundary && !encoding.no_block_boundary_in_interpreted) ||
        (!encoding.custom && encoding.e);
    const instruction_postfix = needs_block_boundary ? ["after_block_boundary();"] : [];

    if(encoding.task_switch_test || encoding.sse)
    {
        // The generated code returns early when the task switch test fails.
        instruction_prefix.push(
            {
                type: "if-else",
                if_blocks: [
                    {
                        condition: encoding.sse ? "!task_switch_test_mmx()" : "!task_switch_test()",
                        body: ["return;"],
                    }
                ],
            });
    }

    // Computed once and shared by all branches below (the modrm branch used to
    // recompute the same value into a shadowing local).
    const imm_read = gen_read_imm_call(encoding, size);
    const instruction_name = make_instruction_name(encoding, size);

    if(!encoding.e)
    {
        // instruction without modrm byte
        const args = [];

        if(imm_read)
        {
            args.push(imm_read);
        }

        if(encoding.extra_imm16)
        {
            assert(imm_read);
            args.push(wrap_imm_call("read_imm16()"));
        }
        else if(encoding.extra_imm8)
        {
            assert(imm_read);
            args.push(wrap_imm_call("read_imm8()"));
        }

        return [].concat(
            instruction_prefix,
            gen_call(instruction_name, args),
            instruction_postfix
        );
    }

    // instruction with modrm byte

    if(encoding.ignore_mod)
    {
        assert(!imm_read, "Unexpected instruction (ignore mod with immediate value)");

        // Has modrm byte, but the 2 mod bits are ignored and both
        // operands are always registers (0f20-0f24)

        return [].concat(
            instruction_prefix,
            gen_call(instruction_name, ["modrm_byte & 7", "modrm_byte >> 3 & 7"]),
            instruction_postfix
        );
    }

    // requires special handling around modrm_resolve when custom_modrm_resolve
    // is set: the raw modrm byte is passed instead of the resolved address
    const mem_args = encoding.custom_modrm_resolve ?
        ["modrm_byte"] :
        ["match modrm_resolve(modrm_byte) { Ok(a) => a, Err(()) => return }"];
    const reg_args = ["modrm_byte & 7"];

    if(encoding.fixed_g === undefined)
    {
        // The middle modrm bits are only passed along when they do not
        // already select the instruction.
        mem_args.push("modrm_byte >> 3 & 7");
        reg_args.push("modrm_byte >> 3 & 7");
    }

    if(imm_read)
    {
        mem_args.push(imm_read);
        reg_args.push(imm_read);
    }

    return [].concat(
        instruction_prefix,
        {
            type: "if-else",
            if_blocks: [
                {
                    condition: "modrm_byte < 0xC0",
                    body: [gen_call(`${instruction_name}_mem`, mem_args)],
                }
            ],
            else_block: {
                body: [gen_call(`${instruction_name}_reg`, reg_args)],
            },
        },
        instruction_postfix
    );
}
346

347
/**
 * Build the opcode dispatch tables from x86_table and write the selected ones
 * to OUT_DIR as interpreter.rs / interpreter0f.rs.
 *
 * Entries whose second-lowest opcode byte is 0x0F go into the 0F table, all
 * others into the primary table; both are keyed by the lowest opcode byte.
 * Both tables are always built (so both are validated to cover all 256
 * opcodes); `to_generate` only controls which files are written.
 */
function gen_table()
{
    const by_opcode = Object.create(null);
    const by_opcode0f = Object.create(null);

    for(const o of x86_table)
    {
        const target = (o.opcode & 0xFF00) === 0x0F00 ? by_opcode0f : by_opcode;
        const low_byte = o.opcode & 0xFF;

        target[low_byte] = target[low_byte] || [];
        target[low_byte].push(o);
    }

    const table = gen_opcode_switch(by_opcode);

    if(to_generate.interpreter)
    {
        write_interpreter_table(
            "interpreter.rs",
            [
                "use cpu::cpu::{after_block_boundary, modrm_resolve};",
                "use cpu::cpu::{read_imm8, read_imm8s, read_imm16, read_imm32s, read_moffs};",
                "use cpu::cpu::{task_switch_test, trigger_ud, DEBUG, PREFIX_F2, PREFIX_F3};",
                "use cpu::instructions;",
                "use cpu::global_pointers::{instruction_pointer, prefixes};",
            ],
            table
        );
    }

    // Built after the first file is written, matching the original ordering.
    const table0f = gen_opcode_switch(by_opcode0f);

    if(to_generate.interpreter0f)
    {
        write_interpreter_table(
            "interpreter0f.rs",
            [
                "use cpu::cpu::{after_block_boundary, modrm_resolve};",
                "use cpu::cpu::{read_imm8, read_imm16, read_imm32s};",
                "use cpu::cpu::{task_switch_test, task_switch_test_mmx, trigger_ud};",
                "use cpu::cpu::{DEBUG, PREFIX_66, PREFIX_F2, PREFIX_F3};",
                "use cpu::instructions_0f;",
                "use cpu::global_pointers::{instruction_pointer, prefixes};",
            ],
            table0f
        );
    }
}

// Build the `switch` node over all 256 opcode bytes. Opcodes whose first
// encoding has `os` get separate 16 bit (0xNN) and 32 bit (0x1NN) cases;
// all others share one case for both values.
function gen_opcode_switch(encodings_by_opcode)
{
    const cases = [];

    for(let opcode = 0; opcode < 0x100; opcode++)
    {
        const encoding = encodings_by_opcode[opcode];
        assert(encoding && encoding.length);

        const opcode_hex = hex(opcode, 2);
        const opcode_high_hex = hex(opcode | 0x100, 2);

        if(encoding[0].os)
        {
            cases.push({
                conditions: [`0x${opcode_hex}`],
                body: gen_instruction_body(encoding, 16),
            });
            cases.push({
                conditions: [`0x${opcode_high_hex}`],
                body: gen_instruction_body(encoding, 32),
            });
        }
        else
        {
            cases.push({
                conditions: [`0x${opcode_hex}`, `0x${opcode_high_hex}`],
                body: gen_instruction_body(encoding, undefined),
            });
        }
    }

    return {
        type: "switch",
        condition: "opcode",
        cases,
        default_case: {
            body: ["assert!(false);"]
        },
    };
}

// Wrap a dispatch table in the Rust file skeleton (rustfmt skip attribute,
// `use` lines, `run` function) and hand the printed text to finalize_table_rust.
function write_interpreter_table(filename, use_lines, table)
{
    const code = [
        "#![cfg_attr(rustfmt, rustfmt_skip)]",
        ...use_lines,
        "pub unsafe fn run(opcode: u32) {",
        table,
        "}",
    ];

    finalize_table_rust(
        OUT_DIR,
        filename,
        rust_ast.print_syntax_tree(code).join("\n") + "\n"
    );
}
493

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.