qemu

Форк
0
/
pci-quirks.c 
1681 строка · 58.0 Кб
1
/*
2
 * device quirks for PCI devices
3
 *
4
 * Copyright Red Hat, Inc. 2012-2015
5
 *
6
 * Authors:
7
 *  Alex Williamson <alex.williamson@redhat.com>
8
 *
9
 * This work is licensed under the terms of the GNU GPL, version 2.  See
10
 * the COPYING file in the top-level directory.
11
 */
12

13
#include "qemu/osdep.h"
14
#include CONFIG_DEVICES
15
#include "exec/memop.h"
16
#include "qemu/units.h"
17
#include "qemu/log.h"
18
#include "qemu/error-report.h"
19
#include "qemu/main-loop.h"
20
#include "qemu/module.h"
21
#include "qemu/range.h"
22
#include "qapi/error.h"
23
#include "qapi/visitor.h"
24
#include <sys/ioctl.h>
25
#include "hw/nvram/fw_cfg.h"
26
#include "hw/qdev-properties.h"
27
#include "pci.h"
28
#include "trace.h"
29

30
/*
31
 * List of device ids/vendor ids for which to disable
32
 * option rom loading. This avoids the guest hangs during rom
33
 * execution as noticed with the BCM 57810 card for lack of a
34
 * better way to handle such issues.
35
 * The user can still override by specifying a romfile or
36
 * rombar=1.
37
 * Please see https://bugs.launchpad.net/qemu/+bug/1284874
38
 * for an analysis of the 57810 card hang. When adding
39
 * a new vendor id/device id combination below, please also add
40
 * your card/environment details and information that could
41
 * help in debugging to the bug tracking this issue
42
 */
43
/*
 * Vendor/device ID pairs consulted by vfio_opt_rom_in_denylist().
 */
static const struct {
    uint32_t vendor;    /* PCI vendor ID */
    uint32_t device;    /* PCI device ID */
} rom_denylist[] = {
    {
        /* Broadcom BCM 57810 */
        .vendor = 0x14e4,
        .device = 0x168e,
    },
};
49

50
/*
 * Report whether @vdev matches an entry of rom_denylist[].
 *
 * On the first match the vfio_quirk_rom_in_denylist trace event is emitted
 * and true is returned; false is returned when no entry matches.
 */
bool vfio_opt_rom_in_denylist(VFIOPCIDevice *vdev)
{
    int idx = 0;

    while (idx < ARRAY_SIZE(rom_denylist)) {
        uint32_t vendor = rom_denylist[idx].vendor;
        uint32_t device = rom_denylist[idx].device;

        if (vfio_pci_is(vdev, vendor, device)) {
            trace_vfio_quirk_rom_in_denylist(vdev->vbasedev.name,
                                             vendor, device);
            return true;
        }
        idx++;
    }

    return false;
}
64

65
/*
66
 * Device specific region quirks (mostly backdoors to PCI config space)
67
 */
68

69
/*
70
 * The generic window quirks operate on an address and data register,
71
 * vfio_generic_window_address_quirk handles the address register and
72
 * vfio_generic_window_data_quirk handles the data register.  These ops
73
 * pass reads and writes through to hardware until a value matching the
74
 * stored address match/mask is written.  When this occurs, the data
75
 * register accesses emulated PCI config space for the device rather than
76
 * passing through accesses.  This enables devices where PCI config space
77
 * is accessible behind a window register to maintain the virtualization
78
 * provided through vfio.
79
 */
80
/*
 * One address-register pattern of a config window.  The address write
 * handler treats a written value as a hit when (value & ~mask) == match
 * and then latches (value & mask) as the config space offset.
 */
typedef struct VFIOConfigWindowMatch {
    uint32_t match;     /* expected value of the bits outside @mask */
    uint32_t mask;      /* bits that carry the config space offset */
} VFIOConfigWindowMatch;
84

85
typedef struct VFIOConfigWindowQuirk {
86
    struct VFIOPCIDevice *vdev;
87

88
    uint32_t address_val;
89

90
    uint32_t address_offset;
91
    uint32_t data_offset;
92

93
    bool window_enabled;
94
    uint8_t bar;
95

96
    MemoryRegion *addr_mem;
97
    MemoryRegion *data_mem;
98

99
    uint32_t nr_matches;
100
    VFIOConfigWindowMatch matches[];
101
} VFIOConfigWindowQuirk;
102

103
static uint64_t vfio_generic_window_quirk_address_read(void *opaque,
104
                                                       hwaddr addr,
105
                                                       unsigned size)
106
{
107
    VFIOConfigWindowQuirk *window = opaque;
108
    VFIOPCIDevice *vdev = window->vdev;
109

110
    return vfio_region_read(&vdev->bars[window->bar].region,
111
                            addr + window->address_offset, size);
112
}
113

114
/*
 * Address register write: forward the value to hardware, then decide
 * whether subsequent data register accesses are redirected to emulated
 * config space.
 *
 * The window is disabled on every write and re-enabled only when the value
 * hits one of window->matches[]; the first hit wins.
 */
static void vfio_generic_window_quirk_address_write(void *opaque, hwaddr addr,
                                                    uint64_t data,
                                                    unsigned size)
{
    VFIOConfigWindowQuirk *window = opaque;
    VFIOPCIDevice *vdev = window->vdev;
    int i;

    window->window_enabled = false;

    vfio_region_write(&vdev->bars[window->bar].region,
                      addr + window->address_offset, data, size);

    for (i = 0; i < window->nr_matches; i++) {
        const VFIOConfigWindowMatch *m = &window->matches[i];

        if ((data & ~m->mask) != m->match) {
            continue;
        }

        window->window_enabled = true;
        window->address_val = data & m->mask;
        trace_vfio_quirk_generic_window_address_write(vdev->vbasedev.name,
                                    memory_region_name(window->addr_mem), data);
        return;
    }
}
137

138
static const MemoryRegionOps vfio_generic_window_address_quirk = {
139
    .read = vfio_generic_window_quirk_address_read,
140
    .write = vfio_generic_window_quirk_address_write,
141
    .endianness = DEVICE_LITTLE_ENDIAN,
142
};
143

144
static uint64_t vfio_generic_window_quirk_data_read(void *opaque,
145
                                                    hwaddr addr, unsigned size)
146
{
147
    VFIOConfigWindowQuirk *window = opaque;
148
    VFIOPCIDevice *vdev = window->vdev;
149
    uint64_t data;
150

151
    /* Always read data reg, discard if window enabled */
152
    data = vfio_region_read(&vdev->bars[window->bar].region,
153
                            addr + window->data_offset, size);
154

155
    if (window->window_enabled) {
156
        data = vfio_pci_read_config(&vdev->pdev, window->address_val, size);
157
        trace_vfio_quirk_generic_window_data_read(vdev->vbasedev.name,
158
                                    memory_region_name(window->data_mem), data);
159
    }
160

161
    return data;
162
}
163

164
/*
 * Data register write.  With the window enabled the value goes to
 * vfio_pci_write_config() at the latched offset and hardware is not
 * touched; otherwise the write is passed through to the BAR.
 */
static void vfio_generic_window_quirk_data_write(void *opaque, hwaddr addr,
                                                 uint64_t data, unsigned size)
{
    VFIOConfigWindowQuirk *window = opaque;
    VFIOPCIDevice *vdev = window->vdev;

    if (!window->window_enabled) {
        vfio_region_write(&vdev->bars[window->bar].region,
                          addr + window->data_offset, data, size);
    } else {
        vfio_pci_write_config(&vdev->pdev, window->address_val, data, size);
        trace_vfio_quirk_generic_window_data_write(vdev->vbasedev.name,
                                    memory_region_name(window->data_mem), data);
    }
}
180

181
static const MemoryRegionOps vfio_generic_window_data_quirk = {
182
    .read = vfio_generic_window_quirk_data_read,
183
    .write = vfio_generic_window_quirk_data_write,
184
    .endianness = DEVICE_LITTLE_ENDIAN,
185
};
186

187
/*
188
 * The generic mirror quirk handles devices which expose PCI config space
189
 * through a region within a BAR.  When enabled, reads and writes are
190
 * redirected through to emulated PCI config space.  XXX if PCI config space
191
 * used memory regions, this could just be an alias.
192
 */
193
typedef struct VFIOConfigMirrorQuirk {
194
    struct VFIOPCIDevice *vdev;
195
    uint32_t offset;
196
    uint8_t bar;
197
    MemoryRegion *mem;
198
    uint8_t data[];
199
} VFIOConfigMirrorQuirk;
200

201
static uint64_t vfio_generic_quirk_mirror_read(void *opaque,
202
                                               hwaddr addr, unsigned size)
203
{
204
    VFIOConfigMirrorQuirk *mirror = opaque;
205
    VFIOPCIDevice *vdev = mirror->vdev;
206
    uint64_t data;
207

208
    /* Read and discard in case the hardware cares */
209
    (void)vfio_region_read(&vdev->bars[mirror->bar].region,
210
                           addr + mirror->offset, size);
211

212
    data = vfio_pci_read_config(&vdev->pdev, addr, size);
213
    trace_vfio_quirk_generic_mirror_read(vdev->vbasedev.name,
214
                                         memory_region_name(mirror->mem),
215
                                         addr, data);
216
    return data;
217
}
218

219
/*
 * Mirror write: the value is handed to vfio_pci_write_config() at @addr;
 * no direct BAR write is issued here.
 */
static void vfio_generic_quirk_mirror_write(void *opaque, hwaddr addr,
                                            uint64_t data, unsigned size)
{
    VFIOConfigMirrorQuirk *mirror = opaque;
    VFIOPCIDevice *vdev = mirror->vdev;
    const char *region_name;

    vfio_pci_write_config(&vdev->pdev, addr, data, size);

    region_name = memory_region_name(mirror->mem);
    trace_vfio_quirk_generic_mirror_write(vdev->vbasedev.name, region_name,
                                          addr, data);
}
230

231
static const MemoryRegionOps vfio_generic_mirror_quirk = {
232
    .read = vfio_generic_quirk_mirror_read,
233
    .write = vfio_generic_quirk_mirror_write,
234
    .endianness = DEVICE_LITTLE_ENDIAN,
235
};
236

237
/*
 * Is range1 fully contained within range2?
 *
 * Each range is described by its first address and its length in bytes.
 * The test compares differences instead of end addresses so that it stays
 * correct when first + len would wrap around the 64-bit address space.
 *
 * Returns true when every byte of [first1, first1 + len1) also lies in
 * [first2, first2 + len2).
 */
static bool vfio_range_contained(uint64_t first1, uint64_t len1,
                                 uint64_t first2, uint64_t len2)
{
    /* range1 starts before range2, or cannot fit at all */
    if (first1 < first2 || len1 > len2) {
        return false;
    }

    /* Neither subtraction can underflow after the checks above */
    return first1 - first2 <= len2 - len1;
}
242

243
/* PCI vendor ID matched by the ATI/AMD quirk probes in this file */
#define PCI_VENDOR_ID_ATI               0x1002
244

245
/*
246
 * Radeon HD cards (HD5450 & HD7850) report the upper byte of the I/O port BAR
247
 * through VGA register 0x3c3.  On newer cards, the I/O port BAR is always
248
 * BAR4 (older cards like the X550 used BAR1, but we don't care to support
249
 * those).  Note that on bare metal, a read of 0x3c3 doesn't always return the
250
 * I/O port BAR address.  Originally this was coded to return the virtual BAR
251
 * address only if the physical register read returns the actual BAR address,
252
 * but users have reported greater success if we return the virtual address
253
 * unconditionally.
254
 */
255
static uint64_t vfio_ati_3c3_quirk_read(void *opaque,
256
                                        hwaddr addr, unsigned size)
257
{
258
    VFIOPCIDevice *vdev = opaque;
259
    uint64_t data = vfio_pci_read_config(&vdev->pdev,
260
                                         PCI_BASE_ADDRESS_4 + 1, size);
261

262
    trace_vfio_quirk_ati_3c3_read(vdev->vbasedev.name, data);
263

264
    return data;
265
}
266

267
/*
 * Write handler for the 0x3c3 quirk region.  Writes are not forwarded
 * anywhere; they are only logged as a guest error.
 */
static void vfio_ati_3c3_quirk_write(void *opaque, hwaddr addr,
                                     uint64_t data, unsigned size)
{
    qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid access\n", __func__);
}
272

273
static const MemoryRegionOps vfio_ati_3c3_quirk = {
274
    .read = vfio_ati_3c3_quirk_read,
275
    .write = vfio_ati_3c3_quirk_write,
276
    .endianness = DEVICE_LITTLE_ENDIAN,
277
};
278

279
VFIOQuirk *vfio_quirk_alloc(int nr_mem)
280
{
281
    VFIOQuirk *quirk = g_new0(VFIOQuirk, 1);
282
    QLIST_INIT(&quirk->ioeventfds);
283
    quirk->mem = g_new0(MemoryRegion, nr_mem);
284
    quirk->nr_mem = nr_mem;
285

286
    return quirk;
287
}
288

289
/*
 * Tear down one ioeventfd: unlink it from its list, remove the eventfd
 * from the MemoryRegion, undo whichever backend was servicing it (the
 * VFIO_DEVICE_IOEVENTFD ioctl or the fd handler), then release the
 * notifier and free @ioeventfd.
 */
static void vfio_ioeventfd_exit(VFIOPCIDevice *vdev, VFIOIOEventFD *ioeventfd)
{
    QLIST_REMOVE(ioeventfd, next);
    memory_region_del_eventfd(ioeventfd->mr, ioeventfd->addr, ioeventfd->size,
                              true, ioeventfd->data, &ioeventfd->e);

    if (!ioeventfd->vfio) {
        /* Serviced by vfio_ioeventfd_handler(): drop the fd handler */
        qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
                            NULL, NULL, NULL);
    } else {
        /* Same parameters as at setup time, but with fd = -1 */
        struct vfio_device_ioeventfd args = {
            .argsz = sizeof(args),
            .flags = ioeventfd->size,
            .data = ioeventfd->data,
            .offset = ioeventfd->region->fd_offset + ioeventfd->region_addr,
            .fd = -1,
        };

        if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_IOEVENTFD, &args)) {
            error_report("Failed to remove vfio ioeventfd for %s+0x%"
                         HWADDR_PRIx"[%d]:0x%"PRIx64" (%m)",
                         memory_region_name(ioeventfd->mr), ioeventfd->addr,
                         ioeventfd->size, ioeventfd->data);
        }
    }

    event_notifier_cleanup(&ioeventfd->e);
    trace_vfio_ioeventfd_exit(memory_region_name(ioeventfd->mr),
                              (uint64_t)ioeventfd->addr, ioeventfd->size,
                              ioeventfd->data);
    g_free(ioeventfd);
}
322

323
/*
 * Tear down every ioeventfd on @quirk that is flagged dynamic; entries
 * without the flag are left in place.
 */
static void vfio_drop_dynamic_eventfds(VFIOPCIDevice *vdev, VFIOQuirk *quirk)
{
    VFIOIOEventFD *cur, *next_cur;

    /* _SAFE variant: vfio_ioeventfd_exit() unlinks and frees the entry */
    QLIST_FOREACH_SAFE(cur, &quirk->ioeventfds, next, next_cur) {
        if (!cur->dynamic) {
            continue;
        }
        vfio_ioeventfd_exit(vdev, cur);
    }
}
333

334
static void vfio_ioeventfd_handler(void *opaque)
335
{
336
    VFIOIOEventFD *ioeventfd = opaque;
337

338
    if (event_notifier_test_and_clear(&ioeventfd->e)) {
339
        vfio_region_write(ioeventfd->region, ioeventfd->region_addr,
340
                          ioeventfd->data, ioeventfd->size);
341
        trace_vfio_ioeventfd_handler(memory_region_name(ioeventfd->mr),
342
                                     (uint64_t)ioeventfd->addr, ioeventfd->size,
343
                                     ioeventfd->data);
344
    }
345
}
346

347
static VFIOIOEventFD *vfio_ioeventfd_init(VFIOPCIDevice *vdev,
348
                                          MemoryRegion *mr, hwaddr addr,
349
                                          unsigned size, uint64_t data,
350
                                          VFIORegion *region,
351
                                          hwaddr region_addr, bool dynamic)
352
{
353
    VFIOIOEventFD *ioeventfd;
354

355
    if (vdev->no_kvm_ioeventfd) {
356
        return NULL;
357
    }
358

359
    ioeventfd = g_malloc0(sizeof(*ioeventfd));
360

361
    if (event_notifier_init(&ioeventfd->e, 0)) {
362
        g_free(ioeventfd);
363
        return NULL;
364
    }
365

366
    /*
367
     * MemoryRegion and relative offset, plus additional ioeventfd setup
368
     * parameters for configuring and later tearing down KVM ioeventfd.
369
     */
370
    ioeventfd->mr = mr;
371
    ioeventfd->addr = addr;
372
    ioeventfd->size = size;
373
    ioeventfd->data = data;
374
    ioeventfd->dynamic = dynamic;
375
    /*
376
     * VFIORegion and relative offset for implementing the userspace
377
     * handler.  data & size fields shared for both uses.
378
     */
379
    ioeventfd->region = region;
380
    ioeventfd->region_addr = region_addr;
381

382
    if (!vdev->no_vfio_ioeventfd) {
383
        struct vfio_device_ioeventfd vfio_ioeventfd;
384

385
        vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd);
386
        vfio_ioeventfd.flags = ioeventfd->size;
387
        vfio_ioeventfd.data = ioeventfd->data;
388
        vfio_ioeventfd.offset = ioeventfd->region->fd_offset +
389
                                ioeventfd->region_addr;
390
        vfio_ioeventfd.fd = event_notifier_get_fd(&ioeventfd->e);
391

392
        ioeventfd->vfio = !ioctl(vdev->vbasedev.fd,
393
                                 VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd);
394
    }
395

396
    if (!ioeventfd->vfio) {
397
        qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
398
                            vfio_ioeventfd_handler, NULL, ioeventfd);
399
    }
400

401
    memory_region_add_eventfd(ioeventfd->mr, ioeventfd->addr, ioeventfd->size,
402
                              true, ioeventfd->data, &ioeventfd->e);
403
    trace_vfio_ioeventfd_init(memory_region_name(mr), (uint64_t)addr,
404
                              size, data, ioeventfd->vfio);
405

406
    return ioeventfd;
407
}
408

409
/*
 * Install the 0x3c3 read quirk on the VGA IO_HI region of ATI devices
 * whose BAR4 is an I/O port BAR of at least 256 bytes.
 */
static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev)
{
    VFIOQuirk *quirk;
    bool applies;

    /*
     * A BAR of 256 bytes or more is aligned such that its low address byte
     * is always zero.  Anything else, if it exists, is filtered out.
     */
    applies = vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) &&
              vdev->bars[4].ioport &&
              vdev->bars[4].region.size >= 256;
    if (!applies) {
        return;
    }

    quirk = vfio_quirk_alloc(1);

    /* One byte wide, placed 3 bytes into the region that starts at 0x3c0 */
    memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, vdev,
                          "vfio-ati-3c3-quirk", 1);
    memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
                                3, quirk->mem);

    QLIST_INSERT_HEAD(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks,
                      quirk, next);

    trace_vfio_quirk_ati_3c3_probe(vdev->vbasedev.name);
}
434

435
/*
436
 * Newer ATI/AMD devices, including HD5450 and HD7850, have a mirror to PCI
437
 * config space through MMIO BAR2 at offset 0x4000.  Nothing seems to access
438
 * the MMIO space directly, but a window to this space is provided through
439
 * I/O port BAR4.  Offset 0x0 is the address register and offset 0x4 is the
440
 * data register.  When the address is programmed to a range of 0x4000-0x4fff
441
 * PCI configuration space is available.  Experimentation seems to indicate
442
 * that read-only may be provided by hardware.
443
 */
444
/*
 * Install the address/data config window quirk on BAR4 of ATI devices
 * that have VGA enabled.  Does nothing for any other device or BAR.
 */
static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr)
{
    VFIOQuirk *quirk;
    VFIOConfigWindowQuirk *window;

    /* This window doesn't seem to be used except by legacy VGA code */
    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID)) {
        return;
    }
    if (!vdev->vga || nr != 4) {
        return;
    }

    quirk = vfio_quirk_alloc(2);

    /* Room for the window state plus a single trailing match entry */
    window = g_malloc0(sizeof(*window) + sizeof(VFIOConfigWindowMatch));
    quirk->data = window;

    window->vdev = vdev;
    window->bar = nr;
    window->address_offset = 0;
    window->data_offset = 4;
    window->addr_mem = &quirk->mem[0];
    window->data_mem = &quirk->mem[1];
    window->nr_matches = 1;
    window->matches[0].match = 0x4000;
    window->matches[0].mask = vdev->config_size - 1;

    /* Address register: 4 bytes at address_offset */
    memory_region_init_io(window->addr_mem, OBJECT(vdev),
                          &vfio_generic_window_address_quirk, window,
                          "vfio-ati-bar4-window-address-quirk", 4);
    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                        window->address_offset,
                                        window->addr_mem, 1);

    /* Data register: 4 bytes at data_offset */
    memory_region_init_io(window->data_mem, OBJECT(vdev),
                          &vfio_generic_window_data_quirk, window,
                          "vfio-ati-bar4-window-data-quirk", 4);
    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                        window->data_offset,
                                        window->data_mem, 1);

    QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);

    trace_vfio_quirk_ati_bar4_probe(vdev->vbasedev.name);
}
486

487
/*
488
 * Trap the BAR2 MMIO mirror to config space as well.
489
 */
490
/*
 * Install the config space mirror quirk at offset 0x4000 of BAR2 on ATI
 * devices that have VGA enabled and a 64-bit BAR2.
 */
static void vfio_probe_ati_bar2_quirk(VFIOPCIDevice *vdev, int nr)
{
    VFIOQuirk *quirk;
    VFIOConfigMirrorQuirk *mirror;

    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID)) {
        return;
    }
    if (!vdev->vga || nr != 2) {
        return;
    }
    /* Only enable on newer devices where BAR2 is 64bit */
    if (!vdev->bars[2].mem64) {
        return;
    }

    quirk = vfio_quirk_alloc(1);

    mirror = g_malloc0(sizeof(*mirror));
    quirk->data = mirror;

    mirror->vdev = vdev;
    mirror->bar = nr;
    mirror->offset = 0x4000;
    mirror->mem = quirk->mem;

    memory_region_init_io(mirror->mem, OBJECT(vdev),
                          &vfio_generic_mirror_quirk, mirror,
                          "vfio-ati-bar2-4000-quirk", PCI_CONFIG_SPACE_SIZE);
    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                        mirror->offset, mirror->mem, 1);

    QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);

    trace_vfio_quirk_ati_bar2_probe(vdev->vbasedev.name);
}
518

519
/*
520
 * Older ATI/AMD cards like the X550 have a similar window to that above.
521
 * I/O port BAR1 provides a window to a mirror of PCI config space located
522
 * in BAR2 at offset 0xf00.  We don't care to support such older cards, but
523
 * note it for future reference.
524
 */
525

526
/*
527
 * Nvidia has several different methods to get to config space, the
528
 * nouveau project has several of these documented here:
529
 * https://github.com/pathscale/envytools/tree/master/hwdocs
530
 *
531
 * The first quirk is actually not documented in envytools and is found
532
 * on 10de:01d1 (NVIDIA Corporation G72 [GeForce 7300 LE]).  This is an
533
 * NV46 chipset.  The backdoor uses the legacy VGA I/O ports to access
534
 * the mirror of PCI config space found at BAR0 offset 0x1800.  The access
535
 * sequence first writes 0x338 to I/O port 0x3d4.  The target offset is
536
 * then written to 0x3d0.  Finally 0x538 is written for a read and 0x738
537
 * is written for a write to 0x3d4.  The BAR0 offset is then accessible
538
 * through 0x3d0.  This quirk doesn't seem to be necessary on newer cards
539
 * that use the I/O port BAR5 window but it doesn't hurt to leave it.
540
 */
541
/* States of the 0x3d4/0x3d0 backdoor access sequence. */
typedef enum {
    NONE = 0,
    SELECT,
    WINDOW,
    READ,
    WRITE,
} VFIONvidia3d0State;

/* State names for tracing, indexed by VFIONvidia3d0State. */
static const char *nv3d0_states[] = {
    [NONE]   = "NONE",
    [SELECT] = "SELECT",
    [WINDOW] = "WINDOW",
    [READ]   = "READ",
    [WRITE]  = "WRITE",
};
544

545
typedef struct VFIONvidia3d0Quirk {
546
    VFIOPCIDevice *vdev;
547
    VFIONvidia3d0State state;
548
    uint32_t offset;
549
} VFIONvidia3d0Quirk;
550

551
static uint64_t vfio_nvidia_3d4_quirk_read(void *opaque,
552
                                           hwaddr addr, unsigned size)
553
{
554
    VFIONvidia3d0Quirk *quirk = opaque;
555
    VFIOPCIDevice *vdev = quirk->vdev;
556

557
    quirk->state = NONE;
558

559
    return vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
560
                         addr + 0x14, size);
561
}
562

563
/*
 * 0x3d4 write: advance the access sequence, then pass the write through
 * to the VGA IO_HI region.
 *
 *   0x338 while in NONE    -> SELECT
 *   0x538 while in WINDOW  -> READ
 *   0x738 while in WINDOW  -> WRITE
 *   anything else          -> NONE
 *
 * A trace event is emitted only for the three transitions listed.
 */
static void vfio_nvidia_3d4_quirk_write(void *opaque, hwaddr addr,
                                        uint64_t data, unsigned size)
{
    VFIONvidia3d0Quirk *quirk = opaque;
    VFIOPCIDevice *vdev = quirk->vdev;
    VFIONvidia3d0State prev = quirk->state;
    VFIONvidia3d0State next = NONE;

    if (data == 0x338 && prev == NONE) {
        next = SELECT;
    } else if (data == 0x538 && prev == WINDOW) {
        next = READ;
    } else if (data == 0x738 && prev == WINDOW) {
        next = WRITE;
    }

    quirk->state = next;
    if (next != NONE) {
        trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
                                          nv3d0_states[quirk->state]);
    }

    vfio_vga_write(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
                   addr + 0x14, data, size);
}
599

600
static const MemoryRegionOps vfio_nvidia_3d4_quirk = {
601
    .read = vfio_nvidia_3d4_quirk_read,
602
    .write = vfio_nvidia_3d4_quirk_write,
603
    .endianness = DEVICE_LITTLE_ENDIAN,
604
};
605

606
static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque,
607
                                           hwaddr addr, unsigned size)
608
{
609
    VFIONvidia3d0Quirk *quirk = opaque;
610
    VFIOPCIDevice *vdev = quirk->vdev;
611
    VFIONvidia3d0State old_state = quirk->state;
612
    uint64_t data = vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
613
                                  addr + 0x10, size);
614

615
    quirk->state = NONE;
616

617
    if (old_state == READ &&
618
        (quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) {
619
        uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1);
620

621
        data = vfio_pci_read_config(&vdev->pdev, offset, size);
622
        trace_vfio_quirk_nvidia_3d0_read(vdev->vbasedev.name,
623
                                         offset, size, data);
624
    }
625

626
    return data;
627
}
628

629
/*
 * 0x3d0 write.
 *
 * In SELECT state the value is latched as the target offset, the sequence
 * moves to WINDOW, and the write still goes to hardware.  In WRITE state
 * with the latched offset inside the 0x1800 config mirror, the value goes
 * to vfio_pci_write_config() and hardware is not written.  Every other
 * case resets the sequence to NONE and passes the write through.
 */
static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr,
                                        uint64_t data, unsigned size)
{
    VFIONvidia3d0Quirk *quirk = opaque;
    VFIOPCIDevice *vdev = quirk->vdev;
    VFIONvidia3d0State prev = quirk->state;

    quirk->state = NONE;

    switch (prev) {
    case SELECT:
        quirk->offset = (uint32_t)data;
        quirk->state = WINDOW;
        trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
                                          nv3d0_states[quirk->state]);
        break;
    case WRITE:
        if ((quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) {
            uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1);

            vfio_pci_write_config(&vdev->pdev, offset, data, size);
            trace_vfio_quirk_nvidia_3d0_write(vdev->vbasedev.name,
                                              offset, data, size);
            return;
        }
        break;
    default:
        break;
    }

    vfio_vga_write(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
                   addr + 0x10, data, size);
}
657

658
static const MemoryRegionOps vfio_nvidia_3d0_quirk = {
659
    .read = vfio_nvidia_3d0_quirk_read,
660
    .write = vfio_nvidia_3d0_quirk_write,
661
    .endianness = DEVICE_LITTLE_ENDIAN,
662
};
663

664
/*
 * Install the 0x3d4/0x3d0 backdoor quirk on the VGA IO_HI region of
 * NVIDIA devices.  Skipped when vdev->no_geforce_quirks is set or BAR1
 * has zero size.
 */
static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev)
{
    VFIOQuirk *quirk;
    VFIONvidia3d0Quirk *state;

    if (vdev->no_geforce_quirks) {
        return;
    }
    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID)) {
        return;
    }
    if (!vdev->bars[1].region.size) {
        return;
    }

    quirk = vfio_quirk_alloc(2);

    state = g_malloc0(sizeof(*state));
    state->vdev = vdev;
    quirk->data = state;

    /* mem[0]: 2 bytes at 0x3c0 + 0x14 = 0x3d4 */
    memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_nvidia_3d4_quirk,
                          state, "vfio-nvidia-3d4-quirk", 2);
    memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
                                0x14, &quirk->mem[0]);

    /* mem[1]: 2 bytes at 0x3c0 + 0x10 = 0x3d0 */
    memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_nvidia_3d0_quirk,
                          state, "vfio-nvidia-3d0-quirk", 2);
    memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
                                0x10, &quirk->mem[1]);

    QLIST_INSERT_HEAD(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks,
                      quirk, next);

    trace_vfio_quirk_nvidia_3d0_probe(vdev->vbasedev.name);
}
694

695
/*
696
 * The second quirk is documented in envytools.  The I/O port BAR5 is just
697
 * a set of address/data ports to the MMIO BARs.  The BAR we care about is
698
 * again BAR0.  This backdoor is apparently a bit newer than the one above
699
 * so we need to not only trap 256 bytes @0x1800, but all of PCI config
700
 * space, including extended space is available at the 4k @0x88000.
701
 */
702
typedef struct VFIONvidiaBAR5Quirk {
703
    uint32_t master;
704
    uint32_t enable;
705
    MemoryRegion *addr_mem;
706
    MemoryRegion *data_mem;
707
    bool enabled;
708
    VFIOConfigWindowQuirk window; /* last for match data */
709
} VFIONvidiaBAR5Quirk;
710

711
/*
 * Bring the enabled state of the window regions in line with bit 0 of
 * (master & enable).  Nothing happens if the state is already correct.
 */
static void vfio_nvidia_bar5_enable(VFIONvidiaBAR5Quirk *bar5)
{
    VFIOPCIDevice *vdev = bar5->window.vdev;
    bool wanted = (bar5->master & bar5->enable) & 0x1;

    if (wanted == bar5->enabled) {
        return;
    }

    bar5->enabled = !bar5->enabled;
    trace_vfio_quirk_nvidia_bar5_state(vdev->vbasedev.name,
                                       bar5->enabled ?  "Enable" : "Disable");
    memory_region_set_enabled(bar5->addr_mem, bar5->enabled);
    memory_region_set_enabled(bar5->data_mem, bar5->enabled);
}
725

726
static uint64_t vfio_nvidia_bar5_quirk_master_read(void *opaque,
727
                                                   hwaddr addr, unsigned size)
728
{
729
    VFIONvidiaBAR5Quirk *bar5 = opaque;
730
    VFIOPCIDevice *vdev = bar5->window.vdev;
731

732
    return vfio_region_read(&vdev->bars[5].region, addr, size);
733
}
734

735
/*
 * Master register write: forward to BAR5, remember the value, and
 * re-evaluate whether the window regions should be enabled.
 */
static void vfio_nvidia_bar5_quirk_master_write(void *opaque, hwaddr addr,
                                                uint64_t data, unsigned size)
{
    VFIONvidiaBAR5Quirk *bar5 = opaque;

    vfio_region_write(&bar5->window.vdev->bars[5].region, addr, data, size);

    bar5->master = data;
    vfio_nvidia_bar5_enable(bar5);
}
746

747
static const MemoryRegionOps vfio_nvidia_bar5_quirk_master = {
748
    .read = vfio_nvidia_bar5_quirk_master_read,
749
    .write = vfio_nvidia_bar5_quirk_master_write,
750
    .endianness = DEVICE_LITTLE_ENDIAN,
751
};
752

753
static uint64_t vfio_nvidia_bar5_quirk_enable_read(void *opaque,
754
                                                   hwaddr addr, unsigned size)
755
{
756
    VFIONvidiaBAR5Quirk *bar5 = opaque;
757
    VFIOPCIDevice *vdev = bar5->window.vdev;
758

759
    return vfio_region_read(&vdev->bars[5].region, addr + 4, size);
760
}
761

762
/*
 * Enable register write: forward to BAR5 at @addr + 4, remember the value,
 * and re-evaluate whether the window regions should be enabled.
 */
static void vfio_nvidia_bar5_quirk_enable_write(void *opaque, hwaddr addr,
                                                uint64_t data, unsigned size)
{
    VFIONvidiaBAR5Quirk *bar5 = opaque;

    vfio_region_write(&bar5->window.vdev->bars[5].region,
                      addr + 4, data, size);

    bar5->enable = data;
    vfio_nvidia_bar5_enable(bar5);
}
773

774
static const MemoryRegionOps vfio_nvidia_bar5_quirk_enable = {
775
    .read = vfio_nvidia_bar5_quirk_enable_read,
776
    .write = vfio_nvidia_bar5_quirk_enable_write,
777
    .endianness = DEVICE_LITTLE_ENDIAN,
778
};
779

780
/*
 * Install the NVIDIA BAR5 quirk on BAR @nr of @vdev.
 *
 * The quirk consists of four 4-byte overlay regions on BAR5:
 *   - offset 0x8: config window address register (created disabled)
 *   - offset 0xc: config window data register (created disabled)
 *   - offset 0x0: "master" intercept
 *   - offset 0x4: "enable" intercept
 *
 * The window matches addresses 0x1800 and 0x88000.  Nothing is installed
 * unless the device is an NVIDIA device with VGA set up, @nr is 5, BAR5 is
 * an I/O port BAR and GeForce quirks have not been disabled.
 */
static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
{
    VFIOQuirk *q;
    VFIONvidiaBAR5Quirk *state;
    VFIOConfigWindowQuirk *win;

    if (vdev->no_geforce_quirks) {
        return;
    }
    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID)) {
        return;
    }
    if (!vdev->vga || nr != 5 || !vdev->bars[5].ioport) {
        return;
    }

    q = vfio_quirk_alloc(4);
    /* Trailing storage holds the two window match entries */
    state = g_malloc0(sizeof(*state) + (sizeof(VFIOConfigWindowMatch) * 2));
    q->data = state;
    win = &state->window;

    win->vdev = vdev;
    win->address_offset = 0x8;
    win->data_offset = 0xc;
    win->nr_matches = 2;
    win->matches[0].match = 0x1800;
    win->matches[0].mask = PCI_CONFIG_SPACE_SIZE - 1;
    win->matches[1].match = 0x88000;
    win->matches[1].mask = vdev->config_size - 1;
    win->bar = nr;
    state->addr_mem = &q->mem[0];
    win->addr_mem = state->addr_mem;
    state->data_mem = &q->mem[1];
    win->data_mem = state->data_mem;

    /* Window address register, disabled until enabled by the BAR5 state */
    memory_region_init_io(win->addr_mem, OBJECT(vdev),
                          &vfio_generic_window_address_quirk, win,
                          "vfio-nvidia-bar5-window-address-quirk", 4);
    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                        win->address_offset,
                                        win->addr_mem, 1);
    memory_region_set_enabled(win->addr_mem, false);

    /* Window data register, likewise created disabled */
    memory_region_init_io(win->data_mem, OBJECT(vdev),
                          &vfio_generic_window_data_quirk, win,
                          "vfio-nvidia-bar5-window-data-quirk", 4);
    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                        win->data_offset,
                                        win->data_mem, 1);
    memory_region_set_enabled(win->data_mem, false);

    /* Intercept of the dword at offset 0 */
    memory_region_init_io(&q->mem[2], OBJECT(vdev),
                          &vfio_nvidia_bar5_quirk_master, state,
                          "vfio-nvidia-bar5-master-quirk", 4);
    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                        0, &q->mem[2], 1);

    /* Intercept of the dword at offset 4 */
    memory_region_init_io(&q->mem[3], OBJECT(vdev),
                          &vfio_nvidia_bar5_quirk_enable, state,
                          "vfio-nvidia-bar5-enable-quirk", 4);
    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                        4, &q->mem[3], 1);

    QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, q, next);

    trace_vfio_quirk_nvidia_bar5_probe(vdev->vbasedev.name);
}
841

842
typedef struct LastDataSet {
843
    VFIOQuirk *quirk;
844
    hwaddr addr;
845
    uint64_t data;
846
    unsigned size;
847
    int hits;
848
    int added;
849
} LastDataSet;
850

851
/* Maximum number of ioeventfds the NVIDIA BAR0 mirror quirk adds per mirror */
#define MAX_DYN_IOEVENTFD 10
852
/* Consecutive identical writes required before an ioeventfd is added */
#define HITS_FOR_IOEVENTFD 10
853

854
/*
855
 * Finally, BAR0 itself.  We want to redirect any accesses to either
856
 * 0x1800 or 0x88000 through the PCI config space access functions.
857
 */
858
static void vfio_nvidia_quirk_mirror_write(void *opaque, hwaddr addr,
859
                                           uint64_t data, unsigned size)
860
{
861
    VFIOConfigMirrorQuirk *mirror = opaque;
862
    VFIOPCIDevice *vdev = mirror->vdev;
863
    PCIDevice *pdev = &vdev->pdev;
864
    LastDataSet *last = (LastDataSet *)&mirror->data;
865

866
    vfio_generic_quirk_mirror_write(opaque, addr, data, size);
867

868
    /*
869
     * Nvidia seems to acknowledge MSI interrupts by writing 0xff to the
870
     * MSI capability ID register.  Both the ID and next register are
871
     * read-only, so we allow writes covering either of those to real hw.
872
     */
873
    if ((pdev->cap_present & QEMU_PCI_CAP_MSI) &&
874
        vfio_range_contained(addr, size, pdev->msi_cap, PCI_MSI_FLAGS)) {
875
        vfio_region_write(&vdev->bars[mirror->bar].region,
876
                          addr + mirror->offset, data, size);
877
        trace_vfio_quirk_nvidia_bar0_msi_ack(vdev->vbasedev.name);
878
    }
879

880
    /*
881
     * Automatically add an ioeventfd to handle any repeated write with the
882
     * same data and size above the standard PCI config space header.  This is
883
     * primarily expected to accelerate the MSI-ACK behavior, such as noted
884
     * above.  Current hardware/drivers should trigger an ioeventfd at config
885
     * offset 0x704 (region offset 0x88704), with data 0x0, size 4.
886
     *
887
     * The criteria of 10 successive hits is arbitrary but reliably adds the
888
     * MSI-ACK region.  Note that as some writes are bypassed via the ioeventfd,
889
     * the remaining ones have a greater chance of being seen successively.
890
     * To avoid the pathological case of burning up all of QEMU's open file
891
     * handles, arbitrarily limit this algorithm from adding no more than 10
892
     * ioeventfds, print an error if we would have added an 11th, and then
893
     * stop counting.
894
     */
895
    if (!vdev->no_kvm_ioeventfd &&
896
        addr >= PCI_STD_HEADER_SIZEOF && last->added <= MAX_DYN_IOEVENTFD) {
897
        if (addr != last->addr || data != last->data || size != last->size) {
898
            last->addr = addr;
899
            last->data = data;
900
            last->size = size;
901
            last->hits = 1;
902
        } else if (++last->hits >= HITS_FOR_IOEVENTFD) {
903
            if (last->added < MAX_DYN_IOEVENTFD) {
904
                VFIOIOEventFD *ioeventfd;
905
                ioeventfd = vfio_ioeventfd_init(vdev, mirror->mem, addr, size,
906
                                        data, &vdev->bars[mirror->bar].region,
907
                                        mirror->offset + addr, true);
908
                if (ioeventfd) {
909
                    VFIOQuirk *quirk = last->quirk;
910

911
                    QLIST_INSERT_HEAD(&quirk->ioeventfds, ioeventfd, next);
912
                    last->added++;
913
                }
914
            } else {
915
                last->added++;
916
                warn_report("NVIDIA ioeventfd queue full for %s, unable to "
917
                            "accelerate 0x%"HWADDR_PRIx", data 0x%"PRIx64", "
918
                            "size %u", vdev->vbasedev.name, addr, data, size);
919
            }
920
        }
921
    }
922
}
923

924
static const MemoryRegionOps vfio_nvidia_mirror_quirk = {
925
    .read = vfio_generic_quirk_mirror_read,
926
    .write = vfio_nvidia_quirk_mirror_write,
927
    .endianness = DEVICE_LITTLE_ENDIAN,
928
};
929

930
/*
 * Reset hook for an NVIDIA BAR0 mirror quirk: clear the repeated-write
 * tracking state (the back-pointer to the quirk is kept) and drop the
 * dynamically added ioeventfds.
 */
static void vfio_nvidia_bar0_quirk_reset(VFIOPCIDevice *vdev, VFIOQuirk *quirk)
{
    VFIOConfigMirrorQuirk *mirror = quirk->data;
    LastDataSet *track = (LastDataSet *)&mirror->data;

    track->added = 0;
    track->hits = 0;
    track->size = 0;
    track->data = 0;
    track->addr = 0;

    vfio_drop_dynamic_eventfds(vdev, quirk);
}
939

940
/*
 * Create one config space mirror quirk on BAR @nr, covering @size bytes at
 * BAR offset @offset, and add it to the BAR's quirk list.
 */
static void vfio_nvidia_bar0_add_mirror(VFIOPCIDevice *vdev, int nr,
                                        uint32_t offset, const char *name,
                                        uint64_t size)
{
    VFIOQuirk *quirk = vfio_quirk_alloc(1);
    VFIOConfigMirrorQuirk *mirror;
    LastDataSet *track;

    quirk->reset = vfio_nvidia_bar0_quirk_reset;
    /* LastDataSet lives in the mirror's trailing data area */
    mirror = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet));
    quirk->data = mirror;
    mirror->mem = quirk->mem;
    mirror->vdev = vdev;
    mirror->offset = offset;
    mirror->bar = nr;
    track = (LastDataSet *)&mirror->data;
    track->quirk = quirk;

    memory_region_init_io(mirror->mem, OBJECT(vdev),
                          &vfio_nvidia_mirror_quirk, mirror,
                          name, size);
    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                        mirror->offset, mirror->mem, 1);

    QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
}

/*
 * Install the NVIDIA BAR0 config space mirror quirks.
 *
 * Applies to NVIDIA VGA class devices on BAR0 unless GeForce quirks are
 * disabled.  A mirror of the full config space is always added at 0x88000;
 * a second, PCI_CONFIG_SPACE_SIZE sized mirror at 0x1800 is added only when
 * VGA is set up for the device.
 */
static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
{
    if (vdev->no_geforce_quirks) {
        return;
    }
    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID)) {
        return;
    }
    if (!vfio_is_vga(vdev) || nr != 0) {
        return;
    }

    vfio_nvidia_bar0_add_mirror(vdev, nr, 0x88000,
                                "vfio-nvidia-bar0-88000-mirror-quirk",
                                vdev->config_size);

    /* The 0x1800 offset mirror only seems to get used by legacy VGA */
    if (vdev->vga) {
        vfio_nvidia_bar0_add_mirror(vdev, nr, 0x1800,
                                    "vfio-nvidia-bar0-1800-mirror-quirk",
                                    PCI_CONFIG_SPACE_SIZE);
    }

    trace_vfio_quirk_nvidia_bar0_probe(vdev->vbasedev.name);
}
995

996
/*
997
 * TODO - Some Nvidia devices provide config access to their companion HDA
998
 * device and even to their parent bridge via these config space mirrors.
999
 * Add quirks for those regions.
1000
 */
1001

1002
/* PCI vendor ID used to match Realtek devices for the RTL8168 quirk */
#define PCI_VENDOR_ID_REALTEK 0x10ec
1003

1004
/*
1005
 * RTL8168 devices have a backdoor that can access the MSI-X table.  At BAR2
1006
 * offset 0x70 there is a dword data register, offset 0x74 is a dword address
1007
 * register.  According to the Linux r8169 driver, the MSI-X table is addressed
1008
 * when the "type" portion of the address register is set to 0x1.  This appears
1009
 * to be bits 16:30.  Bit 31 is both a write indicator and some sort of
1010
 * "address latched" indicator.  Bits 12:15 are a mask field, which we can
1011
 * ignore because the MSI-X table should always be accessed as a dword (full
1012
 * mask).  Bits 0:11 is offset within the type.
1013
 *
1014
 * Example trace:
1015
 *
1016
 * Read from MSI-X table offset 0
1017
 * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x1f000, 4) // store read addr
1018
 * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x8001f000 // latch
1019
 * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x70, 4) = 0xfee00398 // read data
1020
 *
1021
 * Write 0xfee00000 to MSI-X table offset 0
1022
 * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x70, 0xfee00000, 4) // write data
1023
 * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x8001f000, 4) // do write
1024
 * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x1f000 // complete
1025
 */
1026
typedef struct VFIOrtl8168Quirk {
1027
    VFIOPCIDevice *vdev;
1028
    uint32_t addr;
1029
    uint32_t data;
1030
    bool enabled;
1031
} VFIOrtl8168Quirk;
1032

1033
static uint64_t vfio_rtl8168_quirk_address_read(void *opaque,
1034
                                                hwaddr addr, unsigned size)
1035
{
1036
    VFIOrtl8168Quirk *rtl = opaque;
1037
    VFIOPCIDevice *vdev = rtl->vdev;
1038
    uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x74, size);
1039

1040
    if (rtl->enabled) {
1041
        data = rtl->addr ^ 0x80000000U; /* latch/complete */
1042
        trace_vfio_quirk_rtl8168_fake_latch(vdev->vbasedev.name, data);
1043
    }
1044

1045
    return data;
1046
}
1047

1048
/*
 * Write handler for the RTL8168 address register (BAR2 + 0x74).
 *
 * A value whose type field (bits 16:30) equals 0x1 addresses the MSI-X table
 * and engages the backdoor emulation; any other value disengages it.  When
 * bit 31 is also set, the previously stored data value is written to the
 * emulated MSI-X table (if the device has MSI-X) and nothing is written to
 * hardware.  All other writes are passed through.
 */
static void vfio_rtl8168_quirk_address_write(void *opaque, hwaddr addr,
                                             uint64_t data, unsigned size)
{
    VFIOrtl8168Quirk *rtl = opaque;
    VFIOPCIDevice *vdev = rtl->vdev;
    bool targets_msix = (data & 0x7fff0000) == 0x10000; /* MSI-X table */

    rtl->enabled = targets_msix;
    if (targets_msix) {
        rtl->addr = (uint32_t)data;
    }

    if (!targets_msix || !(data & 0x80000000U)) {
        vfio_region_write(&vdev->bars[2].region, addr + 0x74, data, size);
        return;
    }

    /* Do write */
    if (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) {
        hwaddr offset = data & 0xfff;
        uint64_t val = rtl->data;

        trace_vfio_quirk_rtl8168_msix_write(vdev->vbasedev.name,
                                            (uint16_t)offset, val);

        /* Write to the proper guest MSI-X table instead */
        memory_region_dispatch_write(&vdev->pdev.msix_table_mmio,
                                     offset, val,
                                     size_memop(size) | MO_LE,
                                     MEMTXATTRS_UNSPECIFIED);
    }
    /* Do not write guest MSI-X data to hardware */
}
1080

1081
static const MemoryRegionOps vfio_rtl_address_quirk = {
1082
    .read = vfio_rtl8168_quirk_address_read,
1083
    .write = vfio_rtl8168_quirk_address_write,
1084
    .valid = {
1085
        .min_access_size = 4,
1086
        .max_access_size = 4,
1087
        .unaligned = false,
1088
    },
1089
    .endianness = DEVICE_LITTLE_ENDIAN,
1090
};
1091

1092
static uint64_t vfio_rtl8168_quirk_data_read(void *opaque,
1093
                                             hwaddr addr, unsigned size)
1094
{
1095
    VFIOrtl8168Quirk *rtl = opaque;
1096
    VFIOPCIDevice *vdev = rtl->vdev;
1097
    uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x70, size);
1098

1099
    if (rtl->enabled && (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) {
1100
        hwaddr offset = rtl->addr & 0xfff;
1101
        memory_region_dispatch_read(&vdev->pdev.msix_table_mmio, offset,
1102
                                    &data, size_memop(size) | MO_LE,
1103
                                    MEMTXATTRS_UNSPECIFIED);
1104
        trace_vfio_quirk_rtl8168_msix_read(vdev->vbasedev.name, offset, data);
1105
    }
1106

1107
    return data;
1108
}
1109

1110
/*
 * Write handler for the RTL8168 data register (BAR2 + 0x70): remember the
 * low 32 bits of the value for a later backdoor write, then pass the write
 * through to hardware.
 */
static void vfio_rtl8168_quirk_data_write(void *opaque, hwaddr addr,
                                          uint64_t data, unsigned size)
{
    VFIOrtl8168Quirk *rtl = opaque;

    rtl->data = (uint32_t)data;

    vfio_region_write(&rtl->vdev->bars[2].region, addr + 0x70, data, size);
}
1120

1121
static const MemoryRegionOps vfio_rtl_data_quirk = {
1122
    .read = vfio_rtl8168_quirk_data_read,
1123
    .write = vfio_rtl8168_quirk_data_write,
1124
    .valid = {
1125
        .min_access_size = 4,
1126
        .max_access_size = 4,
1127
        .unaligned = false,
1128
    },
1129
    .endianness = DEVICE_LITTLE_ENDIAN,
1130
};
1131

1132
/*
 * Install the RTL8168 MSI-X backdoor quirk: two 4-byte overlay regions on
 * BAR2, the address register at 0x74 and the data register at 0x70, sharing
 * one VFIOrtl8168Quirk state.  Only applies to Realtek 0x8168 devices and
 * only when @nr is 2.
 */
static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr)
{
    VFIOQuirk *q;
    VFIOrtl8168Quirk *state;

    if (nr != 2 && vfio_pci_is(vdev, PCI_VENDOR_ID_REALTEK, 0x8168)) {
        return;
    }
    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_REALTEK, 0x8168)) {
        return;
    }

    q = vfio_quirk_alloc(2);
    state = g_malloc0(sizeof(*state));
    q->data = state;
    state->vdev = vdev;

    memory_region_init_io(&q->mem[0], OBJECT(vdev),
                          &vfio_rtl_address_quirk, state,
                          "vfio-rtl8168-window-address-quirk", 4);
    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                        0x74, &q->mem[0], 1);

    memory_region_init_io(&q->mem[1], OBJECT(vdev),
                          &vfio_rtl_data_quirk, state,
                          "vfio-rtl8168-window-data-quirk", 4);
    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                        0x70, &q->mem[1], 1);

    QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, q, next);

    trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name);
}
1161

1162
/* Config space offset that vfio_pci_igd_opregion_init() zeroes and emulates */
#define IGD_ASLS 0xfc /* ASL Storage Register */
1163

1164
/*
1165
 * The OpRegion includes the Video BIOS Table, which seems important for
1166
 * telling the driver what sort of outputs it has.  Without this, the device
1167
 * may work in the guest, but we may not get output.  This also requires BIOS
1168
 * support to reserve and populate a section of guest memory sufficient for
1169
 * the table and to write the base address of that memory to the ASLS register
1170
 * of the IGD device.
1171
 */
1172
bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
1173
                                struct vfio_region_info *info, Error **errp)
1174
{
1175
    int ret;
1176

1177
    vdev->igd_opregion = g_malloc0(info->size);
1178
    ret = pread(vdev->vbasedev.fd, vdev->igd_opregion,
1179
                info->size, info->offset);
1180
    if (ret != info->size) {
1181
        error_setg(errp, "failed to read IGD OpRegion");
1182
        g_free(vdev->igd_opregion);
1183
        vdev->igd_opregion = NULL;
1184
        return false;
1185
    }
1186

1187
    /*
1188
     * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to
1189
     * allocate 32bit reserved memory for, copy these contents into, and write
1190
     * the reserved memory base address to the device ASLS register at 0xFC.
1191
     * Alignment of this reserved region seems flexible, but using a 4k page
1192
     * alignment seems to work well.  This interface assumes a single IGD
1193
     * device, which may be at VM address 00:02.0 in legacy mode or another
1194
     * address in UPT mode.
1195
     *
1196
     * NB, there may be future use cases discovered where the VM should have
1197
     * direct interaction with the host OpRegion, in which case the write to
1198
     * the ASLS register would trigger MemoryRegion setup to enable that.
1199
     */
1200
    fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion",
1201
                    vdev->igd_opregion, info->size);
1202

1203
    trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name);
1204

1205
    pci_set_long(vdev->pdev.config + IGD_ASLS, 0);
1206
    pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0);
1207
    pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0);
1208

1209
    return true;
1210
}
1211

1212
/*
1213
 * Common quirk probe entry points.
1214
 */
1215
/*
 * Probe all VGA region quirks for @vdev.  Each probe function decides for
 * itself whether it applies to the device.
 */
void vfio_vga_quirk_setup(VFIOPCIDevice *vdev)
{
    /* ATI quirk on the 0x3c3 VGA register */
    vfio_vga_probe_ati_3c3_quirk(vdev);
    /* NVIDIA quirk on the 0x3d0 VGA register */
    vfio_vga_probe_nvidia_3d0_quirk(vdev);
}
1220

1221
/*
 * Detach every quirk MemoryRegion from the VGA regions of @vdev.  The quirks
 * stay on their lists; nothing is freed here.
 */
void vfio_vga_quirk_exit(VFIOPCIDevice *vdev)
{
    int region_idx;

    for (region_idx = 0; region_idx < ARRAY_SIZE(vdev->vga->region);
         region_idx++) {
        VFIOQuirk *quirk;

        QLIST_FOREACH(quirk, &vdev->vga->region[region_idx].quirks, next) {
            int mem_idx = 0;

            while (mem_idx < quirk->nr_mem) {
                memory_region_del_subregion(&vdev->vga->region[region_idx].mem,
                                            &quirk->mem[mem_idx]);
                mem_idx++;
            }
        }
    }
}
1235

1236
/*
 * Free every quirk attached to the VGA regions of @vdev: each quirk is
 * unlinked, its MemoryRegions are unparented, and its memory array, private
 * data and the quirk itself are released.
 */
void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev)
{
    int region_idx;

    for (region_idx = 0; region_idx < ARRAY_SIZE(vdev->vga->region);
         region_idx++) {
        VFIOQuirk *quirk;

        while ((quirk = QLIST_FIRST(&vdev->vga->region[region_idx].quirks))) {
            int mem_idx;

            QLIST_REMOVE(quirk, next);
            for (mem_idx = 0; mem_idx < quirk->nr_mem; mem_idx++) {
                object_unparent(OBJECT(&quirk->mem[mem_idx]));
            }
            g_free(quirk->mem);
            g_free(quirk->data);
            g_free(quirk);
        }
    }
}
1253

1254
/*
 * Probe all BAR quirks for BAR @nr of @vdev.  Each probe function checks the
 * device and BAR number itself and returns without effect when it does not
 * apply.
 */
void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
{
    /* ATI */
    vfio_probe_ati_bar4_quirk(vdev, nr);
    vfio_probe_ati_bar2_quirk(vdev, nr);

    /* NVIDIA */
    vfio_probe_nvidia_bar5_quirk(vdev, nr);
    vfio_probe_nvidia_bar0_quirk(vdev, nr);

    /* Realtek */
    vfio_probe_rtl8168_bar2_quirk(vdev, nr);

#ifdef CONFIG_VFIO_IGD
    /* Intel IGD, only when built in */
    vfio_probe_igd_bar4_quirk(vdev, nr);
#endif
}
1265

1266
/*
 * Quiesce the quirks of BAR @nr: tear down every ioeventfd owned by each
 * quirk and detach the quirk's MemoryRegions from the BAR.  The quirks stay
 * on the list; nothing is freed here.
 */
void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr)
{
    VFIOBAR *bar = &vdev->bars[nr];
    VFIOQuirk *quirk;

    QLIST_FOREACH(quirk, &bar->quirks, next) {
        int mem_idx = 0;

        /* Repeats until the quirk's ioeventfd list is empty */
        while (!QLIST_EMPTY(&quirk->ioeventfds)) {
            vfio_ioeventfd_exit(vdev, QLIST_FIRST(&quirk->ioeventfds));
        }

        while (mem_idx < quirk->nr_mem) {
            memory_region_del_subregion(bar->region.mem,
                                        &quirk->mem[mem_idx]);
            mem_idx++;
        }
    }
}
1282

1283
/*
 * Free every quirk attached to BAR @nr: each quirk is unlinked, its
 * MemoryRegions are unparented, and its memory array, private data and the
 * quirk itself are released.
 */
void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr)
{
    VFIOBAR *bar = &vdev->bars[nr];
    VFIOQuirk *quirk;

    while ((quirk = QLIST_FIRST(&bar->quirks))) {
        int mem_idx;

        QLIST_REMOVE(quirk, next);
        for (mem_idx = 0; mem_idx < quirk->nr_mem; mem_idx++) {
            object_unparent(OBJECT(&quirk->mem[mem_idx]));
        }
        g_free(quirk->mem);
        g_free(quirk->data);
        g_free(quirk);
    }
}
1299

1300
/*
1301
 * Reset quirks
1302
 */
1303
/*
 * Invoke the reset callback of every quirk that provides one, for all BARs
 * below PCI_ROM_SLOT.
 */
void vfio_quirk_reset(VFIOPCIDevice *vdev)
{
    int bar_idx;

    for (bar_idx = 0; bar_idx < PCI_ROM_SLOT; bar_idx++) {
        VFIOBAR *bar = &vdev->bars[bar_idx];
        VFIOQuirk *quirk;

        QLIST_FOREACH(quirk, &bar->quirks, next) {
            if (!quirk->reset) {
                continue;
            }
            quirk->reset(vdev, quirk);
        }
    }
}
1318

1319
/*
1320
 * AMD Radeon PCI config reset, based on Linux:
1321
 *   drivers/gpu/drm/radeon/ci_smc.c:ci_is_smc_running()
1322
 *   drivers/gpu/drm/radeon/radeon_device.c:radeon_pci_config_reset
1323
 *   drivers/gpu/drm/radeon/ci_smc.c:ci_reset_smc()
1324
 *   drivers/gpu/drm/radeon/ci_smc.c:ci_stop_smc_clock()
1325
 * IDs: include/drm/drm_pciids.h
1326
 * Registers: http://cgit.freedesktop.org/~agd5f/linux/commit/?id=4e2aa447f6f0
1327
 *
1328
 * Bonaire and Hawaii GPUs do not respond to a bus reset.  This is a bug in the
1329
 * hardware that should be fixed on future ASICs.  The symptom of this is that
1330
 * once the accelerated driver loads, Windows guests will bsod on subsequent
1331
 * attempts to load the driver, such as after VM reset or shutdown/restart.  To
1332
 * work around this, we do an AMD specific PCI config reset, followed by an SMC
1333
 * reset.  The PCI config reset only works if SMC firmware is running, so we
1334
 * have a dependency on the state of the device as to whether this reset will
1335
 * be effective.  There are still cases where we won't be able to kick the
1336
 * device into working, but this greatly improves the usability overall.  The
1337
 * config reset magic is relatively common on AMD GPUs, but the setup and SMC
1338
 * poking is largely ASIC specific.
1339
 */
1340
/*
 * Report whether the SMC firmware appears to be running, based on two
 * indirect registers read through BAR5: bit 0 of register 0x80000004 must be
 * clear and register 0x80000370 must be at least 0x20100.
 */
static bool vfio_radeon_smc_is_running(VFIOPCIDevice *vdev)
{
    uint32_t clk_reg, pc_reg;
    bool clk_bit_clear;

    /*
     * Registers 200h and 204h are index and data registers for accessing
     * indirect configuration registers within the device.
     */
    vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
    clk_reg = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
    vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000370, 4);
    pc_reg = vfio_region_read(&vdev->bars[5].region, 0x204, 4);

    clk_bit_clear = (clk_reg & 1) == 0;

    return clk_bit_clear && pc_reg >= 0x20100;
}
1355

1356
/*
1357
 * The scope of a config reset is controlled by a mode bit in the misc register
1358
 * and a fuse, exposed as a bit in another register.  The fuse is the default
1359
 * (0 = GFX, 1 = whole GPU), the misc bit is a toggle, with the formula
1360
 * scope = !(misc ^ fuse), where the resulting scope is defined the same as
1361
 * the fuse.  A truth table therefore tells us that if misc == fuse, we need
1362
 * to flip the value of the bit in the misc register.
1363
 */
1364
static void vfio_radeon_set_gfx_only_reset(VFIOPCIDevice *vdev)
1365
{
1366
    uint32_t misc, fuse;
1367
    bool a, b;
1368

1369
    vfio_region_write(&vdev->bars[5].region, 0x200, 0xc00c0000, 4);
1370
    fuse = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1371
    b = fuse & 64;
1372

1373
    vfio_region_write(&vdev->bars[5].region, 0x200, 0xc0000010, 4);
1374
    misc = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1375
    a = misc & 2;
1376

1377
    if (a == b) {
1378
        vfio_region_write(&vdev->bars[5].region, 0x204, misc ^ 2, 4);
1379
        vfio_region_read(&vdev->bars[5].region, 0x204, 4); /* flush */
1380
    }
1381
}
1382

1383
/*
 * Device specific reset for the AMD GPUs selected by
 * vfio_setup_resetfn_quirk().
 *
 * Returns -ENODEV without touching the device when reset_works is set for
 * the device, -EINVAL when the SMC firmware is not running, and 0 otherwise,
 * including when the wait for the device to come out of reset times out.
 * On the latter two paths the PCI command register is written to 0 before
 * returning.
 */
static int vfio_radeon_reset(VFIOPCIDevice *vdev)
{
    PCIDevice *pdev = &vdev->pdev;
    bool out_of_reset = false;
    uint32_t reg;
    int ret = 0;
    int tries;

    /* Defer to a kernel implemented reset */
    if (vdev->vbasedev.reset_works) {
        trace_vfio_quirk_ati_bonaire_reset_skipped(vdev->vbasedev.name);
        return -ENODEV;
    }

    /* Enable only memory BAR access */
    vfio_pci_write_config(pdev, PCI_COMMAND, PCI_COMMAND_MEMORY, 2);

    /* Reset only works if SMC firmware is loaded and running */
    if (!vfio_radeon_smc_is_running(vdev)) {
        ret = -EINVAL;
        trace_vfio_quirk_ati_bonaire_reset_no_smc(vdev->vbasedev.name);
        goto out;
    }

    /* Make sure only the GFX function is reset */
    vfio_radeon_set_gfx_only_reset(vdev);

    /* AMD PCI config reset */
    vfio_pci_write_config(pdev, 0x7c, 0x39d5e86b, 4);
    usleep(100);

    /* Read back the memory size to make sure we're out of reset */
    for (tries = 0; tries < 100000; tries++) {
        if (vfio_region_read(&vdev->bars[5].region, 0x5428, 4) != 0xffffffff) {
            out_of_reset = true;
            break;
        }
        usleep(1);
    }

    /* A timeout is only traced; the SMC reset below is still attempted */
    if (!out_of_reset) {
        trace_vfio_quirk_ati_bonaire_reset_timeout(vdev->vbasedev.name);
    }

    /* Reset SMC */
    vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000000, 4);
    reg = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
    reg |= 1;
    vfio_region_write(&vdev->bars[5].region, 0x204, reg, 4);

    /* Disable SMC clock */
    vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
    reg = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
    reg |= 1;
    vfio_region_write(&vdev->bars[5].region, 0x204, reg, 4);

    trace_vfio_quirk_ati_bonaire_reset_done(vdev->vbasedev.name);

out:
    /* Restore PCI command register */
    vfio_pci_write_config(pdev, PCI_COMMAND, 0, 2);

    return ret;
}
1443

1444
/*
 * Select a device specific reset function for @vdev.  Only vendor 0x1002
 * devices with one of the listed Bonaire or Hawaii device IDs get
 * vfio_radeon_reset(); every other device is left untouched.
 */
void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev)
{
    if (vdev->vendor_id != 0x1002) {
        return;
    }

    switch (vdev->device_id) {
    /* Bonaire */
    case 0x6649: /* Bonaire [FirePro W5100] */
    case 0x6650:
    case 0x6651:
    case 0x6658: /* Bonaire XTX [Radeon R7 260X] */
    case 0x665c: /* Bonaire XT [Radeon HD 7790/8770 / R9 260 OEM] */
    case 0x665d: /* Bonaire [Radeon R7 200 Series] */
    /* Hawaii */
    case 0x67A0: /* Hawaii XT GL [FirePro W9100] */
    case 0x67A1: /* Hawaii PRO GL [FirePro W8100] */
    case 0x67A2:
    case 0x67A8:
    case 0x67A9:
    case 0x67AA:
    case 0x67B0: /* Hawaii XT [Radeon R9 290X] */
    case 0x67B1: /* Hawaii PRO [Radeon R9 290] */
    case 0x67B8:
    case 0x67B9:
    case 0x67BA:
    case 0x67BE:
        vdev->resetfn = vfio_radeon_reset;
        trace_vfio_quirk_ati_bonaire_reset(vdev->vbasedev.name);
        break;
    default:
        break;
    }
}
1476

1477
/*
1478
 * The NVIDIA GPUDirect P2P Vendor capability allows the user to specify
1479
 * devices as a member of a clique.  Devices within the same clique ID
1480
 * are capable of direct P2P.  It's the user's responsibility that this
1481
 * is correct.  The spec says that this may reside at any unused config
1482
 * offset, but reserves and recommends hypervisors place this at C8h.
1483
 * The spec also states that the hypervisor should place this capability
1484
 * at the end of the capability list, thus next is defined as 0h.
1485
 *
1486
 * +----------------+----------------+----------------+----------------+
1487
 * | sig 7:0 ('P')  |  vndr len (8h) |    next (0h)   |   cap id (9h)  |
1488
 * +----------------+----------------+----------------+----------------+
1489
 * | rsvd 15:7(0h),id 6:3,ver 2:0(0h)|          sig 23:8 ('P2')        |
1490
 * +---------------------------------+---------------------------------+
1491
 *
1492
 * https://lists.gnu.org/archive/html/qemu-devel/2017-08/pdfUda5iEpgOS.pdf
1493
 *
1494
 * Specification for Turing and later GPU architectures:
1495
 * https://lists.gnu.org/archive/html/qemu-devel/2023-06/pdf142OR4O4c2.pdf
1496
 */
1497
static void get_nv_gpudirect_clique_id(Object *obj, Visitor *v,
1498
                                       const char *name, void *opaque,
1499
                                       Error **errp)
1500
{
1501
    Property *prop = opaque;
1502
    uint8_t *ptr = object_field_prop_ptr(obj, prop);
1503

1504
    visit_type_uint8(v, name, ptr, errp);
1505
}
1506

1507
static void set_nv_gpudirect_clique_id(Object *obj, Visitor *v,
1508
                                       const char *name, void *opaque,
1509
                                       Error **errp)
1510
{
1511
    Property *prop = opaque;
1512
    uint8_t value, *ptr = object_field_prop_ptr(obj, prop);
1513

1514
    if (!visit_type_uint8(v, name, &value, errp)) {
1515
        return;
1516
    }
1517

1518
    if (value & ~0xF) {
1519
        error_setg(errp, "Property %s: valid range 0-15", name);
1520
        return;
1521
    }
1522

1523
    *ptr = value;
1524
}
1525

1526
const PropertyInfo qdev_prop_nv_gpudirect_clique = {
1527
    .name = "uint4",
1528
    .description = "NVIDIA GPUDirect Clique ID (0 - 15)",
1529
    .get = get_nv_gpudirect_clique_id,
1530
    .set = set_nv_gpudirect_clique_id,
1531
};
1532

1533
static bool is_valid_std_cap_offset(uint8_t pos)
1534
{
1535
    return (pos >= PCI_STD_HEADER_SIZEOF &&
1536
            pos <= (PCI_CFG_SPACE_SIZE - PCI_CAP_SIZEOF));
1537
}
1538

1539
/*
 * Add the 8-byte NVIDIA GPUDirect clique vendor-specific capability to the
 * emulated config space of @vdev.
 *
 * Succeeds without doing anything when vdev->nv_gpudirect_clique is 0xFF
 * (presumably the "not configured" value -- confirm against the property
 * definition).  Otherwise the device must match PCI_VENDOR_ID_NVIDIA and
 * report base class PCI_BASE_CLASS_DISPLAY.
 *
 * Returns true on success.  On failure, sets @errp and returns false.
 */
static bool vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
{
    ERRP_GUARD();
    PCIDevice *pdev = &vdev->pdev;
    bool c8_used = false;
    bool d4_used = false;
    uint8_t cap_ptr;
    int ret, pos, body;

    if (vdev->nv_gpudirect_clique == 0xFF) {
        return true;
    }

    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID)) {
        error_setg(errp, "NVIDIA GPUDirect Clique ID: invalid device vendor");
        return false;
    }

    if (pci_get_byte(pdev->config + PCI_CLASS_DEVICE + 1) !=
        PCI_BASE_CLASS_DISPLAY) {
        error_setg(errp, "NVIDIA GPUDirect Clique ID: unsupported PCI class");
        return false;
    }

    /*
     * Per the updated specification above, it's recommended to use offset
     * D4h for Turing and later GPU architectures due to a conflict of the
     * MSI-X capability at C8h.  We don't know how to determine the GPU
     * architecture, instead we walk the capability chain to mark conflicts
     * and choose one or error based on the result.
     *
     * NB. Cap list head in pdev->config is already cleared, read from device.
     */
    ret = pread(vdev->vbasedev.fd, &cap_ptr, 1,
                vdev->config_offset + PCI_CAPABILITY_LIST);
    if (ret != 1 || !is_valid_std_cap_offset(cap_ptr)) {
        error_setg(errp, "NVIDIA GPUDirect Clique ID: error getting cap list");
        return false;
    }

    /* Follow the next pointers, noting whether C8h or D4h is already taken. */
    do {
        switch (cap_ptr) {
        case 0xC8:
            c8_used = true;
            break;
        case 0xD4:
            d4_used = true;
            break;
        default:
            break;
        }
        cap_ptr = pdev->config[cap_ptr + PCI_CAP_LIST_NEXT];
    } while (is_valid_std_cap_offset(cap_ptr));

    if (c8_used && d4_used) {
        error_setg(errp, "NVIDIA GPUDirect Clique ID: invalid config space");
        return false;
    }

    /* Prefer C8h; fall back to D4h only when C8h is occupied. */
    pos = c8_used ? 0xD4 : 0xC8;

    ret = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, 8, errp);
    if (ret < 0) {
        error_prepend(errp, "Failed to add NVIDIA GPUDirect cap: ");
        return false;
    }

    /* Mark all 8 bytes of the capability as emulated. */
    memset(vdev->emulated_config_bits + pos, 0xFF, 8);

    /* Fill in the six bytes that follow the cap id and next pointer. */
    body = pos + PCI_CAP_FLAGS;
    pci_set_byte(pdev->config + body + 0, 8);      /* vendor cap length */
    pci_set_byte(pdev->config + body + 1, 'P');    /* signature bytes */
    pci_set_byte(pdev->config + body + 2, '2');
    pci_set_byte(pdev->config + body + 3, 'P');
    pci_set_byte(pdev->config + body + 4, vdev->nv_gpudirect_clique << 3);
    pci_set_byte(pdev->config + body + 5, 0);

    return true;
}
1613

1614
/*
1615
 * The VMD endpoint provides a real PCIe domain to the guest and the guest
1616
 * kernel performs enumeration of the VMD sub-device domain. Guest transactions
1617
 * to VMD sub-devices go through MMU translation from guest addresses to
1618
 * physical addresses. When MMIO goes to an endpoint after being translated to
1619
 * physical addresses, the bridge rejects the transaction because the window
1620
 * has been programmed with guest addresses.
1621
 *
1622
 * VMD can use the Host Physical Address in order to correctly program the
1623
 * bridge windows in its PCIe domain. VMD device 28C0 has HPA shadow registers
1624
 * located at offset 0x2000 in MEMBAR2 (BAR 4). This quirk provides the HPA
1625
 * shadow registers in a vendor-specific capability register for devices
1626
 * without native support. The position of 0xE8-0xFF is in the reserved range
1627
 * of the VMD device capability space following the Power Management
1628
 * Capability.
1629
 */
1630
#define VMD_SHADOW_CAP_VER 1
1631
#define VMD_SHADOW_CAP_LEN 24
1632
static bool vfio_add_vmd_shadow_cap(VFIOPCIDevice *vdev, Error **errp)
1633
{
1634
    ERRP_GUARD();
1635
    uint8_t membar_phys[16];
1636
    int ret, pos = 0xE8;
1637

1638
    if (!(vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, 0x201D) ||
1639
          vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, 0x467F) ||
1640
          vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, 0x4C3D) ||
1641
          vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, 0x9A0B))) {
1642
        return true;
1643
    }
1644

1645
    ret = pread(vdev->vbasedev.fd, membar_phys, 16,
1646
                vdev->config_offset + PCI_BASE_ADDRESS_2);
1647
    if (ret != 16) {
1648
        error_report("VMD %s cannot read MEMBARs (%d)",
1649
                     vdev->vbasedev.name, ret);
1650
        return false;
1651
    }
1652

1653
    ret = pci_add_capability(&vdev->pdev, PCI_CAP_ID_VNDR, pos,
1654
                             VMD_SHADOW_CAP_LEN, errp);
1655
    if (ret < 0) {
1656
        error_prepend(errp, "Failed to add VMD MEMBAR Shadow cap: ");
1657
        return false;
1658
    }
1659

1660
    memset(vdev->emulated_config_bits + pos, 0xFF, VMD_SHADOW_CAP_LEN);
1661
    pos += PCI_CAP_FLAGS;
1662
    pci_set_byte(vdev->pdev.config + pos++, VMD_SHADOW_CAP_LEN);
1663
    pci_set_byte(vdev->pdev.config + pos++, VMD_SHADOW_CAP_VER);
1664
    pci_set_long(vdev->pdev.config + pos, 0x53484457); /* SHDW */
1665
    memcpy(vdev->pdev.config + pos + 4, membar_phys, 16);
1666

1667
    return true;
1668
}
1669

1670
/*
 * Add the virtual capabilities implemented in this file: first the NVIDIA
 * GPUDirect clique capability, then the VMD MEMBAR shadow capability.
 * Stops at the first helper that fails and returns false; returns true
 * when both succeed.
 */
bool vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp)
{
    return vfio_add_nv_gpudirect_cap(vdev, errp) &&
           vfio_add_vmd_shadow_cap(vdev, errp);
}
1682

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.