qemu
/*
 * IGD device quirks
 *
 * Copyright Red Hat, Inc. 2016
 *
 * Authors:
 *  Alex Williamson <alex.williamson@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */
12
13#include "qemu/osdep.h"14#include "qemu/units.h"15#include "qemu/error-report.h"16#include "qapi/error.h"17#include "hw/hw.h"18#include "hw/nvram/fw_cfg.h"19#include "pci.h"20#include "trace.h"21
/*
 * Intel IGD support
 *
 * Obviously IGD is not a discrete device, this is evidenced not only by it
 * being integrated into the CPU, but by the various chipset and BIOS
 * dependencies that it brings along with it.  Intel is trying to move away
 * from this and Broadwell and newer devices can run in what Intel calls
 * "Universal Pass-Through" mode, or UPT.  Theoretically in UPT mode, nothing
 * more is required beyond assigning the IGD device to a VM.  There are
 * however support limitations to this mode.  It only supports IGD as a
 * secondary graphics device in the VM and it doesn't officially support any
 * physical outputs.
 *
 * The code here attempts to enable what we'll call legacy mode assignment,
 * where IGD retains most of the capabilities we expect it to have on bare
 * metal.  To enable this mode, the IGD device must be assigned to the VM
 * at PCI address 00:02.0, it must have a ROM, it very likely needs VGA
 * support, we must have VM BIOS support for reserving and populating some
 * of the required tables, and we need to tweak the chipset with revisions
 * and IDs and an LPC/ISA bridge device.  The intention is to make all of
 * this happen automatically by installing the device at the correct VM PCI
 * bus address.  If any of the conditions are not met, we cross our fingers
 * and hope the user knows better.
 *
 * NB - It is possible to enable physical outputs in UPT mode by supplying
 * an OpRegion table.  We don't do this by default because the guest driver
 * behaves differently if an OpRegion is provided and no monitor is attached
 * vs no OpRegion and a monitor being attached or not.  Effectively, if a
 * headless setup is desired, the OpRegion gets in the way of that.
 */
52
53/*
54* This presumes the device is already known to be an Intel VGA device, so we
55* take liberties in which device ID bits match which generation. This should
56* not be taken as an indication that all the devices are supported, or even
57* supportable, some of them don't even support VT-d.
58* See linux:include/drm/i915_pciids.h for IDs.
59*/
60static int igd_gen(VFIOPCIDevice *vdev)61{
62if ((vdev->device_id & 0xfff) == 0xa84) {63return 8; /* Broxton */64}65
66switch (vdev->device_id & 0xff00) {67/* Old, untested, unavailable, unknown */68case 0x0000:69case 0x2500:70case 0x2700:71case 0x2900:72case 0x2a00:73case 0x2e00:74case 0x3500:75case 0xa000:76return -1;77/* SandyBridge, IvyBridge, ValleyView, Haswell */78case 0x0100:79case 0x0400:80case 0x0a00:81case 0x0c00:82case 0x0d00:83case 0x0f00:84return 6;85/* BroadWell, CherryView, SkyLake, KabyLake */86case 0x1600:87case 0x1900:88case 0x2200:89case 0x5900:90return 8;91}92
93return 8; /* Assume newer is compatible */94}
95
/* State shared by the BAR4 index and data quirk handlers of one device. */
typedef struct VFIOIGDQuirk {
    struct VFIOPCIDevice *vdev; /* Device the quirk is attached to */
    uint32_t index;             /* Last index register write, ~0 if none */
    uint32_t bdsm;              /* BDSM sampled at quirk setup, 1MB aligned */
} VFIOIGDQuirk;

/* PCI config space offsets of the IGD registers used in this file */
#define IGD_GMCH 0x50 /* Graphics Control Register */
#define IGD_BDSM 0x5c /* Base Data of Stolen Memory */
105
/*
 * The rather short list of registers that we copy from the host devices.
 * The LPC/ISA bridge values are definitely needed to support the vBIOS, the
 * host bridge values may or may not be needed depending on the guest OS.
 * Since we're only munging revision and subsystem values on the host bridge,
 * we don't require our own device.  The LPC/ISA bridge needs to be our very
 * own though.
 */
/* One config space register to copy: byte offset and length in bytes. */
typedef struct {
    uint8_t offset;
    uint8_t len;
} IGDHostInfo;

119static const IGDHostInfo igd_host_bridge_infos[] = {120{PCI_REVISION_ID, 2},121{PCI_SUBSYSTEM_VENDOR_ID, 2},122{PCI_SUBSYSTEM_ID, 2},123};124
125static const IGDHostInfo igd_lpc_bridge_infos[] = {126{PCI_VENDOR_ID, 2},127{PCI_DEVICE_ID, 2},128{PCI_REVISION_ID, 2},129{PCI_SUBSYSTEM_VENDOR_ID, 2},130{PCI_SUBSYSTEM_ID, 2},131};132
133static int vfio_pci_igd_copy(VFIOPCIDevice *vdev, PCIDevice *pdev,134struct vfio_region_info *info,135const IGDHostInfo *list, int len)136{
137int i, ret;138
139for (i = 0; i < len; i++) {140ret = pread(vdev->vbasedev.fd, pdev->config + list[i].offset,141list[i].len, info->offset + list[i].offset);142if (ret != list[i].len) {143error_report("IGD copy failed: %m");144return -errno;145}146}147
148return 0;149}
150
151/*
152* Stuff a few values into the host bridge.
153*/
154static int vfio_pci_igd_host_init(VFIOPCIDevice *vdev,155struct vfio_region_info *info)156{
157PCIBus *bus;158PCIDevice *host_bridge;159int ret;160
161bus = pci_device_root_bus(&vdev->pdev);162host_bridge = pci_find_device(bus, 0, PCI_DEVFN(0, 0));163
164if (!host_bridge) {165error_report("Can't find host bridge");166return -ENODEV;167}168
169ret = vfio_pci_igd_copy(vdev, host_bridge, info, igd_host_bridge_infos,170ARRAY_SIZE(igd_host_bridge_infos));171if (!ret) {172trace_vfio_pci_igd_host_bridge_enabled(vdev->vbasedev.name);173}174
175return ret;176}
177
/*
 * IGD LPC/ISA bridge support code.  The vBIOS needs this, but we can't write
 * arbitrary values into just any bridge, so we must create our own.  We try
 * to handle if the user has created it for us, which they might want to do
 * to enable multifunction so we don't occupy the whole PCI slot.
 */
184static void vfio_pci_igd_lpc_bridge_realize(PCIDevice *pdev, Error **errp)185{
186if (pdev->devfn != PCI_DEVFN(0x1f, 0)) {187error_setg(errp, "VFIO dummy ISA/LPC bridge must have address 1f.0");188}189}
190
191static void vfio_pci_igd_lpc_bridge_class_init(ObjectClass *klass, void *data)192{
193DeviceClass *dc = DEVICE_CLASS(klass);194PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);195
196set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);197dc->desc = "VFIO dummy ISA/LPC bridge for IGD assignment";198dc->hotpluggable = false;199k->realize = vfio_pci_igd_lpc_bridge_realize;200k->class_id = PCI_CLASS_BRIDGE_ISA;201}
202
203static const TypeInfo vfio_pci_igd_lpc_bridge_info = {204.name = "vfio-pci-igd-lpc-bridge",205.parent = TYPE_PCI_DEVICE,206.class_init = vfio_pci_igd_lpc_bridge_class_init,207.interfaces = (InterfaceInfo[]) {208{ INTERFACE_CONVENTIONAL_PCI_DEVICE },209{ },210},211};212
213static void vfio_pci_igd_register_types(void)214{
215type_register_static(&vfio_pci_igd_lpc_bridge_info);216}
217
218type_init(vfio_pci_igd_register_types)219
220static int vfio_pci_igd_lpc_init(VFIOPCIDevice *vdev,221struct vfio_region_info *info)222{
223PCIDevice *lpc_bridge;224int ret;225
226lpc_bridge = pci_find_device(pci_device_root_bus(&vdev->pdev),2270, PCI_DEVFN(0x1f, 0));228if (!lpc_bridge) {229lpc_bridge = pci_create_simple(pci_device_root_bus(&vdev->pdev),230PCI_DEVFN(0x1f, 0), "vfio-pci-igd-lpc-bridge");231}232
233ret = vfio_pci_igd_copy(vdev, lpc_bridge, info, igd_lpc_bridge_infos,234ARRAY_SIZE(igd_lpc_bridge_infos));235if (!ret) {236trace_vfio_pci_igd_lpc_bridge_enabled(vdev->vbasedev.name);237}238
239return ret;240}
241
242/*
243* IGD Gen8 and newer support up to 8MB for the GTT and use a 64bit PTE
244* entry, older IGDs use 2MB and 32bit. Each PTE maps a 4k page. Therefore
245* we either have 2M/4k * 4 = 2k or 8M/4k * 8 = 16k as the maximum iobar index
246* for programming the GTT.
247*
248* See linux:include/drm/i915_drm.h for shift and mask values.
249*/
250static int vfio_igd_gtt_max(VFIOPCIDevice *vdev)251{
252uint32_t gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, sizeof(gmch));253int ggms, gen = igd_gen(vdev);254
255gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, sizeof(gmch));256ggms = (gmch >> (gen < 8 ? 8 : 6)) & 0x3;257if (gen > 6) {258ggms = 1 << ggms;259}260
261ggms *= MiB;262
263return (ggms / (4 * KiB)) * (gen < 8 ? 4 : 8);264}
265
/*
 * The IGD ROM will make use of stolen memory (GGMS) for support of VESA modes.
 * Somehow the host stolen memory range is used for this, but how the ROM gets
 * it is a mystery, perhaps it's hardcoded into the ROM.  Thankfully though, it
 * reprograms the GTT through the IOBAR where we can trap it and transpose the
 * programming to the VM allocated buffer.  That buffer gets reserved by the VM
 * firmware via the fw_cfg entry added below.  Here we're just monitoring the
 * IOBAR address and data registers to detect a write sequence targeting the
 * GTTADR.  This code is developed by observed behavior and doesn't have a
 * direct spec reference, unfortunately.
 */
277static uint64_t vfio_igd_quirk_data_read(void *opaque,278hwaddr addr, unsigned size)279{
280VFIOIGDQuirk *igd = opaque;281VFIOPCIDevice *vdev = igd->vdev;282
283igd->index = ~0;284
285return vfio_region_read(&vdev->bars[4].region, addr + 4, size);286}
287
288static void vfio_igd_quirk_data_write(void *opaque, hwaddr addr,289uint64_t data, unsigned size)290{
291VFIOIGDQuirk *igd = opaque;292VFIOPCIDevice *vdev = igd->vdev;293uint64_t val = data;294int gen = igd_gen(vdev);295
296/*297* Programming the GGMS starts at index 0x1 and uses every 4th index (ie.
298* 0x1, 0x5, 0x9, 0xd,...). For pre-Gen8 each 4-byte write is a whole PTE
299* entry, with 0th bit enable set. For Gen8 and up, PTEs are 64bit, so
300* entries 0x5 & 0xd are the high dword, in our case zero. Each PTE points
301* to a 4k page, which we translate to a page from the VM allocated region,
302* pointed to by the BDSM register. If this is not set, we fail.
303*
304* We trap writes to the full configured GTT size, but we typically only
305* see the vBIOS writing up to (nearly) the 1MB barrier. In fact it often
306* seems to miss the last entry for an even 1MB GTT. Doing a gratuitous
307* write of that last entry does work, but is hopefully unnecessary since
308* we clear the previous GTT on initialization.
309*/
310if ((igd->index % 4 == 1) && igd->index < vfio_igd_gtt_max(vdev)) {311if (gen < 8 || (igd->index % 8 == 1)) {312uint32_t base;313
314base = pci_get_long(vdev->pdev.config + IGD_BDSM);315if (!base) {316hw_error("vfio-igd: Guest attempted to program IGD GTT before "317"BIOS reserved stolen memory. Unsupported BIOS?");318}319
320val = data - igd->bdsm + base;321} else {322val = 0; /* upper 32bits of pte, we only enable below 4G PTEs */323}324
325trace_vfio_pci_igd_bar4_write(vdev->vbasedev.name,326igd->index, data, val);327}328
329vfio_region_write(&vdev->bars[4].region, addr + 4, val, size);330
331igd->index = ~0;332}
333
334static const MemoryRegionOps vfio_igd_data_quirk = {335.read = vfio_igd_quirk_data_read,336.write = vfio_igd_quirk_data_write,337.endianness = DEVICE_LITTLE_ENDIAN,338};339
340static uint64_t vfio_igd_quirk_index_read(void *opaque,341hwaddr addr, unsigned size)342{
343VFIOIGDQuirk *igd = opaque;344VFIOPCIDevice *vdev = igd->vdev;345
346igd->index = ~0;347
348return vfio_region_read(&vdev->bars[4].region, addr, size);349}
350
351static void vfio_igd_quirk_index_write(void *opaque, hwaddr addr,352uint64_t data, unsigned size)353{
354VFIOIGDQuirk *igd = opaque;355VFIOPCIDevice *vdev = igd->vdev;356
357igd->index = data;358
359vfio_region_write(&vdev->bars[4].region, addr, data, size);360}
361
362static const MemoryRegionOps vfio_igd_index_quirk = {363.read = vfio_igd_quirk_index_read,364.write = vfio_igd_quirk_index_write,365.endianness = DEVICE_LITTLE_ENDIAN,366};367
368void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)369{
370g_autofree struct vfio_region_info *rom = NULL;371g_autofree struct vfio_region_info *opregion = NULL;372g_autofree struct vfio_region_info *host = NULL;373g_autofree struct vfio_region_info *lpc = NULL;374VFIOQuirk *quirk;375VFIOIGDQuirk *igd;376PCIDevice *lpc_bridge;377int i, ret, ggms_mb, gms_mb = 0, gen;378uint64_t *bdsm_size;379uint32_t gmch;380uint16_t cmd_orig, cmd;381Error *err = NULL;382
383/*384* This must be an Intel VGA device at address 00:02.0 for us to even
385* consider enabling legacy mode. The vBIOS has dependencies on the
386* PCI bus address.
387*/
388if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) ||389!vfio_is_vga(vdev) || nr != 4 ||390&vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev),3910, PCI_DEVFN(0x2, 0))) {392return;393}394
395/*396* We need to create an LPC/ISA bridge at PCI bus address 00:1f.0 that we
397* can stuff host values into, so if there's already one there and it's not
398* one we can hack on, legacy mode is no-go. Sorry Q35.
399*/
400lpc_bridge = pci_find_device(pci_device_root_bus(&vdev->pdev),4010, PCI_DEVFN(0x1f, 0));402if (lpc_bridge && !object_dynamic_cast(OBJECT(lpc_bridge),403"vfio-pci-igd-lpc-bridge")) {404error_report("IGD device %s cannot support legacy mode due to existing "405"devices at address 1f.0", vdev->vbasedev.name);406return;407}408
409/*410* IGD is not a standard, they like to change their specs often. We
411* only attempt to support back to SandBridge and we hope that newer
412* devices maintain compatibility with generation 8.
413*/
414gen = igd_gen(vdev);415if (gen != 6 && gen != 8) {416error_report("IGD device %s is unsupported in legacy mode, "417"try SandyBridge or newer", vdev->vbasedev.name);418return;419}420
421/*422* Most of what we're doing here is to enable the ROM to run, so if
423* there's no ROM, there's no point in setting up this quirk.
424* NB. We only seem to get BIOS ROMs, so a UEFI VM would need CSM support.
425*/
426ret = vfio_get_region_info(&vdev->vbasedev,427VFIO_PCI_ROM_REGION_INDEX, &rom);428if ((ret || !rom->size) && !vdev->pdev.romfile) {429error_report("IGD device %s has no ROM, legacy mode disabled",430vdev->vbasedev.name);431return;432}433
434/*435* Ignore the hotplug corner case, mark the ROM failed, we can't
436* create the devices we need for legacy mode in the hotplug scenario.
437*/
438if (vdev->pdev.qdev.hotplugged) {439error_report("IGD device %s hotplugged, ROM disabled, "440"legacy mode disabled", vdev->vbasedev.name);441vdev->rom_read_failed = true;442return;443}444
445/*446* Check whether we have all the vfio device specific regions to
447* support legacy mode (added in Linux v4.6). If not, bail.
448*/
449ret = vfio_get_dev_region_info(&vdev->vbasedev,450VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,451VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion);452if (ret) {453error_report("IGD device %s does not support OpRegion access,"454"legacy mode disabled", vdev->vbasedev.name);455return;456}457
458ret = vfio_get_dev_region_info(&vdev->vbasedev,459VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,460VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG, &host);461if (ret) {462error_report("IGD device %s does not support host bridge access,"463"legacy mode disabled", vdev->vbasedev.name);464return;465}466
467ret = vfio_get_dev_region_info(&vdev->vbasedev,468VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,469VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG, &lpc);470if (ret) {471error_report("IGD device %s does not support LPC bridge access,"472"legacy mode disabled", vdev->vbasedev.name);473return;474}475
476gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4);477
478/*479* If IGD VGA Disable is clear (expected) and VGA is not already enabled,
480* try to enable it. Probably shouldn't be using legacy mode without VGA,
481* but also no point in us enabling VGA if disabled in hardware.
482*/
483if (!(gmch & 0x2) && !vdev->vga && !vfio_populate_vga(vdev, &err)) {484error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);485error_report("IGD device %s failed to enable VGA access, "486"legacy mode disabled", vdev->vbasedev.name);487return;488}489
490/* Create our LPC/ISA bridge */491ret = vfio_pci_igd_lpc_init(vdev, lpc);492if (ret) {493error_report("IGD device %s failed to create LPC bridge, "494"legacy mode disabled", vdev->vbasedev.name);495return;496}497
498/* Stuff some host values into the VM PCI host bridge */499ret = vfio_pci_igd_host_init(vdev, host);500if (ret) {501error_report("IGD device %s failed to modify host bridge, "502"legacy mode disabled", vdev->vbasedev.name);503return;504}505
506/* Setup OpRegion access */507if (!vfio_pci_igd_opregion_init(vdev, opregion, &err)) {508error_append_hint(&err, "IGD legacy mode disabled\n");509error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);510return;511}512
513/* Setup our quirk to munge GTT addresses to the VM allocated buffer */514quirk = vfio_quirk_alloc(2);515igd = quirk->data = g_malloc0(sizeof(*igd));516igd->vdev = vdev;517igd->index = ~0;518igd->bdsm = vfio_pci_read_config(&vdev->pdev, IGD_BDSM, 4);519igd->bdsm &= ~((1 * MiB) - 1); /* 1MB aligned */520
521memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_igd_index_quirk,522igd, "vfio-igd-index-quirk", 4);523memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,5240, &quirk->mem[0], 1);525
526memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_igd_data_quirk,527igd, "vfio-igd-data-quirk", 4);528memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,5294, &quirk->mem[1], 1);530
531QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);532
533/* Determine the size of stolen memory needed for GTT */534ggms_mb = (gmch >> (gen < 8 ? 8 : 6)) & 0x3;535if (gen > 6) {536ggms_mb = 1 << ggms_mb;537}538
539/*540* Assume we have no GMS memory, but allow it to be overridden by device
541* option (experimental). The spec doesn't actually allow zero GMS when
542* when IVD (IGD VGA Disable) is clear, but the claim is that it's unused,
543* so let's not waste VM memory for it.
544*/
545gmch &= ~((gen < 8 ? 0x1f : 0xff) << (gen < 8 ? 3 : 8));546
547if (vdev->igd_gms) {548if (vdev->igd_gms <= 0x10) {549gms_mb = vdev->igd_gms * 32;550gmch |= vdev->igd_gms << (gen < 8 ? 3 : 8);551} else {552error_report("Unsupported IGD GMS value 0x%x", vdev->igd_gms);553vdev->igd_gms = 0;554}555}556
557/*558* Request reserved memory for stolen memory via fw_cfg. VM firmware
559* must allocate a 1MB aligned reserved memory region below 4GB with
560* the requested size (in bytes) for use by the Intel PCI class VGA
561* device at VM address 00:02.0. The base address of this reserved
562* memory region must be written to the device BDSM register at PCI
563* config offset 0x5C.
564*/
565bdsm_size = g_malloc(sizeof(*bdsm_size));566*bdsm_size = cpu_to_le64((ggms_mb + gms_mb) * MiB);567fw_cfg_add_file(fw_cfg_find(), "etc/igd-bdsm-size",568bdsm_size, sizeof(*bdsm_size));569
570/* GMCH is read-only, emulated */571pci_set_long(vdev->pdev.config + IGD_GMCH, gmch);572pci_set_long(vdev->pdev.wmask + IGD_GMCH, 0);573pci_set_long(vdev->emulated_config_bits + IGD_GMCH, ~0);574
575/* BDSM is read-write, emulated. The BIOS needs to be able to write it */576pci_set_long(vdev->pdev.config + IGD_BDSM, 0);577pci_set_long(vdev->pdev.wmask + IGD_BDSM, ~0);578pci_set_long(vdev->emulated_config_bits + IGD_BDSM, ~0);579
580/*581* This IOBAR gives us access to GTTADR, which allows us to write to
582* the GTT itself. So let's go ahead and write zero to all the GTT
583* entries to avoid spurious DMA faults. Be sure I/O access is enabled
584* before talking to the device.
585*/
586if (pread(vdev->vbasedev.fd, &cmd_orig, sizeof(cmd_orig),587vdev->config_offset + PCI_COMMAND) != sizeof(cmd_orig)) {588error_report("IGD device %s - failed to read PCI command register",589vdev->vbasedev.name);590}591
592cmd = cmd_orig | PCI_COMMAND_IO;593
594if (pwrite(vdev->vbasedev.fd, &cmd, sizeof(cmd),595vdev->config_offset + PCI_COMMAND) != sizeof(cmd)) {596error_report("IGD device %s - failed to write PCI command register",597vdev->vbasedev.name);598}599
600for (i = 1; i < vfio_igd_gtt_max(vdev); i += 4) {601vfio_region_write(&vdev->bars[4].region, 0, i, 4);602vfio_region_write(&vdev->bars[4].region, 4, 0, 4);603}604
605if (pwrite(vdev->vbasedev.fd, &cmd_orig, sizeof(cmd_orig),606vdev->config_offset + PCI_COMMAND) != sizeof(cmd_orig)) {607error_report("IGD device %s - failed to restore PCI command register",608vdev->vbasedev.name);609}610
611trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name, ggms_mb + gms_mb);612}
613