/*
 * iommufd container backend
 *
 * Copyright (C) 2023 Intel Corporation.
 * Copyright Red Hat, Inc. 2023
 *
 * Authors: Yi Liu <yi.l.liu@intel.com>
 *          Eric Auger <eric.auger@redhat.com>
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */
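
/*
 * This backend is used when a VFIO device is opened through its
 * character device node (/dev/vfio/devices/vfioX) rather than through
 * a legacy VFIO group, e.g. with a command line along the lines of:
 *
 *   -object iommufd,id=iommufd0
 *   -device vfio-pci,host=0000:02:00.0,iommufd=iommufd0
 *
 * (the PCI address above is only illustrative).
 */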

#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include <linux/vfio.h>
#include <linux/iommufd.h>

#include "hw/vfio/vfio-common.h"
#include "qemu/error-report.h"
#include "trace.h"
#include "qapi/error.h"
#include "sysemu/iommufd.h"
#include "hw/qdev-core.h"
#include "sysemu/reset.h"
#include "qemu/cutils.h"
#include "qemu/chardev_open.h"
#include "pci.h"
#include "exec/ram_addr.h"
static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
                            ram_addr_t size, void *vaddr, bool readonly)
{
    const VFIOIOMMUFDContainer *container =
        container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);

    return iommufd_backend_map_dma(container->be,
                                   container->ioas_id,
                                   iova, size, vaddr, readonly);
}

static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer,
                              hwaddr iova, ram_addr_t size,
                              IOMMUTLBEntry *iotlb)
{
    const VFIOIOMMUFDContainer *container =
        container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);

    /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */
    return iommufd_backend_unmap_dma(container->be,
                                     container->ioas_id, iova, size);
}

static bool iommufd_cdev_kvm_device_add(VFIODevice *vbasedev, Error **errp)
{
    return !vfio_kvm_device_add_fd(vbasedev->fd, errp);
}

static void iommufd_cdev_kvm_device_del(VFIODevice *vbasedev)
{
    Error *err = NULL;

    if (vfio_kvm_device_del_fd(vbasedev->fd, &err)) {
        error_report_err(err);
    }
}

static bool iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp)
{
    IOMMUFDBackend *iommufd = vbasedev->iommufd;
    struct vfio_device_bind_iommufd bind = {
        .argsz = sizeof(bind),
        .flags = 0,
    };

    if (!iommufd_backend_connect(iommufd, errp)) {
        return false;
    }

    /*
     * Add the device to kvm-vfio so that KVM can track it. Some
     * emulated devices in particular need the KVM information to be
     * available when the device is opened.
     */
    if (!iommufd_cdev_kvm_device_add(vbasedev, errp)) {
        goto err_kvm_device_add;
    }

    /* Bind device to iommufd */
    bind.iommufd = iommufd->fd;
    if (ioctl(vbasedev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind)) {
        error_setg_errno(errp, errno, "error bind device fd=%d to iommufd=%d",
                         vbasedev->fd, bind.iommufd);
        goto err_bind;
    }

    vbasedev->devid = bind.out_devid;
    trace_iommufd_cdev_connect_and_bind(bind.iommufd, vbasedev->name,
                                        vbasedev->fd, vbasedev->devid);
    return true;
err_bind:
    iommufd_cdev_kvm_device_del(vbasedev);
err_kvm_device_add:
    iommufd_backend_disconnect(iommufd);
    return false;
}

static void iommufd_cdev_unbind_and_disconnect(VFIODevice *vbasedev)
{
    /* Unbind is automatically conducted when device fd is closed */
    iommufd_cdev_kvm_device_del(vbasedev);
    iommufd_backend_disconnect(vbasedev->iommufd);
}

static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
{
    return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
}

static int iommufd_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
                                           bool start, Error **errp)
{
    const VFIOIOMMUFDContainer *container =
        container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
    VFIOIOASHwpt *hwpt;

    QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
        if (!iommufd_hwpt_dirty_tracking(hwpt)) {
            continue;
        }

        if (!iommufd_backend_set_dirty_tracking(container->be,
                                                hwpt->hwpt_id, start, errp)) {
            goto err;
        }
    }

    return 0;

err:
    /*
     * Best-effort rollback: revert the tracking state on every
     * dirty-tracking-capable HWPT, ignoring errors (NULL errp).
     */
    QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
        if (!iommufd_hwpt_dirty_tracking(hwpt)) {
            continue;
        }
        iommufd_backend_set_dirty_tracking(container->be,
                                           hwpt->hwpt_id, !start, NULL);
    }
    return -EINVAL;
}

static int iommufd_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
                                      VFIOBitmap *vbmap, hwaddr iova,
                                      hwaddr size, Error **errp)
{
    VFIOIOMMUFDContainer *container = container_of(bcontainer,
                                                   VFIOIOMMUFDContainer,
                                                   bcontainer);
    unsigned long page_size = qemu_real_host_page_size();
    VFIOIOASHwpt *hwpt;

    QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
        if (!iommufd_hwpt_dirty_tracking(hwpt)) {
            continue;
        }

        if (!iommufd_backend_get_dirty_bitmap(container->be, hwpt->hwpt_id,
                                              iova, size, page_size,
                                              (uint64_t *)vbmap->bitmap,
                                              errp)) {
            return -EINVAL;
        }
    }

    return 0;
}
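
/*
 * Resolve the character device for a given sysfs device path. The
 * kernel exposes <sysfs_path>/vfio-dev/vfioX/dev containing the
 * "major:minor" pair of the cdev; that pair is matched against
 * /dev/vfio/devices/vfioX by open_cdev() before the fd is returned.
 */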
static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp)
{
    ERRP_GUARD();
    long int ret = -ENOTTY;
    g_autofree char *path = NULL;
    g_autofree char *vfio_dev_path = NULL;
    g_autofree char *vfio_path = NULL;
    DIR *dir = NULL;
    struct dirent *dent;
    g_autofree gchar *contents = NULL;
    gsize length;
    int major, minor;
    dev_t vfio_devt;

    path = g_strdup_printf("%s/vfio-dev", sysfs_path);
    dir = opendir(path);
    if (!dir) {
        error_setg_errno(errp, errno, "couldn't open directory %s", path);
        goto out;
    }

    while ((dent = readdir(dir))) {
        if (!strncmp(dent->d_name, "vfio", 4)) {
            vfio_dev_path = g_strdup_printf("%s/%s/dev", path, dent->d_name);
            break;
        }
    }

    if (!vfio_dev_path) {
        error_setg(errp, "failed to find vfio-dev/vfioX/dev");
        goto out_close_dir;
    }

    if (!g_file_get_contents(vfio_dev_path, &contents, &length, NULL)) {
        error_setg(errp, "failed to load \"%s\"", vfio_dev_path);
        goto out_close_dir;
    }

    if (sscanf(contents, "%d:%d", &major, &minor) != 2) {
        error_setg(errp, "failed to get major:minor for \"%s\"", vfio_dev_path);
        goto out_close_dir;
    }
    vfio_devt = makedev(major, minor);

    vfio_path = g_strdup_printf("/dev/vfio/devices/%s", dent->d_name);
    ret = open_cdev(vfio_path, vfio_devt);
    if (ret < 0) {
        error_setg(errp, "Failed to open %s", vfio_path);
    }

    trace_iommufd_cdev_getfd(vfio_path, ret);

out_close_dir:
    closedir(dir);
out:
    if (*errp) {
        error_prepend(errp, VFIO_MSG_PREFIX, path);
    }

    return ret;
}

static int iommufd_cdev_attach_ioas_hwpt(VFIODevice *vbasedev, uint32_t id,
                                         Error **errp)
{
    int iommufd = vbasedev->iommufd->fd;
    struct vfio_device_attach_iommufd_pt attach_data = {
        .argsz = sizeof(attach_data),
        .flags = 0,
        .pt_id = id,
    };

    /* Attach device to an IOAS or hwpt within iommufd */
    if (ioctl(vbasedev->fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &attach_data)) {
        error_setg_errno(errp, errno,
                         "[iommufd=%d] error attach %s (%d) to id=%d",
                         iommufd, vbasedev->name, vbasedev->fd, id);
        return -errno;
    }

    trace_iommufd_cdev_attach_ioas_hwpt(iommufd, vbasedev->name,
                                        vbasedev->fd, id);
    return 0;
}

static bool iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error **errp)
{
    int iommufd = vbasedev->iommufd->fd;
    struct vfio_device_detach_iommufd_pt detach_data = {
        .argsz = sizeof(detach_data),
        .flags = 0,
    };

    if (ioctl(vbasedev->fd, VFIO_DEVICE_DETACH_IOMMUFD_PT, &detach_data)) {
        error_setg_errno(errp, errno, "detach %s failed", vbasedev->name);
        return false;
    }

    trace_iommufd_cdev_detach_ioas_hwpt(iommufd, vbasedev->name);
    return true;
}
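
/*
 * Attach a device by reusing a compatible hardware page table (HWPT)
 * already allocated for this container when one exists; otherwise
 * allocate a new one, requesting dirty tracking if the IOMMU
 * advertises IOMMU_HW_CAP_DIRTY_TRACKING.
 */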
static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
                                         VFIOIOMMUFDContainer *container,
                                         Error **errp)
{
    ERRP_GUARD();
    IOMMUFDBackend *iommufd = vbasedev->iommufd;
    uint32_t flags = 0;
    VFIOIOASHwpt *hwpt;
    uint32_t hwpt_id;
    int ret;

    /* Try to find a domain */
    QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
        ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
        if (ret) {
            /* -EINVAL means the domain is incompatible with the device. */
            if (ret == -EINVAL) {
                /*
                 * This is an expected failure: it just means we will try
                 * another domain, or create one if no existing compatible
                 * domain is found, which is why the error is discarded
                 * below.
                 */
                error_free(*errp);
                *errp = NULL;
                continue;
            }

            return false;
        } else {
            vbasedev->hwpt = hwpt;
            QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
            vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt);
            return true;
        }
    }

    /*
     * This is quite early and VFIO Migration state isn't yet fully
     * initialized, thus rely only on IOMMU hardware capabilities as to
     * whether IOMMU dirty tracking is going to be requested. Later
     * vfio_migration_realize() may decide to use VF dirty tracking
     * instead.
     */
    if (vbasedev->hiod->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) {
        flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
    }

    if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid,
                                    container->ioas_id, flags,
                                    IOMMU_HWPT_DATA_NONE, 0, NULL,
                                    &hwpt_id, errp)) {
        return false;
    }

    hwpt = g_malloc0(sizeof(*hwpt));
    hwpt->hwpt_id = hwpt_id;
    hwpt->hwpt_flags = flags;
    QLIST_INIT(&hwpt->device_list);

    ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
    if (ret) {
        iommufd_backend_free_id(container->be, hwpt->hwpt_id);
        g_free(hwpt);
        return false;
    }

    vbasedev->hwpt = hwpt;
    vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt);
    QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
    QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next);
    container->bcontainer.dirty_pages_supported |=
                                vbasedev->iommu_dirty_tracking;
    if (container->bcontainer.dirty_pages_supported &&
        !vbasedev->iommu_dirty_tracking) {
        warn_report("IOMMU instance for device %s doesn't support dirty tracking",
                    vbasedev->name);
    }
    return true;
}

static void iommufd_cdev_autodomains_put(VFIODevice *vbasedev,
                                         VFIOIOMMUFDContainer *container)
{
    VFIOIOASHwpt *hwpt = vbasedev->hwpt;

    QLIST_REMOVE(vbasedev, hwpt_next);
    vbasedev->hwpt = NULL;

    /* The HWPT is freed once its last device has been detached */
    if (QLIST_EMPTY(&hwpt->device_list)) {
        QLIST_REMOVE(hwpt, next);
        iommufd_backend_free_id(container->be, hwpt->hwpt_id);
        g_free(hwpt);
    }
}

static bool iommufd_cdev_attach_container(VFIODevice *vbasedev,
                                          VFIOIOMMUFDContainer *container,
                                          Error **errp)
{
    /* mdevs aren't physical devices and will fail with auto domains */
    if (!vbasedev->mdev) {
        return iommufd_cdev_autodomains_get(vbasedev, container, errp);
    }

    return !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp);
}

static void iommufd_cdev_detach_container(VFIODevice *vbasedev,
                                          VFIOIOMMUFDContainer *container)
{
    Error *err = NULL;

    if (!iommufd_cdev_detach_ioas_hwpt(vbasedev, &err)) {
        error_report_err(err);
    }

    if (vbasedev->hwpt) {
        iommufd_cdev_autodomains_put(vbasedev, container);
    }
}

static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container)
{
    VFIOContainerBase *bcontainer = &container->bcontainer;

    if (!QLIST_EMPTY(&bcontainer->device_list)) {
        return;
    }
    memory_listener_unregister(&bcontainer->listener);
    iommufd_backend_free_id(container->be, container->ioas_id);
    object_unref(container);
}

static int iommufd_cdev_ram_block_discard_disable(bool state)
{
    /*
     * We support coordinated discarding of RAM via the RamDiscardManager.
     */
    return ram_block_uncoordinated_discard_disable(state);
}
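
/*
 * Query the usable IOVA ranges of an IOAS. IOMMU_IOAS_IOVA_RANGES is
 * issued twice: the first call, made with num_iovas == 0, fails with
 * EMSGSIZE when ranges exist but reports how many there are, which
 * sizes the reallocated buffer for the second call that fetches them.
 */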
static bool iommufd_cdev_get_info_iova_range(VFIOIOMMUFDContainer *container,
                                             uint32_t ioas_id, Error **errp)
{
    VFIOContainerBase *bcontainer = &container->bcontainer;
    g_autofree struct iommu_ioas_iova_ranges *info = NULL;
    struct iommu_iova_range *iova_ranges;
    int sz, fd = container->be->fd;

    info = g_malloc0(sizeof(*info));
    info->size = sizeof(*info);
    info->ioas_id = ioas_id;

    if (ioctl(fd, IOMMU_IOAS_IOVA_RANGES, info) && errno != EMSGSIZE) {
        goto error;
    }

    sz = info->num_iovas * sizeof(struct iommu_iova_range);
    info = g_realloc(info, sizeof(*info) + sz);
    info->allowed_iovas = (uintptr_t)(info + 1);

    if (ioctl(fd, IOMMU_IOAS_IOVA_RANGES, info)) {
        goto error;
    }

    iova_ranges = (struct iommu_iova_range *)(uintptr_t)info->allowed_iovas;

    for (int i = 0; i < info->num_iovas; i++) {
        Range *range = g_new(Range, 1);

        range_set_bounds(range, iova_ranges[i].start, iova_ranges[i].last);
        bcontainer->iova_ranges =
            range_list_insert(bcontainer->iova_ranges, range);
    }
    bcontainer->pgsizes = info->out_iova_alignment;

    return true;

error:
    error_setg_errno(errp, errno, "Cannot get IOVA ranges");
    return false;
}
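
/*
 * Main attach path for a cdev-backed device: open the device fd (if
 * not already provided), bind it to the iommufd, then either join an
 * existing container in this address space or allocate a new IOAS,
 * query its IOVA ranges and register the memory listener for it.
 */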
static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
                                AddressSpace *as, Error **errp)
{
    VFIOContainerBase *bcontainer;
    VFIOIOMMUFDContainer *container;
    VFIOAddressSpace *space;
    struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
    int ret, devfd;
    uint32_t ioas_id;
    Error *err = NULL;
    const VFIOIOMMUClass *iommufd_vioc =
        VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));

    if (vbasedev->fd < 0) {
        devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
        if (devfd < 0) {
            return false;
        }
        vbasedev->fd = devfd;
    } else {
        devfd = vbasedev->fd;
    }

    if (!iommufd_cdev_connect_and_bind(vbasedev, errp)) {
        goto err_connect_bind;
    }

    space = vfio_get_address_space(as);

    /*
     * The HostIOMMUDevice data from the legacy backend is static and
     * doesn't need any information from the (type1-iommu) backend to be
     * initialized. In contrast, the IOMMUFD HostIOMMUDevice data requires
     * the iommufd FD to be connected and a devid to be available before
     * iommufd_backend_get_device_info() can be called successfully.
     */
    if (!vfio_device_hiod_realize(vbasedev, errp)) {
        goto err_alloc_ioas;
    }

    /* try to attach to an existing container in this space */
    QLIST_FOREACH(bcontainer, &space->containers, next) {
        container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
        if (VFIO_IOMMU_GET_CLASS(bcontainer) != iommufd_vioc ||
            vbasedev->iommufd != container->be) {
            continue;
        }
        if (!iommufd_cdev_attach_container(vbasedev, container, &err)) {
            const char *msg = error_get_pretty(err);

            trace_iommufd_cdev_fail_attach_existing_container(msg);
            error_free(err);
            err = NULL;
        } else {
            ret = iommufd_cdev_ram_block_discard_disable(true);
            if (ret) {
                error_setg(errp,
                           "Cannot set discarding of RAM broken (%d)", ret);
                goto err_discard_disable;
            }
            goto found_container;
        }
    }

    /* Need to allocate a new dedicated container */
    if (!iommufd_backend_alloc_ioas(vbasedev->iommufd, &ioas_id, errp)) {
        goto err_alloc_ioas;
    }

    trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id);

    container = VFIO_IOMMU_IOMMUFD(object_new(TYPE_VFIO_IOMMU_IOMMUFD));
    container->be = vbasedev->iommufd;
    container->ioas_id = ioas_id;
    QLIST_INIT(&container->hwpt_list);

    bcontainer = &container->bcontainer;
    vfio_address_space_insert(space, bcontainer);

    if (!iommufd_cdev_attach_container(vbasedev, container, errp)) {
        goto err_attach_container;
    }

    ret = iommufd_cdev_ram_block_discard_disable(true);
    if (ret) {
        goto err_discard_disable;
    }

    if (!iommufd_cdev_get_info_iova_range(container, ioas_id, &err)) {
        error_append_hint(&err,
                   "Fallback to default 64bit IOVA range and 4K page size\n");
        warn_report_err(err);
        err = NULL;
        bcontainer->pgsizes = qemu_real_host_page_size();
    }

    bcontainer->listener = vfio_memory_listener;
    memory_listener_register(&bcontainer->listener, bcontainer->space->as);

    if (bcontainer->error) {
        error_propagate_prepend(errp, bcontainer->error,
                                "memory listener initialization failed: ");
        goto err_listener_register;
    }

    bcontainer->initialized = true;

found_container:
    ret = ioctl(devfd, VFIO_DEVICE_GET_INFO, &dev_info);
    if (ret) {
        error_setg_errno(errp, errno, "error getting device info");
        goto err_listener_register;
    }

    if (!vfio_cpr_register_container(bcontainer, errp)) {
        goto err_listener_register;
    }

    /*
     * TODO: examine RAM_BLOCK_DISCARD stuff, should we do the group-level
     * incompatibility check for discarding here as well?
     */
    if (vbasedev->ram_block_discard_allowed) {
        iommufd_cdev_ram_block_discard_disable(false);
    }

    vbasedev->group = 0;
    vbasedev->num_irqs = dev_info.num_irqs;
    vbasedev->num_regions = dev_info.num_regions;
    vbasedev->flags = dev_info.flags;
    vbasedev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET);
    vbasedev->bcontainer = bcontainer;
    QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next);
    QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next);

    trace_iommufd_cdev_device_info(vbasedev->name, devfd, vbasedev->num_irqs,
                                   vbasedev->num_regions, vbasedev->flags);
    return true;

err_listener_register:
    iommufd_cdev_ram_block_discard_disable(false);
err_discard_disable:
    iommufd_cdev_detach_container(vbasedev, container);
err_attach_container:
    iommufd_cdev_container_destroy(container);
err_alloc_ioas:
    vfio_put_address_space(space);
    iommufd_cdev_unbind_and_disconnect(vbasedev);
err_connect_bind:
    close(vbasedev->fd);
    return false;
}

static void iommufd_cdev_detach(VFIODevice *vbasedev)
{
    VFIOContainerBase *bcontainer = vbasedev->bcontainer;
    VFIOAddressSpace *space = bcontainer->space;
    VFIOIOMMUFDContainer *container = container_of(bcontainer,
                                                   VFIOIOMMUFDContainer,
                                                   bcontainer);
    QLIST_REMOVE(vbasedev, global_next);
    QLIST_REMOVE(vbasedev, container_next);
    vbasedev->bcontainer = NULL;

    if (!vbasedev->ram_block_discard_allowed) {
        iommufd_cdev_ram_block_discard_disable(false);
    }

    vfio_cpr_unregister_container(bcontainer);
    iommufd_cdev_detach_container(vbasedev, container);
    iommufd_cdev_container_destroy(container);
    vfio_put_address_space(space);

    iommufd_cdev_unbind_and_disconnect(vbasedev);
    close(vbasedev->fd);
}

static VFIODevice *iommufd_cdev_pci_find_by_devid(__u32 devid)
{
    VFIODevice *vbasedev_iter;
    const VFIOIOMMUClass *iommufd_vioc =
        VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));

    QLIST_FOREACH(vbasedev_iter, &vfio_device_list, global_next) {
        if (VFIO_IOMMU_GET_CLASS(vbasedev_iter->bcontainer) != iommufd_vioc) {
            continue;
        }
        if (devid == vbasedev_iter->devid) {
            return vbasedev_iter;
        }
    }
    return NULL;
}

static VFIOPCIDevice *
iommufd_cdev_dep_get_realized_vpdev(struct vfio_pci_dependent_device *dep_dev,
                                    VFIODevice *reset_dev)
{
    VFIODevice *vbasedev_tmp;

    if (dep_dev->devid == reset_dev->devid ||
        dep_dev->devid == VFIO_PCI_DEVID_OWNED) {
        return NULL;
    }

    vbasedev_tmp = iommufd_cdev_pci_find_by_devid(dep_dev->devid);
    if (!vbasedev_tmp || !vbasedev_tmp->dev->realized ||
        vbasedev_tmp->type != VFIO_DEVICE_TYPE_PCI) {
        return NULL;
    }

    return container_of(vbasedev_tmp, VFIOPCIDevice, vbasedev);
}
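
/*
 * Hot reset for the cdev backend. Unlike the legacy group backend, no
 * group fds are passed with VFIO_DEVICE_PCI_HOT_RESET here: the fd
 * array is left empty, and ownership of the dependent devices is
 * expected to be validated by the kernel against the bound iommufd.
 */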
static int iommufd_cdev_pci_hot_reset(VFIODevice *vbasedev, bool single)
{
    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
    struct vfio_pci_hot_reset_info *info = NULL;
    struct vfio_pci_dependent_device *devices;
    struct vfio_pci_hot_reset *reset;
    int ret, i;
    bool multi = false;

    trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");

    if (!single) {
        vfio_pci_pre_reset(vdev);
    }
    vdev->vbasedev.needs_reset = false;

    ret = vfio_pci_get_pci_hot_reset_info(vdev, &info);

    if (ret) {
        goto out_single;
    }

    assert(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID);

    devices = &info->devices[0];

    if (!(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED)) {
        if (!vdev->has_pm_reset) {
            for (i = 0; i < info->count; i++) {
                if (devices[i].devid == VFIO_PCI_DEVID_NOT_OWNED) {
                    error_report("vfio: Cannot reset device %s, "
                                 "depends on device %04x:%02x:%02x.%x "
                                 "which is not owned.",
                                 vdev->vbasedev.name, devices[i].segment,
                                 devices[i].bus, PCI_SLOT(devices[i].devfn),
                                 PCI_FUNC(devices[i].devfn));
                }
            }
        }
        ret = -EPERM;
        goto out_single;
    }

    trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);

    for (i = 0; i < info->count; i++) {
        VFIOPCIDevice *tmp;

        trace_iommufd_cdev_pci_hot_reset_dep_devices(devices[i].segment,
                                                     devices[i].bus,
                                                     PCI_SLOT(devices[i].devfn),
                                                     PCI_FUNC(devices[i].devfn),
                                                     devices[i].devid);

        /*
         * If a VFIO cdev device is resettable, all the dependent devices
         * are either bound to the same iommufd or in the same iommu_group
         * as one of the iommufd-bound devices.
         */
        assert(devices[i].devid != VFIO_PCI_DEVID_NOT_OWNED);

        tmp = iommufd_cdev_dep_get_realized_vpdev(&devices[i], &vdev->vbasedev);
        if (!tmp) {
            continue;
        }

        if (single) {
            ret = -EINVAL;
            goto out_single;
        }
        vfio_pci_pre_reset(tmp);
        tmp->vbasedev.needs_reset = false;
        multi = true;
    }

    if (!single && !multi) {
        ret = -EINVAL;
        goto out_single;
    }

    /* Use zero length array for hot reset with iommufd backend */
    reset = g_malloc0(sizeof(*reset));
    reset->argsz = sizeof(*reset);

    /* Bus reset! */
    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
    g_free(reset);
    if (ret) {
        ret = -errno;
    }

    trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
                                    ret ? strerror(errno) : "Success");

    /* Re-enable INTx on affected devices */
    for (i = 0; i < info->count; i++) {
        VFIOPCIDevice *tmp;

        tmp = iommufd_cdev_dep_get_realized_vpdev(&devices[i], &vdev->vbasedev);
        if (!tmp) {
            continue;
        }
        vfio_pci_post_reset(tmp);
    }
out_single:
    if (!single) {
        vfio_pci_post_reset(vdev);
    }
    g_free(info);

    return ret;
}

static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data)
{
    VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);

    vioc->hiod_typename = TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO;

    vioc->dma_map = iommufd_cdev_map;
    vioc->dma_unmap = iommufd_cdev_unmap;
    vioc->attach_device = iommufd_cdev_attach;
    vioc->detach_device = iommufd_cdev_detach;
    vioc->pci_hot_reset = iommufd_cdev_pci_hot_reset;
    vioc->set_dirty_page_tracking = iommufd_set_dirty_page_tracking;
    vioc->query_dirty_bitmap = iommufd_query_dirty_bitmap;
}

static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
                                      Error **errp)
{
    VFIODevice *vdev = opaque;
    HostIOMMUDeviceCaps *caps = &hiod->caps;
    enum iommu_hw_info_type type;
    union {
        struct iommu_hw_info_vtd vtd;
    } data;
    uint64_t hw_caps;

    hiod->agent = opaque;

    if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid,
                                         &type, &data, sizeof(data),
                                         &hw_caps, errp)) {
        return false;
    }

    hiod->name = g_strdup(vdev->name);
    caps->type = type;
    caps->hw_caps = hw_caps;

    return true;
}

static GList *
hiod_iommufd_vfio_get_iova_ranges(HostIOMMUDevice *hiod)
{
    VFIODevice *vdev = hiod->agent;

    g_assert(vdev);
    return vfio_container_get_iova_ranges(vdev->bcontainer);
}

static uint64_t
hiod_iommufd_vfio_get_page_size_mask(HostIOMMUDevice *hiod)
{
    VFIODevice *vdev = hiod->agent;

    g_assert(vdev);
    return vfio_container_get_page_size_mask(vdev->bcontainer);
}

static void hiod_iommufd_vfio_class_init(ObjectClass *oc, void *data)
{
    HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_CLASS(oc);

    hiodc->realize = hiod_iommufd_vfio_realize;
    hiodc->get_iova_ranges = hiod_iommufd_vfio_get_iova_ranges;
    hiodc->get_page_size_mask = hiod_iommufd_vfio_get_page_size_mask;
}
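
/*
 * QOM registration: TYPE_VFIO_IOMMU_IOMMUFD is the container backend
 * used for cdev-opened devices, and TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO
 * exposes the host IOMMU capabilities of a device bound to iommufd.
 */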
static const TypeInfo types[] = {
    {
        .name = TYPE_VFIO_IOMMU_IOMMUFD,
        .parent = TYPE_VFIO_IOMMU,
        .instance_size = sizeof(VFIOIOMMUFDContainer),
        .class_init = vfio_iommu_iommufd_class_init,
    }, {
        .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO,
        .parent = TYPE_HOST_IOMMU_DEVICE_IOMMUFD,
        .class_init = hiod_iommufd_vfio_class_init,
    }
};

DEFINE_TYPES(types)