/*
 * Packed virtio ring manipulation routines
 *
 * Copyright 2019 Red Hat, Inc.
 *
 * Authors:
 *  Yuri Benditovich <ybendito@redhat.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of their contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "osdep.h"
#include "virtio_pci.h"
#include "virtio.h"
#include "kdebugprint.h"
#include "virtio_ring.h"
#include "windows\virtio_ring_allocation.h"

#include <pshpack1.h>

struct vring_packed_desc_event {
    /* Descriptor Ring Change Event Offset/Wrap Counter. */
    __le16 off_wrap;
    /* Descriptor Ring Change Event Flags. */
    __le16 flags;
};

struct vring_packed_desc {
    /* Buffer Address. */
    __virtio64 addr;
    /* Buffer Length. */
    __le32 len;
    /* Buffer ID. */
    __le16 id;
    /* The flags depending on descriptor type. */
    __le16 flags;
};

#include <poppack.h>

#define BUG_ON(condition)                \
    do {                                 \
        if (condition) {                 \
            KeBugCheck(0xE0E1E2E3);      \
        }                                \
    } while (0)

#define BAD_RING(vq, fmt, ...)                                                           \
    do {                                                                                 \
        DPrintf(0, "%s: queue %d: " fmt, __FUNCTION__, (vq)->vq.index, __VA_ARGS__);     \
        BUG_ON(true);                                                                    \
    } while (0)

/* This marks a buffer as continuing via the next field. */
#define VRING_DESC_F_NEXT 1
/* This marks a buffer as write-only (otherwise read-only). */
#define VRING_DESC_F_WRITE 2
/* This means the buffer contains a list of buffer descriptors. */
#define VRING_DESC_F_INDIRECT 4

/*
 * Mark a descriptor as available or used in packed ring.
 * Notice: they are defined as shifts instead of shifted values.
 */
#define VRING_PACKED_DESC_F_AVAIL 7
#define VRING_PACKED_DESC_F_USED 15
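/*
 * Example: since these are bit positions, callers shift them to obtain the
 * flag values used in the 16-bit descriptor flags field:
 *   1 << VRING_PACKED_DESC_F_AVAIL == 0x0080
 *   1 << VRING_PACKED_DESC_F_USED  == 0x8000
 */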

/* Enable events in packed ring. */
#define VRING_PACKED_EVENT_FLAG_ENABLE 0x0
/* Disable events in packed ring. */
#define VRING_PACKED_EVENT_FLAG_DISABLE 0x1

/*
 * Enable events for a specific descriptor in packed ring.
 * (as specified by Descriptor Ring Change Event Offset/Wrap Counter).
 * Only valid if VIRTIO_RING_F_EVENT_IDX has been negotiated.
 */
#define VRING_PACKED_EVENT_FLAG_DESC 0x2

/*
 * Wrap counter bit shift in event suppression structure
 * of packed ring.
 */
#define VRING_PACKED_EVENT_F_WRAP_CTR 15
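/*
 * Example: driver->off_wrap and device->off_wrap pack a 15-bit descriptor
 * index with the wrap counter in the top bit, so index 5 with wrap
 * counter 1 is encoded as 5 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR) == 0x8005.
 */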

/* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */
/*
 * Assuming a given event_idx value from the other side, if
 * we have just incremented index from old to new_idx,
 * should we trigger an event?
 */
static inline bool vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old)
{
    /*
     * Note: Xen has similar logic for notification hold-off
     * in include/xen/interface/io/ring.h with req_event and req_prod
     * corresponding to event_idx + 1 and new_idx respectively.
     * Note also that req_event and req_prod in Xen start at 1,
     * event indexes in virtio start at 0.
     */
    return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old);
}
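/*
 * Worked example: if the other side armed event_idx == 3 and we have just
 * moved from old == 2 to new_idx == 5, then (__u16)(5 - 3 - 1) == 1 is less
 * than (__u16)(5 - 2) == 3, so a notification is due; the unsigned 16-bit
 * arithmetic keeps the comparison valid across index wrap-around.
 */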

struct vring_desc_state_packed {
    void *data; /* Data for callback. */
    u16 num;    /* Descriptor list length. */
    u16 next;   /* The next desc state in a list. */
    u16 last;   /* The last desc state in a list. */
};

struct virtqueue_packed {
    struct virtqueue vq;
    /* Number we've added since last sync. */
    unsigned int num_added;
    /* Head of free buffer list. */
    unsigned int free_head;
    /* Number of free descriptors */
    unsigned int num_free;
    /* Last used index we've seen. */
    u16 last_used_idx;
    /* Avail used flags. */
    u16 avail_used_flags;
    struct
    {
        /* Driver ring wrap counter. */
        bool avail_wrap_counter;
        /* Device ring wrap counter. */
        bool used_wrap_counter;
        /* Index of the next avail descriptor. */
        u16 next_avail_idx;
        /*
         * Last written value to driver->flags in
         * guest byte order.
         */
        u16 event_flags_shadow;
        struct {
            unsigned int num;
            struct vring_packed_desc *desc;
            struct vring_packed_desc_event *driver;
            struct vring_packed_desc_event *device;
        } vring;
        /* Per-descriptor state. */
        struct vring_desc_state_packed *desc_state;
    } packed;
    struct vring_desc_state_packed desc_states[];
};

#define packedvq(vq) ((struct virtqueue_packed *)vq)

unsigned int vring_control_block_size_packed(u16 qsize)
{
    return sizeof(struct virtqueue_packed) + sizeof(struct vring_desc_state_packed) * qsize;
}

unsigned long vring_size_packed(unsigned int num, unsigned long align)
{
    /* array of descriptors */
    unsigned long res = num * sizeof(struct vring_packed_desc);
    /* driver and device event */
    res += 2 * sizeof(struct vring_packed_desc_event);
    return res;
}
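/*
 * Example: unlike the split ring, the packed ring needs no alignment
 * padding between its areas, so for num == 256 this is just
 * 256 * 16 (descriptors) + 2 * 4 (driver/device event structures)
 * == 4104 bytes; the align argument does not enter the calculation.
 */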

static int virtqueue_add_buf_packed(
    struct virtqueue *_vq,   /* the queue */
    struct scatterlist sg[], /* sg array of length out + in */
    unsigned int out,        /* number of driver->device buffer descriptors in sg */
    unsigned int in,         /* number of device->driver buffer descriptors in sg */
    void *opaque,            /* later returned from virtqueue_get_buf */
    void *va_indirect,       /* VA of the indirect page or NULL */
    ULONGLONG phys_indirect) /* PA of the indirect page or 0 */
{
    struct virtqueue_packed *vq = packedvq(_vq);
    unsigned int descs_used;
    struct vring_packed_desc *desc;
    u16 head, id, i;

    descs_used = out + in;
    head = vq->packed.next_avail_idx;
    id = (u16)vq->free_head;

    BUG_ON(descs_used == 0);
    BUG_ON(id >= vq->packed.vring.num);

    if (va_indirect && vq->num_free > 0) {
        desc = va_indirect;
        for (i = 0; i < descs_used; i++) {
            desc[i].flags = i < out ? 0 : VRING_DESC_F_WRITE;
            desc[i].addr = sg[i].physAddr.QuadPart;
            desc[i].len = sg[i].length;
        }
        vq->packed.vring.desc[head].addr = phys_indirect;
        vq->packed.vring.desc[head].len = descs_used * sizeof(struct vring_packed_desc);
        vq->packed.vring.desc[head].id = id;

        KeMemoryBarrier();
        vq->packed.vring.desc[head].flags = VRING_DESC_F_INDIRECT | vq->avail_used_flags;

        DPrintf(5, "Added buffer head %i to Q%d\n", head, vq->vq.index);
        head++;
        if (head >= vq->packed.vring.num) {
            head = 0;
            vq->packed.avail_wrap_counter ^= 1;
            vq->avail_used_flags ^=
                1 << VRING_PACKED_DESC_F_AVAIL |
                1 << VRING_PACKED_DESC_F_USED;
        }
        vq->packed.next_avail_idx = head;
        /* We're using some buffers from the free list. */
        vq->num_free -= 1;
        vq->num_added += 1;

        vq->free_head = vq->packed.desc_state[id].next;

        /* Store token and indirect buffer state. */
        vq->packed.desc_state[id].num = 1;
        vq->packed.desc_state[id].data = opaque;
        vq->packed.desc_state[id].last = id;

    } else {
        unsigned int n;
        u16 curr, prev, head_flags;
        if (vq->num_free < descs_used) {
            DPrintf(6, "Can't add buffer to Q%d\n", vq->vq.index);
            return -ENOSPC;
        }
        desc = vq->packed.vring.desc;
        i = head;
        curr = id;
        for (n = 0; n < descs_used; n++) {
            u16 flags = vq->avail_used_flags;
            flags |= n < out ? 0 : VRING_DESC_F_WRITE;
            if (n != descs_used - 1) {
                flags |= VRING_DESC_F_NEXT;
            }
            desc[i].addr = sg[n].physAddr.QuadPart;
            desc[i].len = sg[n].length;
            desc[i].id = id;
            if (n == 0) {
                head_flags = flags;
            } else {
                desc[i].flags = flags;
            }

            prev = curr;
            curr = vq->packed.desc_state[curr].next;

            if (++i >= vq->packed.vring.num) {
                i = 0;
                vq->avail_used_flags ^=
                    1 << VRING_PACKED_DESC_F_AVAIL |
                    1 << VRING_PACKED_DESC_F_USED;
            }
        }

        if (i < head) {
            vq->packed.avail_wrap_counter ^= 1;
        }

        /* We're using some buffers from the free list. */
        vq->num_free -= descs_used;

        /* Update free pointer */
        vq->packed.next_avail_idx = i;
        vq->free_head = curr;

        /* Store token. */
        vq->packed.desc_state[id].num = (u16)descs_used;
        vq->packed.desc_state[id].data = opaque;
        vq->packed.desc_state[id].last = prev;

        /*
         * A driver MUST NOT make the first descriptor in the list
         * available before all subsequent descriptors comprising
         * the list are made available.
         */
        KeMemoryBarrier();
        vq->packed.vring.desc[head].flags = head_flags;
        vq->num_added += descs_used;

        DPrintf(5, "Added buffer head @%i+%d to Q%d\n", head, descs_used, vq->vq.index);
    }

    return 0;
}
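/*
 * Illustrative sketch (hypothetical driver code, not part of this file):
 * posting a request with one read-only and one write-only segment through
 * the function table filled in by vring_new_virtqueue_packed(). cmd_pa,
 * resp_pa, cmd_len, resp_len and request_ctx are assumed driver values.
 *
 *   struct scatterlist sg[2];
 *   sg[0].physAddr = cmd_pa;   sg[0].length = cmd_len;  // device reads this
 *   sg[1].physAddr = resp_pa;  sg[1].length = resp_len; // device writes this
 *   if (vq->add_buf(vq, sg, 1, 1, request_ctx, NULL, 0) == 0 &&
 *       vq->kick_prepare(vq)) {
 *       virtqueue_notify(vq);
 *   }
 */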

static void detach_buf_packed(struct virtqueue_packed *vq, unsigned int id)
{
    struct vring_desc_state_packed *state = &vq->packed.desc_state[id];

    /* Clear data ptr. */
    state->data = NULL;

    vq->packed.desc_state[state->last].next = (u16)vq->free_head;
    vq->free_head = id;
    vq->num_free += state->num;
}

static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
{
    struct virtqueue_packed *vq = packedvq(_vq);
    unsigned int i;
    void *buf;

    for (i = 0; i < vq->packed.vring.num; i++) {
        if (!vq->packed.desc_state[i].data)
            continue;
        /* detach_buf clears data, so grab it now. */
        buf = vq->packed.desc_state[i].data;
        detach_buf_packed(vq, i);
        return buf;
    }
    /* That should have freed everything. */
    BUG_ON(vq->num_free != vq->packed.vring.num);

    return NULL;
}

static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
{
    struct virtqueue_packed *vq = packedvq(_vq);

    if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
        vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
        vq->packed.vring.driver->flags = vq->packed.event_flags_shadow;
    }
}
static inline bool is_used_desc_packed(const struct virtqueue_packed *vq,
                                       u16 idx, bool used_wrap_counter)
{
    bool avail, used;
    u16 flags;

    flags = vq->packed.vring.desc[idx].flags;
    avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
    used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));

    return avail == used && used == used_wrap_counter;
}
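/*
 * Example: while the device owns a descriptor, its AVAIL and USED bits
 * differ (the driver wrote AVAIL == avail_wrap_counter and USED == its
 * inverse). The device marks it used by making USED == AVAIL, so on the
 * first pass AVAIL == USED == 1 means "used", and after the device wraps
 * (used_wrap_counter flips to 0) AVAIL == USED == 0 does.
 */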

static inline bool virtqueue_poll_packed(struct virtqueue_packed *vq, u16 off_wrap)
{
    bool wrap_counter;
    u16 used_idx;
    KeMemoryBarrier();

    wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
    used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);

    return is_used_desc_packed(vq, used_idx, wrap_counter);
}
static inline unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue_packed *vq)
{
    bool event_suppression_enabled = vq->vq.vdev->event_suppression_enabled;
    /*
     * We optimistically turn back on interrupts, then check if there was
     * more to do.
     */

    if (event_suppression_enabled) {
        vq->packed.vring.driver->off_wrap =
            vq->last_used_idx |
            (vq->packed.used_wrap_counter <<
             VRING_PACKED_EVENT_F_WRAP_CTR);
        /*
         * We need to update event offset and event wrap
         * counter first before updating event flags.
         */
        KeMemoryBarrier();
    }

    if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
        vq->packed.event_flags_shadow = event_suppression_enabled ?
            VRING_PACKED_EVENT_FLAG_DESC :
            VRING_PACKED_EVENT_FLAG_ENABLE;
        vq->packed.vring.driver->flags = vq->packed.event_flags_shadow;
    }

    return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter <<
        VRING_PACKED_EVENT_F_WRAP_CTR);
}

static bool virtqueue_enable_cb_packed(struct virtqueue *_vq)
{
    struct virtqueue_packed *vq = packedvq(_vq);
    unsigned last_used_idx = virtqueue_enable_cb_prepare_packed(vq);

    return !virtqueue_poll_packed(vq, (u16)last_used_idx);
}
static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
{
    struct virtqueue_packed *vq = packedvq(_vq);
    bool event_suppression_enabled = vq->vq.vdev->event_suppression_enabled;
    u16 used_idx, wrap_counter;
    u16 bufs;

    /*
     * We optimistically turn back on interrupts, then check if there was
     * more to do.
     */

    if (event_suppression_enabled) {
        /* TODO: tune this threshold */
        bufs = (vq->packed.vring.num - vq->num_free) * 3 / 4;
        wrap_counter = vq->packed.used_wrap_counter;

        used_idx = vq->last_used_idx + bufs;
        if (used_idx >= vq->packed.vring.num) {
            used_idx -= (u16)vq->packed.vring.num;
            wrap_counter ^= 1;
        }

        vq->packed.vring.driver->off_wrap = used_idx |
            (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR);

        /*
         * We need to update event offset and event wrap
         * counter first before updating event flags.
         */
        KeMemoryBarrier();
    }

    if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
        vq->packed.event_flags_shadow = event_suppression_enabled ?
            VRING_PACKED_EVENT_FLAG_DESC :
            VRING_PACKED_EVENT_FLAG_ENABLE;
        vq->packed.vring.driver->flags = vq->packed.event_flags_shadow;
    }

    /*
     * We need to update event suppression structure first
     * before re-checking for more used buffers.
     */
    KeMemoryBarrier();

    if (is_used_desc_packed(vq,
                            vq->last_used_idx,
                            vq->packed.used_wrap_counter)) {
        return false;
    }

    return true;
}
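/*
 * Example: with a 256-entry ring and 56 free descriptors there are 200
 * buffers in flight, so bufs == 150 and the event index is armed at
 * last_used_idx + 150, reduced modulo the ring size with the wrap counter
 * flipped if the sum runs past the ring end.
 */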

static BOOLEAN virtqueue_is_interrupt_enabled_packed(struct virtqueue *_vq)
{
    struct virtqueue_packed *vq = packedvq(_vq);
    /*
     * Interrupts are enabled unless DISABLE was the last value written to
     * the shadow flags (ENABLE and DESC both leave them enabled); the
     * original returned the DISABLE bit itself, inverting the result.
     */
    return !(vq->packed.event_flags_shadow & VRING_PACKED_EVENT_FLAG_DISABLE);
}
static void virtqueue_shutdown_packed(struct virtqueue *_vq)
{
    struct virtqueue_packed *vq = packedvq(_vq);
    unsigned int num = vq->packed.vring.num;
    void *pages = vq->packed.vring.desc;
    unsigned int vring_align = _vq->vdev->addr ? PAGE_SIZE : SMP_CACHE_BYTES;

    RtlZeroMemory(pages, vring_size_packed(num, vring_align));
    vring_new_virtqueue_packed(
        _vq->index,
        num,
        vring_align,
        _vq->vdev,
        pages,
        _vq->notification_cb,
        _vq);
}

static inline bool more_used_packed(const struct virtqueue_packed *vq)
{
    return is_used_desc_packed(vq, vq->last_used_idx,
                               vq->packed.used_wrap_counter);
}
static void *virtqueue_get_buf_packed(
    struct virtqueue *_vq, /* the queue */
    unsigned int *len)     /* number of bytes returned by the device */
{
    struct virtqueue_packed *vq = packedvq(_vq);
    u16 last_used, id;
    void *ret;

    if (!more_used_packed(vq)) {
        DPrintf(6, "%s: No more buffers in queue\n", __FUNCTION__);
        return NULL;
    }

    /* Only get used elements after they have been exposed by host. */
    KeMemoryBarrier();

    last_used = vq->last_used_idx;
    id = vq->packed.vring.desc[last_used].id;
    *len = vq->packed.vring.desc[last_used].len;

    if (id >= vq->packed.vring.num) {
        BAD_RING(vq, "id %u out of range\n", id);
        return NULL;
    }
    if (!vq->packed.desc_state[id].data) {
        BAD_RING(vq, "id %u is not a head!\n", id);
        return NULL;
    }

    /* detach_buf_packed clears data, so grab it now. */
    ret = vq->packed.desc_state[id].data;
    detach_buf_packed(vq, id);

    vq->last_used_idx += vq->packed.desc_state[id].num;
    if (vq->last_used_idx >= vq->packed.vring.num) {
        vq->last_used_idx -= (u16)vq->packed.vring.num;
        vq->packed.used_wrap_counter ^= 1;
    }

    /*
     * If we expect an interrupt for the next entry, tell host
     * by writing event index and flush out the write before
     * the read in the next get_buf call.
     */
    if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC) {
        vq->packed.vring.driver->off_wrap = vq->last_used_idx |
            ((u16)vq->packed.used_wrap_counter <<
             VRING_PACKED_EVENT_F_WRAP_CTR);
        KeMemoryBarrier();
    }

    return ret;
}

static BOOLEAN virtqueue_has_buf_packed(struct virtqueue *_vq)
{
    struct virtqueue_packed *vq = packedvq(_vq);
    return more_used_packed(vq);
}
static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
{
    struct virtqueue_packed *vq = packedvq(_vq);
    u16 new, old, off_wrap, flags, wrap_counter, event_idx;
    bool needs_kick;
    union {
        struct {
            __le16 off_wrap;
            __le16 flags;
        };
        u32 value32;
    } snapshot;

    /*
     * We need to expose the new flags value before checking notification
     * suppressions.
     */
    KeMemoryBarrier();

    old = vq->packed.next_avail_idx - vq->num_added;
    new = vq->packed.next_avail_idx;
    vq->num_added = 0;

    snapshot.value32 = *(u32 *)vq->packed.vring.device;
    flags = snapshot.flags;

    if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
        needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
        goto out;
    }

    off_wrap = snapshot.off_wrap;

    wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
    event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
    if (wrap_counter != vq->packed.avail_wrap_counter) {
        event_idx -= (u16)vq->packed.vring.num;
    }

    needs_kick = vring_need_event(event_idx, new, old);
out:
    return needs_kick;
}
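/*
 * Example: if the device armed event_idx == 2 with the wrap counter
 * opposite to avail_wrap_counter in a 256-entry ring, event_idx is
 * rebased to 2 - 256 (mod 2^16) so that vring_need_event() compares it
 * against old/new indexes from before the wrap; the unsigned 16-bit
 * arithmetic makes the comparison come out right.
 */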

static void virtqueue_kick_always_packed(struct virtqueue *_vq)
{
    struct virtqueue_packed *vq = packedvq(_vq);
    KeMemoryBarrier();
    vq->num_added = 0;
    virtqueue_notify(_vq);
}
/* Initializes a new virtqueue using already allocated memory */
struct virtqueue *vring_new_virtqueue_packed(
    unsigned int index,                 /* virtqueue index */
    unsigned int num,                   /* virtqueue size (always a power of 2) */
    unsigned int vring_align,           /* vring alignment requirement */
    VirtIODevice *vdev,                 /* the virtio device owning the queue */
    void *pages,                        /* vring memory */
    void (*notify)(struct virtqueue *), /* notification callback */
    void *control)                      /* virtqueue memory */
{
    struct virtqueue_packed *vq = packedvq(control);
    unsigned int i;

    vq->vq.vdev = vdev;
    vq->vq.notification_cb = notify;
    vq->vq.index = index;

    vq->vq.avail_va = (u8 *)pages + num * sizeof(struct vring_packed_desc);
    vq->vq.used_va = (u8 *)vq->vq.avail_va + sizeof(struct vring_packed_desc_event);

    /* initialize the ring */
    vq->packed.vring.num = num;
    vq->packed.vring.desc = pages;
    vq->packed.vring.driver = vq->vq.avail_va;
    vq->packed.vring.device = vq->vq.used_va;

    vq->num_free = num;
    vq->free_head = 0;
    vq->num_added = 0;
    vq->packed.avail_wrap_counter = 1;
    vq->packed.used_wrap_counter = 1;
    vq->last_used_idx = 0;
    vq->avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
    vq->packed.next_avail_idx = 0;
    vq->packed.event_flags_shadow = 0;
    vq->packed.desc_state = vq->desc_states;

    RtlZeroMemory(vq->packed.desc_state, num * sizeof(*vq->packed.desc_state));
    for (i = 0; i < num - 1; i++) {
        vq->packed.desc_state[i].next = i + 1;
    }

    vq->vq.add_buf = virtqueue_add_buf_packed;
    vq->vq.detach_unused_buf = virtqueue_detach_unused_buf_packed;
    vq->vq.disable_cb = virtqueue_disable_cb_packed;
    vq->vq.enable_cb = virtqueue_enable_cb_packed;
    vq->vq.enable_cb_delayed = virtqueue_enable_cb_delayed_packed;
    vq->vq.get_buf = virtqueue_get_buf_packed;
    vq->vq.has_buf = virtqueue_has_buf_packed;
    vq->vq.is_interrupt_enabled = virtqueue_is_interrupt_enabled_packed;
    vq->vq.kick_always = virtqueue_kick_always_packed;
    vq->vq.kick_prepare = virtqueue_kick_prepare_packed;
    vq->vq.shutdown = virtqueue_shutdown_packed;
    return &vq->vq;
}
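/*
 * Illustrative sketch (hypothetical caller, not part of this file):
 * allocating the two memory areas and creating the queue. alloc_contiguous
 * and alloc_nonpaged stand in for whatever allocators the port layer
 * provides; vdev and my_notify_cb are assumed to exist.
 *
 *   u16 qsize = 256;
 *   void *ring = alloc_contiguous(vring_size_packed(qsize, PAGE_SIZE));
 *   void *ctrl = alloc_nonpaged(vring_control_block_size_packed(qsize));
 *   RtlZeroMemory(ring, vring_size_packed(qsize, PAGE_SIZE));
 *   struct virtqueue *vq = vring_new_virtqueue_packed(
 *       0, qsize, PAGE_SIZE, vdev, ring, my_notify_cb, ctrl);
 */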