kvm-guest-drivers-windows
562 lines · 18.4 KB
1/*
2* Virtio ring manipulation routines
3*
4* Copyright 2017 Red Hat, Inc.
5*
6* Authors:
7* Ladi Prosek <lprosek@redhat.com>
8*
9* Redistribution and use in source and binary forms, with or without
10* modification, are permitted provided that the following conditions
11* are met :
12* 1. Redistributions of source code must retain the above copyright
13* notice, this list of conditions and the following disclaimer.
14* 2. Redistributions in binary form must reproduce the above copyright
15* notice, this list of conditions and the following disclaimer in the
16* documentation and / or other materials provided with the distribution.
17* 3. Neither the names of the copyright holders nor the names of their contributors
18* may be used to endorse or promote products derived from this software
19* without specific prior written permission.
20* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
21* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23* ARE DISCLAIMED.IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE
24* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30* SUCH DAMAGE.
31*/
32#include "osdep.h"33#include "virtio_pci.h"34#include "virtio.h"35#include "kdebugprint.h"36#include "virtio_ring.h"37#include "windows\virtio_ring_allocation.h"38
/* Maps a free-running ring index onto a slot; valid only because num is a power of 2. */
#define DESC_INDEX(num, i) ((i) & ((num) - 1))

/* Descriptor flags (virtio spec, split virtqueue): */
/* The buffer continues via the "next" field. */
#define VIRTQ_DESC_F_NEXT 1
/* The buffer is device-writable (otherwise device-readable). */
#define VIRTQ_DESC_F_WRITE 2
/* The buffer holds a table of indirect descriptors. */
#define VIRTQ_DESC_F_INDIRECT 4

/* Host hint in used->flags: no need to kick when adding a buffer.
 * Unreliable — purely an optimization; the guest still kicks when
 * it runs out of buffers. */
#define VIRTQ_USED_F_NO_NOTIFY 1
/* Guest hint in avail->flags: no need to interrupt when consuming a
 * buffer. Unreliable — purely an optimization. */
#define VIRTQ_AVAIL_F_NO_INTERRUPT 1
57#pragma warning (push)58#pragma warning (disable:4200)59
60#include <pshpack1.h>61
62/* Virtio ring descriptors: 16 bytes. These can chain together via "next". */
63struct vring_desc {64/* Address (guest-physical). */65__virtio64 addr;66/* Length. */67__virtio32 len;68/* The flags as indicated above. */69__virtio16 flags;70/* We chain unused descriptors via this, too */71__virtio16 next;72};73
74struct vring_avail {75__virtio16 flags;76__virtio16 idx;77__virtio16 ring[];78};79
80/* u32 is used here for ids for padding reasons. */
81struct vring_used_elem {82/* Index of start of used descriptor chain. */83__virtio32 id;84/* Total length of the descriptor chain which was used (written to) */85__virtio32 len;86};87
88struct vring_used {89__virtio16 flags;90__virtio16 idx;91struct vring_used_elem ring[];92};93
94#include <poppack.h>95
/* Alignment requirements for vring elements.
 * With the pre-virtio 1.0 layout these fall out naturally. */
#define VRING_AVAIL_ALIGN_SIZE 2
#define VRING_USED_ALIGN_SIZE 4
#define VRING_DESC_ALIGN_SIZE 16
103/* The standard layout for the ring is a continuous chunk of memory which looks
104* like this. We assume num is a power of 2.
105*
106* struct vring
107* {
108* // The actual descriptors (16 bytes each)
109* struct vring_desc desc[num];
110*
111* // A ring of available descriptor heads with free-running index.
112* __virtio16 avail_flags;
113* __virtio16 avail_idx;
114* __virtio16 available[num];
115* __virtio16 used_event_idx;
116*
117* // Padding to the next align boundary.
118* char pad[];
119*
120* // A ring of used descriptor heads with free-running index.
121* __virtio16 used_flags;
122* __virtio16 used_idx;
123* struct vring_used_elem used[num];
124* __virtio16 avail_event_idx;
125* };
126*/
127/* We publish the used event index at the end of the available ring, and vice
128* versa. They are at the end for backwards compatibility. */
129
/* Pointers into the single contiguous chunk of vring memory (see layout above). */
struct vring {
    /* Queue size; always a power of 2. */
    unsigned int num;

    /* The descriptor table (num entries). */
    struct vring_desc *desc;

    /* The available ring (driver -> device). */
    struct vring_avail *avail;

    /* The used ring (device -> driver). */
    struct vring_used *used;
};

/* The used-event index is published in the slot past the available ring,
 * and the avail-event index past the used ring (backwards-compatible trick). */
#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num])
#define vring_avail_event(vr) (*(__virtio16 *)&(vr)->used->ring[(vr)->num])
143static inline void vring_init(struct vring *vr, unsigned int num, void *p,144unsigned long align)145{
146vr->num = num;147vr->desc = (struct vring_desc *)p;148vr->avail = (struct vring_avail *)((__u8 *)p + num * sizeof(struct vring_desc));149vr->used = (struct vring_used *)(((ULONG_PTR)&vr->avail->ring[num] + sizeof(__virtio16)150+ align - 1) & ~((ULONG_PTR)align - 1));151}
152
153static inline unsigned vring_size_split(unsigned int num, unsigned long align)154{
155#pragma warning (push)156#pragma warning (disable:4319)157return ((sizeof(struct vring_desc) * num + sizeof(__virtio16) * (3 + num)158+ align - 1) & ~(align - 1))159+ sizeof(__virtio16) * 3 + sizeof(struct vring_used_elem) * num;160#pragma warning(pop)161}
162
163/* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */
164/* Assuming a given event_idx value from the other side, if
165* we have just incremented index from old to new_idx,
166* should we trigger an event? */
167static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old)168{
169/* Note: Xen has similar logic for notification hold-off170* in include/xen/interface/io/ring.h with req_event and req_prod
171* corresponding to event_idx + 1 and new_idx respectively.
172* Note also that req_event and req_prod in Xen start at 1,
173* event indexes in virtio start at 0. */
174return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old);175}
176
177struct virtqueue_split {178struct virtqueue vq;179struct vring vring;180struct {181u16 flags;182u16 idx;183} master_vring_avail;184unsigned int num_unused;185unsigned int num_added_since_kick;186u16 first_unused;187u16 last_used;188void *opaque[];189};190
191#define splitvq(vq) ((struct virtqueue_split *)vq)192
193#pragma warning (pop)194
195/* Returns the index of the first unused descriptor */196static inline u16 get_unused_desc(struct virtqueue_split *vq)197{
198u16 idx = vq->first_unused;199ASSERT(vq->num_unused > 0);200
201vq->first_unused = vq->vring.desc[idx].next;202vq->num_unused--;203return idx;204}
205
206/* Marks the descriptor chain starting at index idx as unused */
207static inline void put_unused_desc_chain(struct virtqueue_split *vq, u16 idx)208{
209u16 start = idx;210
211vq->opaque[idx] = NULL;212while (vq->vring.desc[idx].flags & VIRTQ_DESC_F_NEXT) {213idx = vq->vring.desc[idx].next;214vq->num_unused++;215}216
217vq->vring.desc[idx].flags = VIRTQ_DESC_F_NEXT;218vq->vring.desc[idx].next = vq->first_unused;219vq->num_unused++;220
221vq->first_unused = start;222}
223
224/* Adds a buffer to a virtqueue, returns 0 on success, negative number on error */
225static int virtqueue_add_buf_split(226struct virtqueue *_vq, /* the queue */227struct scatterlist sg[], /* sg array of length out + in */228unsigned int out, /* number of driver->device buffer descriptors in sg */229unsigned int in, /* number of device->driver buffer descriptors in sg */230void *opaque, /* later returned from virtqueue_get_buf */231void *va_indirect, /* VA of the indirect page or NULL */232ULONGLONG phys_indirect) /* PA of the indirect page or 0 */233{
234struct virtqueue_split *vq = splitvq(_vq);235struct vring *vring = &vq->vring;236unsigned int i;237u16 idx;238
239if (va_indirect && (out + in) > 1 && vq->num_unused > 0) {240/* Use one indirect descriptor */241struct vring_desc *desc = (struct vring_desc *)va_indirect;242
243for (i = 0; i < out + in; i++) {244desc[i].flags = (i < out ? 0 : VIRTQ_DESC_F_WRITE);245desc[i].flags |= VIRTQ_DESC_F_NEXT;246desc[i].addr = sg[i].physAddr.QuadPart;247desc[i].len = sg[i].length;248desc[i].next = (u16)i + 1;249}250desc[i - 1].flags &= ~VIRTQ_DESC_F_NEXT;251
252idx = get_unused_desc(vq);253vq->vring.desc[idx].flags = VIRTQ_DESC_F_INDIRECT;254vq->vring.desc[idx].addr = phys_indirect;255vq->vring.desc[idx].len = i * sizeof(struct vring_desc);256
257vq->opaque[idx] = opaque;258} else {259u16 last_idx;260
261/* Use out + in regular descriptors */262if (out + in > vq->num_unused) {263return -ENOSPC;264}265
266/* First descriptor */267idx = last_idx = get_unused_desc(vq);268vq->opaque[idx] = opaque;269
270vring->desc[idx].addr = sg[0].physAddr.QuadPart;271vring->desc[idx].len = sg[0].length;272vring->desc[idx].flags = VIRTQ_DESC_F_NEXT;273if (out == 0) {274vring->desc[idx].flags |= VIRTQ_DESC_F_WRITE;275}276vring->desc[idx].next = vq->first_unused;277
278/* The rest of descriptors */279for (i = 1; i < out + in; i++) {280last_idx = get_unused_desc(vq);281
282vring->desc[last_idx].addr = sg[i].physAddr.QuadPart;283vring->desc[last_idx].len = sg[i].length;284vring->desc[last_idx].flags = VIRTQ_DESC_F_NEXT;285if (i >= out) {286vring->desc[last_idx].flags |= VIRTQ_DESC_F_WRITE;287}288vring->desc[last_idx].next = vq->first_unused;289}290vring->desc[last_idx].flags &= ~VIRTQ_DESC_F_NEXT;291}292
293/* Write the first descriptor into the available ring */294vring->avail->ring[DESC_INDEX(vring->num, vq->master_vring_avail.idx)] = idx;295KeMemoryBarrier();296vring->avail->idx = ++vq->master_vring_avail.idx;297vq->num_added_since_kick++;298
299return 0;300}
301
302/* Gets the opaque pointer associated with a returned buffer, or NULL if no buffer is available */
303static void *virtqueue_get_buf_split(304struct virtqueue *_vq, /* the queue */305unsigned int *len) /* number of bytes returned by the device */306{
307struct virtqueue_split *vq = splitvq(_vq);308void *opaque;309u16 idx;310
311if (vq->last_used == (int)vq->vring.used->idx) {312/* No descriptor index in the used ring */313return NULL;314}315KeMemoryBarrier();316
317idx = DESC_INDEX(vq->vring.num, vq->last_used);318*len = vq->vring.used->ring[idx].len;319
320/* Get the first used descriptor */321idx = (u16)vq->vring.used->ring[idx].id;322opaque = vq->opaque[idx];323
324/* Put all descriptors back to the free list */325put_unused_desc_chain(vq, idx);326
327vq->last_used++;328if (_vq->vdev->event_suppression_enabled && virtqueue_is_interrupt_enabled(_vq)) {329vring_used_event(&vq->vring) = vq->last_used;330KeMemoryBarrier();331}332
333ASSERT(opaque != NULL);334return opaque;335}
336
337/* Returns true if at least one returned buffer is available, false otherwise */
338static BOOLEAN virtqueue_has_buf_split(struct virtqueue *_vq)339{
340struct virtqueue_split *vq = splitvq(_vq);341return (vq->last_used != vq->vring.used->idx);342}
343
344/* Returns true if the device should be notified, false otherwise */
345static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)346{
347struct virtqueue_split *vq = splitvq(_vq);348bool wrap_around;349u16 old, new;350KeMemoryBarrier();351
352wrap_around = (vq->num_added_since_kick >= (1 << 16));353
354old = (u16)(vq->master_vring_avail.idx - vq->num_added_since_kick);355new = vq->master_vring_avail.idx;356vq->num_added_since_kick = 0;357
358if (_vq->vdev->event_suppression_enabled) {359return wrap_around || (bool)vring_need_event(vring_avail_event(&vq->vring), new, old);360} else {361return !(vq->vring.used->flags & VIRTQ_USED_F_NO_NOTIFY);362}363}
364
365/* Notifies the device even if it's not necessary according to the event suppression logic */
366static void virtqueue_kick_always_split(struct virtqueue *_vq)367{
368struct virtqueue_split *vq = splitvq(_vq);369KeMemoryBarrier();370vq->num_added_since_kick = 0;371virtqueue_notify(_vq);372}
373
374/* Enables interrupts on a virtqueue and returns false if the queue has at least one returned
375* buffer available to be fetched by virtqueue_get_buf, true otherwise */
376static bool virtqueue_enable_cb_split(struct virtqueue *_vq)377{
378struct virtqueue_split *vq = splitvq(_vq);379if (!virtqueue_is_interrupt_enabled(_vq)) {380vq->master_vring_avail.flags &= ~VIRTQ_AVAIL_F_NO_INTERRUPT;381if (!_vq->vdev->event_suppression_enabled)382{383vq->vring.avail->flags = vq->master_vring_avail.flags;384}385}386
387vring_used_event(&vq->vring) = vq->last_used;388KeMemoryBarrier();389return (vq->last_used == vq->vring.used->idx);390}
391
392/* Enables interrupts on a virtqueue after ~3/4 of the currently pushed buffers have been
393* returned, returns false if this condition currently holds, false otherwise */
394static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)395{
396struct virtqueue_split *vq = splitvq(_vq);397u16 bufs;398
399if (!virtqueue_is_interrupt_enabled(_vq)) {400vq->master_vring_avail.flags &= ~VIRTQ_AVAIL_F_NO_INTERRUPT;401if (!_vq->vdev->event_suppression_enabled)402{403vq->vring.avail->flags = vq->master_vring_avail.flags;404}405}406
407/* Note that 3/4 is an arbitrary threshold */408bufs = (u16)(vq->master_vring_avail.idx - vq->last_used) * 3 / 4;409vring_used_event(&vq->vring) = vq->last_used + bufs;410KeMemoryBarrier();411return ((vq->vring.used->idx - vq->last_used) <= bufs);412}
413
414/* Disables interrupts on a virtqueue */
415static void virtqueue_disable_cb_split(struct virtqueue *_vq)416{
417struct virtqueue_split *vq = splitvq(_vq);418if (virtqueue_is_interrupt_enabled(_vq)) {419vq->master_vring_avail.flags |= VIRTQ_AVAIL_F_NO_INTERRUPT;420if (!_vq->vdev->event_suppression_enabled)421{422vq->vring.avail->flags = vq->master_vring_avail.flags;423}424}425}
426
427/* Returns true if interrupts are enabled on a virtqueue, false otherwise */
428static BOOLEAN virtqueue_is_interrupt_enabled_split(struct virtqueue *_vq)429{
430struct virtqueue_split *vq = splitvq(_vq);431return !(vq->master_vring_avail.flags & VIRTQ_AVAIL_F_NO_INTERRUPT);432}
433
434/* Re-initializes an already initialized virtqueue */
435static void virtqueue_shutdown_split(struct virtqueue *_vq)436{
437struct virtqueue_split *vq = splitvq(_vq);438unsigned int num = vq->vring.num;439void *pages = vq->vring.desc;440unsigned int vring_align = _vq->vdev->addr ? PAGE_SIZE : SMP_CACHE_BYTES;441
442RtlZeroMemory(pages, vring_size_split(num, vring_align));443(void)vring_new_virtqueue_split(444_vq->index,445vq->vring.num,446vring_align,447_vq->vdev,448pages,449_vq->notification_cb,450vq);451}
452
453/* Gets the opaque pointer associated with a not-yet-returned buffer, or NULL if no buffer is available
454* to aid drivers with cleaning up all data on virtqueue shutdown */
455static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)456{
457struct virtqueue_split *vq = splitvq(_vq);458u16 idx;459void *opaque = NULL;460
461for (idx = 0; idx < (u16)vq->vring.num; idx++) {462opaque = vq->opaque[idx];463if (opaque) {464put_unused_desc_chain(vq, idx);465vq->vring.avail->idx = --vq->master_vring_avail.idx;466break;467}468}469return opaque;470}
471
472/* Returns the size of the virtqueue structure including
473* additional size for per-descriptor data */
474unsigned int vring_control_block_size(u16 qsize, bool packed)475{
476unsigned int res;477if (packed) {478return vring_control_block_size_packed(qsize);479}480res = sizeof(struct virtqueue_split);481res += sizeof(void *) * qsize;482return res;483}
484
485/* Initializes a new virtqueue using already allocated memory */
486struct virtqueue *vring_new_virtqueue_split(487unsigned int index, /* virtqueue index */488unsigned int num, /* virtqueue size (always a power of 2) */489unsigned int vring_align, /* vring alignment requirement */490VirtIODevice *vdev, /* the virtio device owning the queue */491void *pages, /* vring memory */492void(*notify)(struct virtqueue *), /* notification callback */493void *control) /* virtqueue memory */494{
495struct virtqueue_split *vq = splitvq(control);496u16 i;497
498if (DESC_INDEX(num, num) != 0) {499DPrintf(0, "Virtqueue length %u is not a power of 2\n", num);500return NULL;501}502
503RtlZeroMemory(vq, sizeof(*vq) + num * sizeof(void *));504
505vring_init(&vq->vring, num, pages, vring_align);506vq->vq.vdev = vdev;507vq->vq.notification_cb = notify;508vq->vq.index = index;509
510/* Build a linked list of unused descriptors */511vq->num_unused = num;512vq->first_unused = 0;513for (i = 0; i < num - 1; i++) {514vq->vring.desc[i].flags = VIRTQ_DESC_F_NEXT;515vq->vring.desc[i].next = i + 1;516}517vq->vq.avail_va = vq->vring.avail;518vq->vq.used_va = vq->vring.used;519vq->vq.add_buf = virtqueue_add_buf_split;520vq->vq.detach_unused_buf = virtqueue_detach_unused_buf_split;521vq->vq.disable_cb = virtqueue_disable_cb_split;522vq->vq.enable_cb = virtqueue_enable_cb_split;523vq->vq.enable_cb_delayed = virtqueue_enable_cb_delayed_split;524vq->vq.get_buf = virtqueue_get_buf_split;525vq->vq.has_buf = virtqueue_has_buf_split;526vq->vq.is_interrupt_enabled = virtqueue_is_interrupt_enabled_split;527vq->vq.kick_always = virtqueue_kick_always_split;528vq->vq.kick_prepare = virtqueue_kick_prepare_split;529vq->vq.shutdown = virtqueue_shutdown_split;530return &vq->vq;531}
532
533/* Negotiates virtio transport features */
534void vring_transport_features(535VirtIODevice *vdev,536u64 *features) /* points to device features on entry and driver accepted features on return */537{
538unsigned int i;539
540for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {541if (i != VIRTIO_RING_F_INDIRECT_DESC &&542i != VIRTIO_RING_F_EVENT_IDX &&543i != VIRTIO_F_VERSION_1) {544virtio_feature_disable(*features, i);545}546}547}
548
549/* Returns the max number of scatter-gather elements that fit in an indirect pages */
550u32 virtio_get_indirect_page_capacity()551{
552return PAGE_SIZE / sizeof(struct vring_desc);553}
554
555unsigned long vring_size(unsigned int num, unsigned long align, bool packed)556{
557if (packed) {558return vring_size_packed(num, align);559} else {560return vring_size_split(num, align);561}562}
563