xref: /kvmtool/include/linux/virtio_ring.h (revision 1a992bbaab08994e12a7594b0c39535152093a6b)
1*1a992bbaSAndre Przywara #ifndef _UAPI_LINUX_VIRTIO_RING_H
2*1a992bbaSAndre Przywara #define _UAPI_LINUX_VIRTIO_RING_H
3*1a992bbaSAndre Przywara /* An interface for efficient virtio implementation, currently for use by KVM,
4*1a992bbaSAndre Przywara  * but hopefully others soon.  Do NOT change this since it will
5*1a992bbaSAndre Przywara  * break existing servers and clients.
6*1a992bbaSAndre Przywara  *
7*1a992bbaSAndre Przywara  * This header is BSD licensed so anyone can use the definitions to implement
8*1a992bbaSAndre Przywara  * compatible drivers/servers.
9*1a992bbaSAndre Przywara  *
10*1a992bbaSAndre Przywara  * Redistribution and use in source and binary forms, with or without
11*1a992bbaSAndre Przywara  * modification, are permitted provided that the following conditions
12*1a992bbaSAndre Przywara  * are met:
13*1a992bbaSAndre Przywara  * 1. Redistributions of source code must retain the above copyright
14*1a992bbaSAndre Przywara  *    notice, this list of conditions and the following disclaimer.
15*1a992bbaSAndre Przywara  * 2. Redistributions in binary form must reproduce the above copyright
16*1a992bbaSAndre Przywara  *    notice, this list of conditions and the following disclaimer in the
17*1a992bbaSAndre Przywara  *    documentation and/or other materials provided with the distribution.
18*1a992bbaSAndre Przywara  * 3. Neither the name of IBM nor the names of its contributors
19*1a992bbaSAndre Przywara  *    may be used to endorse or promote products derived from this software
20*1a992bbaSAndre Przywara  *    without specific prior written permission.
21*1a992bbaSAndre Przywara  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
22*1a992bbaSAndre Przywara  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23*1a992bbaSAndre Przywara  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24*1a992bbaSAndre Przywara  * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
25*1a992bbaSAndre Przywara  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26*1a992bbaSAndre Przywara  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27*1a992bbaSAndre Przywara  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28*1a992bbaSAndre Przywara  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29*1a992bbaSAndre Przywara  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30*1a992bbaSAndre Przywara  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31*1a992bbaSAndre Przywara  * SUCH DAMAGE.
32*1a992bbaSAndre Przywara  *
33*1a992bbaSAndre Przywara  * Copyright Rusty Russell IBM Corporation 2007. */
34*1a992bbaSAndre Przywara #ifndef __KERNEL__
35*1a992bbaSAndre Przywara #include <stdint.h>
36*1a992bbaSAndre Przywara #endif
37*1a992bbaSAndre Przywara #include <linux/types.h>
38*1a992bbaSAndre Przywara #include <linux/virtio_types.h>
39*1a992bbaSAndre Przywara 
/* Descriptor flag: the buffer continues via the "next" field. */
#define VRING_DESC_F_NEXT	1
/* Descriptor flag: the buffer is write-only; when clear it is read-only. */
#define VRING_DESC_F_WRITE	2
/* Descriptor flag: the buffer contains a list of buffer descriptors. */
#define VRING_DESC_F_INDIRECT	4
46*1a992bbaSAndre Przywara 
/*
 * Packed ring: mark a descriptor as available or as used.
 * Note: both values are bit positions (shift counts), not shifted masks.
 */
#define VRING_PACKED_DESC_F_AVAIL	7
#define VRING_PACKED_DESC_F_USED	15
53*1a992bbaSAndre Przywara 
/* Set by the Host in used->flags as advice to the Guest: do not kick me when
 * you add a buffer.  The advice is unreliable, so it is only an optimization;
 * the Guest will still kick if it is out of buffers. */
#define VRING_USED_F_NO_NOTIFY	1
/* Set by the Guest in avail->flags as advice to the Host: do not interrupt me
 * when you consume a buffer.  The advice is unreliable, so it is only an
 * optimization. */
#define VRING_AVAIL_F_NO_INTERRUPT	1
62*1a992bbaSAndre Przywara 
/* Packed ring: events are enabled. */
#define VRING_PACKED_EVENT_FLAG_ENABLE	0x0
/* Packed ring: events are disabled. */
#define VRING_PACKED_EVENT_FLAG_DISABLE	0x1
/*
 * Packed ring: events are enabled for one specific descriptor, the one
 * identified by the Descriptor Ring Change Event Offset/Wrap Counter.
 * Valid only when VIRTIO_RING_F_EVENT_IDX has been negotiated.
 */
#define VRING_PACKED_EVENT_FLAG_DESC	0x2
73*1a992bbaSAndre Przywara 
/*
 * Packed ring: bit shift of the wrap counter inside the event suppression
 * structure.
 */
#define VRING_PACKED_EVENT_F_WRAP_CTR	15
79*1a992bbaSAndre Przywara 
/* Indirect buffer descriptors are supported. */
#define VIRTIO_RING_F_INDIRECT_DESC	28

/* Event index publication, in both directions:
 *  - The Guest publishes the used index for which it expects an interrupt
 *    at the end of the avail ring; the Host should ignore avail->flags.
 *  - The Host publishes the avail index for which it expects a kick at the
 *    end of the used ring; the Guest should ignore used->flags. */
#define VIRTIO_RING_F_EVENT_IDX		29
88*1a992bbaSAndre Przywara 
/* Alignment required for each vring element.
 * With the pre-virtio 1.0 layout these alignments fall out naturally.
 */
#define VRING_AVAIL_ALIGN_SIZE 2
#define VRING_USED_ALIGN_SIZE 4
#define VRING_DESC_ALIGN_SIZE 16
95*1a992bbaSAndre Przywara 
96*1a992bbaSAndre Przywara /* Virtio ring descriptors: 16 bytes.  These can chain together via "next". */
97*1a992bbaSAndre Przywara struct vring_desc {
98*1a992bbaSAndre Przywara 	/* Address (guest-physical). */
99*1a992bbaSAndre Przywara 	__virtio64 addr;
100*1a992bbaSAndre Przywara 	/* Length. */
101*1a992bbaSAndre Przywara 	__virtio32 len;
102*1a992bbaSAndre Przywara 	/* The flags as indicated above. */
103*1a992bbaSAndre Przywara 	__virtio16 flags;
104*1a992bbaSAndre Przywara 	/* We chain unused descriptors via this, too */
105*1a992bbaSAndre Przywara 	__virtio16 next;
106*1a992bbaSAndre Przywara };
107*1a992bbaSAndre Przywara 
108*1a992bbaSAndre Przywara struct vring_avail {
109*1a992bbaSAndre Przywara 	__virtio16 flags;
110*1a992bbaSAndre Przywara 	__virtio16 idx;
111*1a992bbaSAndre Przywara 	__virtio16 ring[];
112*1a992bbaSAndre Przywara };
113*1a992bbaSAndre Przywara 
114*1a992bbaSAndre Przywara /* u32 is used here for ids for padding reasons. */
115*1a992bbaSAndre Przywara struct vring_used_elem {
116*1a992bbaSAndre Przywara 	/* Index of start of used descriptor chain. */
117*1a992bbaSAndre Przywara 	__virtio32 id;
118*1a992bbaSAndre Przywara 	/* Total length of the descriptor chain which was used (written to) */
119*1a992bbaSAndre Przywara 	__virtio32 len;
120*1a992bbaSAndre Przywara };
121*1a992bbaSAndre Przywara 
122*1a992bbaSAndre Przywara typedef struct vring_used_elem __attribute__((aligned(VRING_USED_ALIGN_SIZE)))
123*1a992bbaSAndre Przywara 	vring_used_elem_t;
124*1a992bbaSAndre Przywara 
125*1a992bbaSAndre Przywara struct vring_used {
126*1a992bbaSAndre Przywara 	__virtio16 flags;
127*1a992bbaSAndre Przywara 	__virtio16 idx;
128*1a992bbaSAndre Przywara 	vring_used_elem_t ring[];
129*1a992bbaSAndre Przywara };
130*1a992bbaSAndre Przywara 
131*1a992bbaSAndre Przywara /*
132*1a992bbaSAndre Przywara  * The ring element addresses are passed between components with different
133*1a992bbaSAndre Przywara  * alignments assumptions. Thus, we might need to decrease the compiler-selected
134*1a992bbaSAndre Przywara  * alignment, and so must use a typedef to make sure the aligned attribute
135*1a992bbaSAndre Przywara  * actually takes hold:
136*1a992bbaSAndre Przywara  *
137*1a992bbaSAndre Przywara  * https://gcc.gnu.org/onlinedocs//gcc/Common-Type-Attributes.html#Common-Type-Attributes
138*1a992bbaSAndre Przywara  *
139*1a992bbaSAndre Przywara  * When used on a struct, or struct member, the aligned attribute can only
140*1a992bbaSAndre Przywara  * increase the alignment; in order to decrease it, the packed attribute must
141*1a992bbaSAndre Przywara  * be specified as well. When used as part of a typedef, the aligned attribute
142*1a992bbaSAndre Przywara  * can both increase and decrease alignment, and specifying the packed
143*1a992bbaSAndre Przywara  * attribute generates a warning.
144*1a992bbaSAndre Przywara  */
145*1a992bbaSAndre Przywara typedef struct vring_desc __attribute__((aligned(VRING_DESC_ALIGN_SIZE)))
146*1a992bbaSAndre Przywara 	vring_desc_t;
147*1a992bbaSAndre Przywara typedef struct vring_avail __attribute__((aligned(VRING_AVAIL_ALIGN_SIZE)))
148*1a992bbaSAndre Przywara 	vring_avail_t;
149*1a992bbaSAndre Przywara typedef struct vring_used __attribute__((aligned(VRING_USED_ALIGN_SIZE)))
150*1a992bbaSAndre Przywara 	vring_used_t;
151*1a992bbaSAndre Przywara 
152*1a992bbaSAndre Przywara struct vring {
153*1a992bbaSAndre Przywara 	unsigned int num;
154*1a992bbaSAndre Przywara 
155*1a992bbaSAndre Przywara 	vring_desc_t *desc;
156*1a992bbaSAndre Przywara 
157*1a992bbaSAndre Przywara 	vring_avail_t *avail;
158*1a992bbaSAndre Przywara 
159*1a992bbaSAndre Przywara 	vring_used_t *used;
160*1a992bbaSAndre Przywara };
161*1a992bbaSAndre Przywara 
162*1a992bbaSAndre Przywara #ifndef VIRTIO_RING_NO_LEGACY
163*1a992bbaSAndre Przywara 
164*1a992bbaSAndre Przywara /* The standard layout for the ring is a continuous chunk of memory which looks
165*1a992bbaSAndre Przywara  * like this.  We assume num is a power of 2.
166*1a992bbaSAndre Przywara  *
167*1a992bbaSAndre Przywara  * struct vring
168*1a992bbaSAndre Przywara  * {
169*1a992bbaSAndre Przywara  *	// The actual descriptors (16 bytes each)
170*1a992bbaSAndre Przywara  *	struct vring_desc desc[num];
171*1a992bbaSAndre Przywara  *
172*1a992bbaSAndre Przywara  *	// A ring of available descriptor heads with free-running index.
173*1a992bbaSAndre Przywara  *	__virtio16 avail_flags;
174*1a992bbaSAndre Przywara  *	__virtio16 avail_idx;
175*1a992bbaSAndre Przywara  *	__virtio16 available[num];
176*1a992bbaSAndre Przywara  *	__virtio16 used_event_idx;
177*1a992bbaSAndre Przywara  *
178*1a992bbaSAndre Przywara  *	// Padding to the next align boundary.
179*1a992bbaSAndre Przywara  *	char pad[];
180*1a992bbaSAndre Przywara  *
181*1a992bbaSAndre Przywara  *	// A ring of used descriptor heads with free-running index.
182*1a992bbaSAndre Przywara  *	__virtio16 used_flags;
183*1a992bbaSAndre Przywara  *	__virtio16 used_idx;
184*1a992bbaSAndre Przywara  *	struct vring_used_elem used[num];
185*1a992bbaSAndre Przywara  *	__virtio16 avail_event_idx;
186*1a992bbaSAndre Przywara  * };
187*1a992bbaSAndre Przywara  */
/* The used event index is published in the slot just past the end of the
 * available ring, and the avail event index in the slot just past the end of
 * the used ring.  They sit at the end for backwards compatibility.
 * Both macros expand to an lvalue and evaluate (vr) more than once. */
#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num])
#define vring_avail_event(vr) (*(__virtio16 *)&(vr)->used->ring[(vr)->num])
192*1a992bbaSAndre Przywara 
193*1a992bbaSAndre Przywara static inline void vring_init(struct vring *vr, unsigned int num, void *p,
194*1a992bbaSAndre Przywara 			      unsigned long align)
195*1a992bbaSAndre Przywara {
196*1a992bbaSAndre Przywara 	vr->num = num;
197*1a992bbaSAndre Przywara 	vr->desc = p;
198*1a992bbaSAndre Przywara 	vr->avail = (struct vring_avail *)((char *)p + num * sizeof(struct vring_desc));
199*1a992bbaSAndre Przywara 	vr->used = (void *)(((uintptr_t)&vr->avail->ring[num] + sizeof(__virtio16)
200*1a992bbaSAndre Przywara 		+ align-1) & ~(align - 1));
201*1a992bbaSAndre Przywara }
202*1a992bbaSAndre Przywara 
203*1a992bbaSAndre Przywara static inline unsigned vring_size(unsigned int num, unsigned long align)
204*1a992bbaSAndre Przywara {
205*1a992bbaSAndre Przywara 	return ((sizeof(struct vring_desc) * num + sizeof(__virtio16) * (3 + num)
206*1a992bbaSAndre Przywara 		 + align - 1) & ~(align - 1))
207*1a992bbaSAndre Przywara 		+ sizeof(__virtio16) * 3 + sizeof(struct vring_used_elem) * num;
208*1a992bbaSAndre Przywara }
209*1a992bbaSAndre Przywara 
210*1a992bbaSAndre Przywara #endif /* VIRTIO_RING_NO_LEGACY */
211*1a992bbaSAndre Przywara 
212*1a992bbaSAndre Przywara /* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */
213*1a992bbaSAndre Przywara /* Assuming a given event_idx value from the other side, if
214*1a992bbaSAndre Przywara  * we have just incremented index from old to new_idx,
215*1a992bbaSAndre Przywara  * should we trigger an event? */
216*1a992bbaSAndre Przywara static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old)
217*1a992bbaSAndre Przywara {
218*1a992bbaSAndre Przywara 	/* Note: Xen has similar logic for notification hold-off
219*1a992bbaSAndre Przywara 	 * in include/xen/interface/io/ring.h with req_event and req_prod
220*1a992bbaSAndre Przywara 	 * corresponding to event_idx + 1 and new_idx respectively.
221*1a992bbaSAndre Przywara 	 * Note also that req_event and req_prod in Xen start at 1,
222*1a992bbaSAndre Przywara 	 * event indexes in virtio start at 0. */
223*1a992bbaSAndre Przywara 	return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old);
224*1a992bbaSAndre Przywara }
225*1a992bbaSAndre Przywara 
226*1a992bbaSAndre Przywara struct vring_packed_desc_event {
227*1a992bbaSAndre Przywara 	/* Descriptor Ring Change Event Offset/Wrap Counter. */
228*1a992bbaSAndre Przywara 	__le16 off_wrap;
229*1a992bbaSAndre Przywara 	/* Descriptor Ring Change Event Flags. */
230*1a992bbaSAndre Przywara 	__le16 flags;
231*1a992bbaSAndre Przywara };
232*1a992bbaSAndre Przywara 
233*1a992bbaSAndre Przywara struct vring_packed_desc {
234*1a992bbaSAndre Przywara 	/* Buffer Address. */
235*1a992bbaSAndre Przywara 	__le64 addr;
236*1a992bbaSAndre Przywara 	/* Buffer Length. */
237*1a992bbaSAndre Przywara 	__le32 len;
238*1a992bbaSAndre Przywara 	/* Buffer ID. */
239*1a992bbaSAndre Przywara 	__le16 id;
240*1a992bbaSAndre Przywara 	/* The flags depending on descriptor type. */
241*1a992bbaSAndre Przywara 	__le16 flags;
242*1a992bbaSAndre Przywara };
243*1a992bbaSAndre Przywara 
244*1a992bbaSAndre Przywara #endif /* _UAPI_LINUX_VIRTIO_RING_H */
245