1 /**
2  * io_uring system api definitions.
3  *
4  * See: https://github.com/torvalds/linux/blob/master/include/uapi/linux/io_uring.h
5  *
6  * Last changes from: 760618f7a8e3b63aa06266efb301719c374e29d4 (20200724)
7  */
8 module during.io_uring;
9 
10 version (linux):
11 
12 import core.sys.posix.poll;
13 import core.sys.posix.signal;
14 
15 @system nothrow @nogc:
16 
17 /**
18  * IO operation submission data structure (Submission queue entry).
19  *
20  * C API: `struct io_uring_sqe`
21  */
22 struct SubmissionEntry
23 {
24     Operation               opcode;         /// type of operation for this sqe
25     SubmissionEntryFlags    flags;          /// IOSQE_ flags
26     ushort                  ioprio;         /// ioprio for the request
27     int                     fd;             /// file descriptor to do IO on
28     union
29     {
30         ulong off;                          /// offset into file
31         ulong addr2;                        /// from Linux 5.5
32     }
33 
34     union
35     {
36         ulong addr;                         /// pointer to buffer or iovecs
37         ulong splice_off_in;
38     }
39     uint len;                               /// buffer size or number of iovecs
40 
41     union
42     {
43         ReadWriteFlags      rw_flags;
44         FsyncFlags          fsync_flags;
45         PollEvents          poll_events;        // changed in https://github.com/torvalds/linux/commit/5769a351b89cd4d82016f18fa5f6c4077403564d
46         uint                poll_events32;      /// from Linux 5.9 - word-reversed for BE
47         SyncFileRangeFlags  sync_range_flags;   /// from Linux 5.2
48         MsgFlags            msg_flags;          /// from Linux 5.3
49         TimeoutFlags        timeout_flags;      /// from Linux 5.4
50         AcceptFlags         accept_flags;       /// from Linux 5.5
51         uint                cancel_flags;       /// from Linux 5.5
52         uint                open_flags;         /// from Linux 5.6
53         uint                statx_flags;        /// from Linux 5.6
54         uint                fadvise_advice;     /// from Linux 5.6
55         uint                splice_flags;       /// from Linux 5.7
56     }
57 
58     ulong user_data;                        /// data to be passed back at completion time
59 
60     union
61     {
62         struct
63         {
64             union
65             {
66                 ushort buf_index;   /// index into fixed buffers, if used
67                 ushort buf_group;   /// for grouped buffer selection
68             }
69             ushort personality;     /// personality to use, if used
70             int splice_fd_in;
71         }
72 
73         ulong[3] __pad2;
74     }
75 
76     /// Resets entry fields
77     void clear() @safe nothrow @nogc
78     {
79         this = SubmissionEntry.init;
80     }
81 }
82 
83 enum ReadWriteFlags : int
84 {
85     NONE = 0,
86 
87     /// High priority read/write.  Allows block-based filesystems to
88     /// use polling of the device, which provides lower latency, but
89     /// may use additional resources.  (Currently, this feature is
    /// usable only on a file descriptor opened using the
91     /// O_DIRECT flag.)
92     ///
93     /// (since Linux 4.6)
94     HIPRI = 0x00000001,
95 
96     /// Provide a per-write equivalent of the O_DSYNC open(2) flag.
97     /// This flag is meaningful only for pwritev2(), and its effect
98     /// applies only to the data range written by the system call.
99     ///
100     /// (since Linux 4.7)
101     DSYNC = 0x00000002,
102 
103     /// Provide a per-write equivalent of the O_SYNC open(2) flag.
104     /// This flag is meaningful only for pwritev2(), and its effect
105     /// applies only to the data range written by the system call.
106     ///
107     /// (since Linux 4.7)
108     SYNC = 0x00000004,
109 
110     /// Do not wait for data which is not immediately available.  If
111     /// this flag is specified, the preadv2() system call will
112     /// return instantly if it would have to read data from the
113     /// backing storage or wait for a lock.  If some data was
114     /// successfully read, it will return the number of bytes read.
115     /// If no bytes were read, it will return -1 and set errno to
116     /// EAGAIN.  Currently, this flag is meaningful only for
117     /// preadv2().
118     ///
119     /// (since Linux 4.14)
120     NOWAIT = 0x00000008,
121 
122     /// Provide a per-write equivalent of the O_APPEND open(2) flag.
123     /// This flag is meaningful only for pwritev2(), and its effect
124     /// applies only to the data range written by the system call.
125     /// The offset argument does not affect the write operation; the
126     /// data is always appended to the end of the file.  However, if
127     /// the offset argument is -1, the current file offset is
128     /// updated.
129     ///
130     /// (since Linux 4.16)
131     APPEND = 0x00000010
132 }
133 
134 enum FsyncFlags : uint
135 {
136     /// Normal file integrity sync
137     NORMAL      = 0,
138 
139     /**
140      * `fdatasync` semantics.
141      *
142      * See_Also: `fsync(2)` for details
143      */
144     DATASYNC    = (1 << 0)
145 }
146 
147 /** Possible poll event flags.
148  *  See: poll(2)
149  */
150 enum PollEvents : uint
151 {
152     /// There is data to read.
153     IN      = POLLIN,
154 
    /** Writing is now possible, though a write larger than the available
156      *  space in a socket or pipe will still block (unless O_NONBLOCK is set).
157      */
158     OUT     = POLLOUT,
159 
160     /** There is some exceptional condition on the file descriptor.
161      *  Possibilities include:
162      *
163      *  *  There is out-of-band data on a TCP socket (see tcp(7)).
164      *  *  A pseudoterminal master in packet mode has seen a state
165      *      change on the slave (see ioctl_tty(2)).
166      *  *  A cgroup.events file has been modified (see cgroups(7)).
167      */
168     PRI     = POLLPRI,
169 
    /** Error condition (only returned in revents; ignored in events).
     *  This bit is also set for a file descriptor referring to the
     *  write end of a pipe when the read end has been closed.
     */
174     ERR     = POLLERR,
175 
176     /// Invalid request: fd not open (only returned in revents; ignored in events).
177     NVAL    = POLLNVAL,
178 
179     RDNORM  = POLLRDNORM, /// Equivalent to POLLIN.
180     RDBAND  = POLLRDBAND, /// Priority band data can be read (generally unused on Linux).
181     WRNORM  = POLLWRNORM, /// Equivalent to POLLOUT.
182     WRBAND  = POLLWRBAND, /// Priority data may be written.
183 
184     /** Hang up (only returned in revents; ignored in events).  Note
185      *  that when reading from a channel such as a pipe or a stream
186      *  socket, this event merely indicates that the peer closed its
187      *  end of the channel.  Subsequent reads from the channel will
188      *  return 0 (end of file) only after all outstanding data in the
189      *  channel has been consumed.
190      */
191     HUP     = POLLHUP,
192 
193     /** (since Linux 2.6.17)
194      * Stream socket peer closed connection, or shut down writing half of connection.
195      */
196     RDHUP   = 0x2000,
197 
198     /** (since Linux 4.5)
199      * Sets an exclusive wakeup mode for the epoll file descriptor that is being attached to the
200      * target file descriptor, fd. When a wakeup event occurs and multiple epoll file descriptors
201      * are attached to the same target file using EPOLLEXCLUSIVE, one or more of the epoll file
202      * descriptors will receive an event with epoll_wait(2).  The default in this scenario (when
203      * EPOLLEXCLUSIVE is not set) is for all epoll file descriptors to receive an event.
204      * EPOLLEXCLUSIVE is thus useful for avoiding thundering herd problems in certain scenarios.
205      */
206     EXCLUSIVE = 0x10000000,
207 }
208 
209 /**
210  * Flags for `sync_file_range(2)` operation.
211  *
212  * See_Also: `sync_file_range(2)` for details
213  */
214 enum SyncFileRangeFlags : uint
215 {
216     NOOP            = 0, /// no operation
217     /// Wait upon write-out of all pages in the specified range that have already been submitted to
218     /// the device driver for write-out before performing any write.
219     WAIT_BEFORE     = 1U << 0,
220 
    /// Initiate write-out of all dirty pages in the specified range which are not presently
    /// submitted for write-out.  Note that even this may block if you attempt to write more than
    /// the request queue size.
224     WRITE           = 1U << 1,
225 
226     /// Wait upon write-out of all pages in the range after performing any write.
227     WAIT_AFTER      = 1U << 2,
228 
229     /// This is a write-for-data-integrity operation that will ensure that all pages in the
230     /// specified range which were dirty when sync_file_range() was called are committed to disk.
231     WRITE_AND_WAIT  = WAIT_BEFORE | WRITE | WAIT_AFTER
232 }
233 
234 /**
235  * Flags for `sendmsg(2)` and `recvmsg(2)` operations.
236  *
237  * See_Also: man pages for the operations.
238  */
239 enum MsgFlags : uint
240 {
241     /// No flags defined
242     NONE = 0,
243 
244     /// Sends out-of-band data on sockets that support this notion (e.g., of type `SOCK_STREAM`); the
245     /// underlying protocol must also support out-of-band data.
246     OOB = 0x01,
247 
248     /// This flag causes the receive operation to return data from the beginning of the receive
249     /// queue without removing that data from the queue. Thus, a subsequent receive call will return
250     /// the same data.
251     PEEK = 0x02,
252 
253     /// Don't use a gateway to send out the packet, send to hosts only on directly connected
254     /// networks. This is usually used only by diagnostic or routing programs. This is defined only
255     /// for protocol families that route; packet sockets don't.
256     DONTROUTE = 0x04,
257 
258     /// For raw (`AF_PACKET`), Internet datagram (since Linux 2.4.27/2.6.8), netlink (since Linux
259     /// 2.6.22), and UNIX datagram (since Linux 3.4) sockets: return the real length of the packet
260     /// or datagram, even when it was longer than the passed buffer.
261     ///
262     /// For use with Internet stream sockets, see `tcp(7)`.
263     TRUNC = 0x20,
264 
265     /// Enables nonblocking operation; if the operation would block, EAGAIN or EWOULDBLOCK is
266     /// returned. This provides similar behavior to setting the O_NONBLOCK flag (via the `fcntl(2)`
267     /// F_SETFL operation), but differs in that `MSG_DONTWAIT` is a per-call option, whereas
268     /// `O_NONBLOCK` is a setting on the open file description (see `open(2)`), which will affect
    /// all threads in the calling process as well as other processes that hold file descriptors
270     /// referring to the same open file description.
271     DONTWAIT = 0x40,
272 
273     /// Terminates a record (when this notion is supported, as for sockets of type `SOCK_SEQPACKET`).
274     EOR = 0x80,
275 
276     /// This flag requests that the operation block until the full request is satisfied. However,
277     /// the call may still return less data than requested if a signal is caught, an error or
278     /// disconnect occurs, or the next data to be received is of a different type than that
279     /// returned. This flag has no effect for datagram sockets.
280     WAITALL = 0x100,
281 
282     /// Tell the link layer that forward progress happened: you got a successful reply from the
283     /// other side. If the link layer doesn't get this it will regularly reprobe the neighbor (e.g.,
    /// via a unicast ARP). Valid only on SOCK_DGRAM and SOCK_RAW sockets and currently
285     /// implemented only for IPv4 and IPv6. See arp(7) for details.
286     CONFIRM = 0x800,
287 
288     /// This flag specifies that queued errors should be received from the socket error queue. The
289     /// error is passed in an ancillary message with a type dependent on the protocol (for IPv4
290     /// `IP_RECVERR`). The user should supply a buffer of sufficient size. See `cmsg(3)` and `ip(7)`
291     /// for more information. The payload of the original packet that caused the error is passed as
292     /// normal data via msg_iovec. The original destination address of the datagram that caused the
293     /// error is supplied via `msg_name`.
294     ERRQUEUE = 0x2000,
295 
296     /// Don't generate a `SIGPIPE` signal if the peer on a stream-oriented socket has closed the
297     /// connection. The `EPIPE` error is still returned. This provides similar behavior to using
298     /// `sigaction(2)` to ignore `SIGPIPE`, but, whereas `MSG_NOSIGNAL` is a per-call feature,
299     /// ignoring `SIGPIPE` sets a process attribute that affects all threads in the process.
300     NOSIGNAL = 0x4000,
301 
302     /// The caller has more data to send. This flag is used with TCP sockets to obtain the same
303     /// effect as the `TCP_CORK` socket option (see `tcp(7)`), with the difference that this flag can be
304     /// set on a per-call basis.
305     ///
306     /// Since Linux 2.6, this flag is also supported for UDP sockets, and informs the kernel to
307     /// package all of the data sent in calls with this flag set into a single datagram which is
308     /// transmitted only when a call is performed that does not specify this flag.
309     ///
310     /// See_Also: the `UDP_CORK` socket option described in `udp(7)`
311     MORE = 0x8000,
312 
313     /// Set the close-on-exec flag for the file descriptor received via a UNIX domain file
314     /// descriptor using the `SCM_RIGHTS` operation (described in `unix(7)`). This flag is useful
315     /// for the same reasons as the `O_CLOEXEC` flag of `open(2)`. (recvmsg only)
316     CMSG_CLOEXEC = 0x40000000
317 }
318 
319 /** sqe->timeout_flags
320  */
321 enum TimeoutFlags : uint
322 {
323     REL = 0,        /// Relative time is the default
324     ABS = 1U << 0   /// Absolute time - `IORING_TIMEOUT_ABS` (from Linux 5.5)
325 }
326 
327 /**
328  * sqe->splice_flags
329  * extends splice(2) flags
330  */
331 enum SPLICE_F_FD_IN_FIXED = 1U << 31; /* the last bit of __u32 */
332 
333 /**
334  * Flags that can be used with the `accept4(2)` operation.
335  */
336 enum AcceptFlags : uint
337 {
338     /// Same as `accept()`
339     NONE = 0,
340 
341     /// Set the `O_NONBLOCK` file status flag on the new open file description. Using this flag saves
342     /// extra calls to `fcntl(2)` to achieve the same result.
343     NONBLOCK = 0x800, // octal 00004000
344 
345     /// Set the close-on-exec (`FD_CLOEXEC`) flag on the new file descriptor. See the description of
346     /// the `O_CLOEXEC` flag in `open(2)` for reasons why this may be useful.
347     CLOEXEC = 0x80000 // octal 02000000
348 }
349 
350 /**
351  * Describes the operation to be performed
352  *
353  * See_Also: `io_uring_enter(2)`
354  */
355 enum Operation : ubyte
356 {
357     // available from Linux 5.1
358     NOP = 0,                /// IORING_OP_NOP
359     READV = 1,              /// IORING_OP_READV
360     WRITEV = 2,             /// IORING_OP_WRITEV
361     FSYNC = 3,              /// IORING_OP_FSYNC
362     READ_FIXED = 4,         /// IORING_OP_READ_FIXED
363     WRITE_FIXED = 5,        /// IORING_OP_WRITE_FIXED
364     POLL_ADD = 6,           /// IORING_OP_POLL_ADD
365     POLL_REMOVE = 7,        /// IORING_OP_POLL_REMOVE
366 
367     // available from Linux 5.2
368     SYNC_FILE_RANGE = 8,    /// IORING_OP_SYNC_FILE_RANGE
369 
370     // available from Linux 5.3
371     SENDMSG = 9,            /// IORING_OP_SENDMSG
372     RECVMSG = 10,           /// IORING_OP_RECVMSG
373 
374     // available from Linux 5.4
375     TIMEOUT = 11,           /// IORING_OP_TIMEOUT
376 
377     // available from Linux 5.5
378     TIMEOUT_REMOVE = 12,    /// IORING_OP_TIMEOUT_REMOVE
379     ACCEPT = 13,            /// IORING_OP_ACCEPT
380     ASYNC_CANCEL = 14,      /// IORING_OP_ASYNC_CANCEL
381     LINK_TIMEOUT = 15,      /// IORING_OP_LINK_TIMEOUT
382     CONNECT = 16,           /// IORING_OP_CONNECT
383 
384     // available from Linux 5.6
385     FALLOCATE = 17,         /// IORING_OP_FALLOCATE
386     OPENAT = 18,            /// IORING_OP_OPENAT
387     CLOSE = 19,             /// IORING_OP_CLOSE
388     FILES_UPDATE = 20,      /// IORING_OP_FILES_UPDATE
389     STATX = 21,             /// IORING_OP_STATX
390     READ = 22,              /// IORING_OP_READ
391     WRITE = 23,             /// IORING_OP_WRITE
392     FADVISE = 24,           /// IORING_OP_FADVISE
393     MADVISE = 25,           /// IORING_OP_MADVISE
394     SEND = 26,              /// IORING_OP_SEND
395     RECV = 27,              /// IORING_OP_RECV
396     OPENAT2 = 28,           /// IORING_OP_OPENAT2
397     EPOLL_CTL = 29,         /// IORING_OP_EPOLL_CTL
398 
399     // available from Linux 5.7
400     SPLICE = 30,            /// IORING_OP_SPLICE
401     PROVIDE_BUFFERS = 31,   /// IORING_OP_PROVIDE_BUFFERS
402     REMOVE_BUFFERS = 32,    /// IORING_OP_REMOVE_BUFFERS
403 
404     // available from Linux 5.8
405     TEE = 33,               /// IORING_OP_TEE
406 }
407 
408 /// sqe->flags
409 enum SubmissionEntryFlags : ubyte
410 {
411     NONE        = 0,
412 
413     /// Use fixed fileset (`IOSQE_FIXED_FILE`)
414     ///
415     /// When this flag is specified, fd is an index into the files array registered with the
416     /// io_uring instance (see the `IORING_REGISTER_FILES` section of the io_uring_register(2) man
417     /// page).
418     FIXED_FILE  = 1U << 0,
419 
420     /**
421      * `IOSQE_IO_DRAIN`: issue after inflight IO
422      *
423      * If a request is marked with `IO_DRAIN`, then previous commands must complete before this one
424      * is issued. Subsequent requests are not started until the drain has completed.
425      *
426      * Note: available from Linux 5.2
427      */
428     IO_DRAIN    = 1U << 1,
429 
430     /**
431      * `IOSQE_IO_LINK`
432      *
433      * If set, the next SQE in the ring will depend on this SQE. A dependent SQE will not be started
434      * until the parent SQE has completed. If the parent SQE fails, then a dependent SQE will be
435      * failed without being started. Link chains can be arbitrarily long, the chain spans any new
     * SQE that continues to have the IOSQE_IO_LINK flag set. Once an SQE is encountered that does
     * not have this flag set, that defines the end of the chain. This feature allows forming
     * dependencies between individual SQEs.
439      *
440      * Note: available from Linux 5.3
441      */
442     IO_LINK     = 1U << 2,
443 
444     /**
445      * `IOSQE_IO_HARDLINK` - like LINK, but stronger
446      *
447      * Some commands will invariably end in a failure in the sense that the
448      * completion result will be less than zero. One such example is timeouts
449      * that don't have a completion count set, they will always complete with
450      * `-ETIME` unless cancelled.
451      *
452      * For linked commands, we sever links and fail the rest of the chain if
453      * the result is less than zero. Since we have commands where we know that
454      * will happen, add IOSQE_IO_HARDLINK as a stronger link that doesn't sever
455      * regardless of the completion result. Note that the link will still sever
456      * if we fail submitting the parent request, hard links are only resilient
457      * in the presence of completion results for requests that did submit
458      * correctly.
459      *
460      * Note: available from Linux 5.5
461      */
462     IO_HARDLINK = 1U << 3,
463 
464     /**
465      * `IOSQE_ASYNC`
466      *
467      * io_uring defaults to always doing inline submissions, if at all possible. But for larger
468      * copies, even if the data is fully cached, that can take a long time. Add an IOSQE_ASYNC flag
469      * that the application can set on the SQE - if set, it'll ensure that we always go async for
470      * those kinds of requests.
471      *
472      * Note: available from Linux 5.6
473      */
474     ASYNC       = 1U << 4,    /* always go async */
475 
476     /**
477      * `IOSQE_BUFFER_SELECT`
478      * If a server process has tons of pending socket connections, generally it uses epoll to wait
479      * for activity. When the socket is ready for reading (or writing), the task can select a buffer
480      * and issue a recv/send on the given fd.
481      *
     * Now that we have fast (non-async thread) support, a task can have tons of pending reads or
     * writes. But that means they need buffers to back that data, and if the number of
484      * connections is high enough, having them preallocated for all possible connections is
485      * unfeasible.
486      *
487      * With IORING_OP_PROVIDE_BUFFERS, an application can register buffers to use for any request.
488      * The request then sets IOSQE_BUFFER_SELECT in the sqe, and a given group ID in sqe->buf_group.
489      * When the fd becomes ready, a free buffer from the specified group is selected. If none are
490      * available, the request is terminated with -ENOBUFS. If successful, the CQE on completion will
491      * contain the buffer ID chosen in the cqe->flags member, encoded as:
492      *
493      * `(buffer_id << IORING_CQE_BUFFER_SHIFT) | IORING_CQE_F_BUFFER;`
494      *
495      * Once a buffer has been consumed by a request, it is no longer available and must be
496      * registered again with IORING_OP_PROVIDE_BUFFERS.
497      *
498      * Requests need to support this feature. For now, IORING_OP_READ and IORING_OP_RECV support it.
499      * This is checked on SQE submission, a CQE with res == -EOPNOTSUPP will be posted if attempted
500      * on unsupported requests.
501      *
502      * Note: available from Linux 5.7
503      */
504     BUFFER_SELECT = 1U << 5, /* select buffer from sqe->buf_group */
505 }
506 
507 /**
508  * IO completion data structure (Completion Queue Entry)
509  *
510  * C API: `struct io_uring_cqe`
511  */
512 struct CompletionEntry
513 {
514     ulong       user_data;  /** sqe->data submission passed back */
515     int         res;        /** result code for this event */
516     CQEFlags    flags;
517 }
518 
519 /// Flags used with `CompletionEntry`
520 enum CQEFlags : uint
521 {
522     NONE = 0, /// No flags set
523 
524     /// `IORING_CQE_F_BUFFER` If set, the upper 16 bits are the buffer ID
525     /// Note: available from Linux 5.7
526     BUFFER = 1U << 0
527 }
528 
529 /**
530  * Passed in for io_uring_setup(2). Copied back with updated info on success.
531  *
532  * C API: `struct io_uring_params`
533  */
534 struct SetupParameters
535 {
536     // Magic offsets for the application to mmap the data it needs
537 
538     /// `IORING_OFF_SQ_RING`: mmap offset for submission queue ring
539     enum ulong SUBMISSION_QUEUE_RING_OFFSET = 0UL;
540     /// `IORING_OFF_CQ_RING`: mmap offset for completion queue ring
541     enum ulong COMPLETION_QUEUE_RING_OFFSET = 0x8000000UL;
542     /// `IORING_OFF_SQES`: mmap offset for submission entries
543     enum ulong SUBMISSION_QUEUE_ENTRIES_OFFSET = 0x10000000UL;
544 
545     /// (output) allocated entries in submission queue
546     /// (both ring index `array` and separate entry array at `SUBMISSION_QUEUE_ENTRIES_OFFSET`).
547     uint                        sq_entries;
548 
549     /// (output) allocated entries in completion queue
550     uint                        cq_entries;
551 
552     SetupFlags                  flags;          /// (input)
553 
    /// (input) used if SQ_AFF and SQPOLL flags are active to pin the poll thread to a specific
    /// cpu. The kernel currently requires it to be a "possible" cpu.
556     uint                        sq_thread_cpu;
557 
558     /// (input) used if SQPOLL flag is active; timeout in milliseconds
559     /// until kernel poll thread goes to sleep.
560     uint                        sq_thread_idle;
561     SetupFeatures               features;       /// (from Linux 5.4)
562     uint                        wq_fd;          /// (from Linux 5.6)
563     private uint[3]             resv;           // reserved
564     SubmissionQueueRingOffsets  sq_off;         /// (output) submission queue ring data field offsets
565     CompletionQueueRingOffsets  cq_off;         /// (output) completion queue ring data field offsets
566 }
567 
568 /// `io_uring_setup()` flags
569 enum SetupFlags : uint
570 {
571     /// No flags set
572     NONE    = 0,
573 
574     /**
575      * `IORING_SETUP_IOPOLL`
576      *
577      * Perform busy-waiting for an I/O completion, as opposed to getting notifications via an
578      * asynchronous IRQ (Interrupt Request).  The file system (if any) and block device must
     * support polling in order for this to work. Busy-waiting provides lower latency, but may
580      * consume more CPU resources than interrupt driven I/O.  Currently, this feature is usable
581      * only on a file descriptor opened using the O_DIRECT flag.  When a read or write is submitted
582      * to a polled context, the application must poll for completions on the CQ ring by calling
583      * io_uring_enter(2).  It is illegal to mix and match polled and non-polled I/O on an io_uring
584      * instance.
585      */
586     IOPOLL  = 1U << 0,
587 
588     /**
589      * `IORING_SETUP_SQPOLL`
590      *
591      * When this flag is specified, a kernel thread is created to perform submission queue polling.
592      * An io_uring instance configured in this way enables an application to issue I/O without ever
593      * context switching into the kernel.
594      * By using the submission queue to fill in new submission queue entries and watching for
595      * completions on the completion queue, the application can submit and reap I/Os without doing
596      * a single system call.
     * If the kernel thread is idle for more than sq_thread_idle milliseconds, it will set the
598      * IORING_SQ_NEED_WAKEUP bit in the flags field of the struct io_sq_ring. When this happens,
599      * the application must call io_uring_enter(2) to wake the kernel thread. If I/O is kept busy,
600      * the kernel thread will never sleep. An application making use of this feature will need to
     * guard the io_uring_enter(2) call with the following code sequence:
602      *
603      *     ````
604      *     // Ensure that the wakeup flag is read after the tail pointer has been written.
605      *     smp_mb();
606      *     if (*sq_ring->flags & IORING_SQ_NEED_WAKEUP)
607      *         io_uring_enter(fd, 0, 0, IORING_ENTER_SQ_WAKEUP);
608      *     ```
609      *
     * where sq_ring is a submission queue ring set up using the struct io_sqring_offsets described below.
611      *
     * To successfully use this feature, the application must register a set of files to be used for
613      * IO through io_uring_register(2) using the IORING_REGISTER_FILES opcode. Failure to do so will
614      * result in submitted IO being errored with EBADF.
615      */
616     SQPOLL  = 1U << 1,
617 
618     /**
619      * `IORING_SETUP_SQ_AFF`
620      *
621      *  If this flag is specified, then the poll thread will be bound to the cpu set in the
622      *  sq_thread_cpu field of the struct io_uring_params.  This flag is only meaningful when
623      *  IORING_SETUP_SQPOLL is specified.
624      */
625     SQ_AFF  = 1U << 2,
626 
627     /**
628      * `IORING_SETUP_CQSIZE`
629      *
630      * Create the completion queue with struct io_uring_params.cq_entries entries.  The value must
     * be greater than `entries`, and may be rounded up to the next power-of-two.
632      *
633      * Note: Available from Linux 5.5
634      */
635     CQSIZE  = 1U << 3,
636 
637     /**
638      * `IORING_SETUP_CLAMP`
639      *
640      * Some applications like to start small in terms of ring size, and then ramp up as needed. This
641      * is a bit tricky to do currently, since we don't advertise the max ring size.
642      *
643      * This adds IORING_SETUP_CLAMP. If set, and the values for SQ or CQ ring size exceed what we
644      * support, then clamp them at the max values instead of returning -EINVAL. Since we return the
645      * chosen ring sizes after setup, no further changes are needed on the application side.
646      * io_uring already changes the ring sizes if the application doesn't ask for power-of-two
647      * sizes, for example.
648      *
649      * Note: Available from Linux 5.6
650      */
651     CLAMP   = 1U << 4, /* clamp SQ/CQ ring sizes */
652 
653     /**
654      * `IORING_SETUP_ATTACH_WQ`
655      *
     * If IORING_SETUP_ATTACH_WQ is set, wq_fd in io_uring_params is expected to be a valid
     * io_uring fd, whose io-wq will be shared with the newly created io_uring instance. If the
     * flag is set but the io-wq can't be shared, setup fails.
659      *
660      * This allows creation of "sibling" io_urings, where we prefer to keep the SQ/CQ private, but
661      * want to share the async backend to minimize the amount of overhead associated with having
662      * multiple rings that belong to the same backend.
663      *
664      * Note: Available from Linux 5.6
665      */
666     ATTACH_WQ = 1U << 5, /* attach to existing wq */
667 }
668 
669 /// `io_uring_params->features` flags
670 enum SetupFeatures : uint
671 {
672     NONE            = 0,
673 
674     /**
675      * `IORING_FEAT_SINGLE_MMAP` (from Linux 5.4)
676      *
     * Indicates that both the SQ and CQ rings can be mapped with a single mmap(2) call, avoiding
     * the second mmap.
679      */
680     SINGLE_MMAP     = 1U << 0,
681 
682     /**
683      * `IORING_FEAT_NODROP` (from Linux 5.5)
684      *
685      * Currently we drop completion events, if the CQ ring is full. That's fine
686      * for requests with bounded completion times, but it may make it harder or
687      * impossible to use io_uring with networked IO where request completion
688      * times are generally unbounded. Or with POLL, for example, which is also
689      * unbounded.
690      *
691      * After this patch, we never overflow the ring, we simply store requests
692      * in a backlog for later flushing. This flushing is done automatically by
693      * the kernel. To prevent the backlog from growing indefinitely, if the
694      * backlog is non-empty, we apply back pressure on IO submissions. Any
695      * attempt to submit new IO with a non-empty backlog will get an -EBUSY
696      * return from the kernel. This is a signal to the application that it has
697      * backlogged CQ events, and that it must reap those before being allowed
698      * to submit more IO.
699      *
700      * Note that if we do return -EBUSY, we will have filled whatever
701      * backlogged events into the CQ ring first, if there's room. This means
702      * the application can safely reap events WITHOUT entering the kernel and
703      * waiting for them, they are already available in the CQ ring.
704      */
705     NODROP          = 1U << 1,
706 
707     /**
708      * `IORING_FEAT_SUBMIT_STABLE` (from Linux 5.5)
709      *
710      * If this flag is set, applications can be certain that any data for async offload has been
711      * consumed when the kernel has consumed the SQE.
712      */
713     SUBMIT_STABLE   = 1U << 2,
714 
715     /**
716      * `IORING_FEAT_RW_CUR_POS` (from Linux 5.6)
717      *
     * If this flag is set, applications can tell whether passing `-1` as a file offset (meaning:
     * work with the current file position) is supported.
720      */
721     RW_CUR_POS = 1U << 3,
722 
723     /**
724      * `IORING_FEAT_CUR_PERSONALITY` (from Linux 5.6)
725      * We currently setup the io_wq with a static set of mm and creds. Even for a single-use io-wq
     * per io_uring, this is suboptimal as we may have multiple enters of the ring. For
727      * sharing the io-wq backend, it doesn't work at all.
728      *
729      * Switch to passing in the creds and mm when the work item is setup. This means that async
730      * work is no longer deferred to the io_uring mm and creds, it is done with the current mm and
731      * creds.
732      *
733      * Flag this behavior with IORING_FEAT_CUR_PERSONALITY, so applications know they can rely on
734      * the current personality (mm and creds) being the same for direct issue and async issue.
735      */
736     CUR_PERSONALITY = 1U << 4,
737 
738     /**
739      * `IORING_FEAT_FAST_POLL` (from Linux 5.7)
740      * Currently io_uring tries any request in a non-blocking manner, if it can, and then retries
741      * from a worker thread if we get -EAGAIN. Now that we have a new and fancy poll based retry
742      * backend, use that to retry requests if the file supports it.
743      *
744      * This means that, for example, an IORING_OP_RECVMSG on a socket no longer requires an async
745      * thread to complete the IO. If we get -EAGAIN reading from the socket in a non-blocking
746      * manner, we arm a poll handler for notification on when the socket becomes readable. When it
747      * does, the pending read is executed directly by the task again, through the io_uring task
748      * work handlers. Not only is this faster and more efficient, it also means we're not
749      * generating potentially tons of async threads that just sit and block, waiting for the IO to
750      * complete.
751      *
752      * The feature is marked with IORING_FEAT_FAST_POLL, meaning that async pollable IO is fast,
     * and that a poll linked with another operation is fast as well.
754      */
755     FAST_POLL = 1U << 5,
756 
757     /**
758      * `IORING_FEAT_POLL_32BITS` (from Linux 5.9)
     * Poll events are widened to 32 bits to cover EPOLLEXCLUSIVE. The poll32_events word is
     * explicitly word-swapped on big endian so that the ABI is unchanged. Applications that want
     * to use EPOLLEXCLUSIVE should check this feature bit first.
763      */
764     POLL_32BITS = 1U << 6
765 }
766 
767 /**
768  * Filled with the offset for mmap(2)
769  *
770  * C API: `struct io_sqring_offsets`
771  */
772 struct SubmissionQueueRingOffsets
773 {
774     /// Incremented by kernel after entry at `head` was processed.
775     /// Pending submissions: [head..tail]
776     uint head;
777 
778     /// Modified by user space when new entry was queued; points to next
779     /// entry user space is going to fill.
780     uint tail;
781 
    /// Ring mask, value `ring_entries - 1`.
    /// Used to mask `head` and `tail` to valid slot indices. Compare the raw,
    /// unmasked values to tell the states apart: the masked `head` and `tail`
    /// can point to the same slot while the raw values differ by `ring_entries`
    /// (ring full); if the raw values are exactly equal, the ring is empty.
787     uint ring_mask;
788 
789     /// value same as SetupParameters.sq_entries, power of 2.
790     uint ring_entries;
791 
792     /// SubmissionQueueFlags
793     SubmissionQueueFlags flags;
794 
795     /// number of (invalid) entries that were dropped; entries are
796     /// invalid if their index (in `array`) is out of bounds.
797     uint dropped;
798 
799     /// index into array of `SubmissionEntry`s at offset `SUBMISSION_QUEUE_ENTRIES_OFFSET` in mmap()
800     uint array;
801 
802     private uint[3] resv; // reserved
803 }
804 
805 enum SubmissionQueueFlags: uint
806 {
807     NONE        = 0,
808 
809     /// `IORING_SQ_NEED_WAKEUP`: needs io_uring_enter wakeup
810     /// set by kernel poll thread when it goes sleeping, and reset on wakeup
811     NEED_WAKEUP = 1U << 0,
812 
813     /// `IORING_SQ_CQ_OVERFLOW`: CQ ring is overflown
814     /// Since Kernel 5.8
    /// Applications that are not willing to use io_uring_enter() to reap and handle CQEs may rely
    /// entirely on liburing's io_uring_peek_cqe(); but if the CQ ring has overflowed,
    /// io_uring_peek_cqe() is not aware of it and won't enter the kernel to flush CQEs.
    /// To fix this, the CQ overflow status is exported to userspace via the new
    /// IORING_SQ_CQ_OVERFLOW flag, so that liburing helpers such as io_uring_peek_cqe() can detect
    /// the overflow and flush accordingly.
822     CQ_OVERFLOW = 1U << 1
823 }
824 
825 /**
 * Field offsets used to map the kernel structure to ours.
827  *
828  * C API: `struct io_cqring_offsets`
829  */
830 struct CompletionQueueRingOffsets
831 {
832     /// incremented by user space after entry at `head` was processed.
833     /// available entries for processing: [head..tail]
834     uint head;
835 
836     /// modified by kernel when new entry was created; points to next
837     /// entry kernel is going to fill.
838     uint tail;
839 
    /// Ring mask, value `ring_entries - 1`.
    /// Used to mask `head` and `tail` to valid slot indices. Compare the raw,
    /// unmasked values to tell the states apart: the masked `head` and `tail`
    /// can point to the same slot while the raw values differ by `ring_entries`
    /// (ring full); if the raw values are exactly equal, the ring is empty.
845     uint ring_mask;
846 
847     /// value same as SetupParameters.cq_entries, power of 2.
848     uint ring_entries;
849 
850     /// incremented by the kernel every time it failed to queue a
851     /// completion event because the ring was full.
852     uint overflow;
853 
854     /// Offset to array of completion queue entries
855     uint cqes;
856 
857     CQRingFlags flags;             /// (available from Linux 5.8)
858     private uint _resv1;
859     private ulong _resv2;
860 }
861 
862 /// CompletionQueue ring flags
863 enum CQRingFlags : uint
864 {
865     NONE = 0, /// No flags set
866 
867     /// `IORING_CQ_EVENTFD_DISABLED` disable eventfd notifications (available from Linux 5.8)
    /// This flag can be set/cleared by the application to disable/enable eventfd notifications
    /// when a request is completed and queued to the CQ ring.
    ///
    /// Before this change, notifications were always sent if an eventfd was registered, so
    /// IORING_CQ_EVENTFD_DISABLED is not set during initialization. It is up to the application
    /// to set the flag after initialization if no notifications are required at the beginning.
872     EVENTFD_DISABLED = 1U << 0,
873 }
874 
875 /// io_uring_register(2) opcodes and arguments
876 enum RegisterOpCode : uint
877 {
878     /**
879      * `arg` points to a struct iovec array of nr_args entries.  The buffers associated with the
880      * iovecs will be locked in memory and charged against the user's RLIMIT_MEMLOCK resource limit.
     * See getrlimit(2) for more information. Additionally, there is a size limit of 1GiB per
882      * buffer.  Currently, the buffers must be anonymous, non-file-backed memory, such as that
883      * returned by malloc(3) or mmap(2) with the MAP_ANONYMOUS flag set.  It is expected that this
884      * limitation will be lifted in the future. Huge pages are supported as well. Note that the
885      * entire huge page will be pinned in the kernel, even if only a portion of it is used.
886      *
887      * After a successful call, the supplied buffers are mapped into the kernel and eligible for
888      * I/O.  To make use of them, the application must specify the IORING_OP_READ_FIXED or
     * IORING_OP_WRITE_FIXED opcodes in the submission queue entry (see the struct io_uring_sqe
890      * definition in io_uring_enter(2)), and set the buf_index field to the desired buffer index.
891      * The memory range described by the submission queue entry's addr and len fields must fall
892      * within the indexed buffer.
893      *
     * It is perfectly valid to set up a large buffer and then only use part of it for an I/O, as
895      * long as the range is within the originally mapped region.
896      *
897      * An application can increase or decrease the size or number of registered buffers by first
898      * unregistering the existing buffers, and then issuing a new call to io_uring_register() with
899      * the new buffers.
900      *
901      * An application need not unregister buffers explicitly before shutting down the io_uring
902      * instance.
903      *
904      * `IORING_REGISTER_BUFFERS`
905      */
906     REGISTER_BUFFERS        = 0,
907 
908     /**
909      * This operation takes no argument, and `arg` must be passed as NULL. All previously registered
910      * buffers associated with the io_uring instance will be released.
911      *
912      * `IORING_UNREGISTER_BUFFERS`
913      */
914     UNREGISTER_BUFFERS      = 1,
915 
916     /**
917      * Register files for I/O. `arg` contains a pointer to an array of `nr_args` file descriptors
918      * (signed 32 bit integers).
919      *
920      * To make use of the registered files, the IOSQE_FIXED_FILE flag must be set in the flags
921      * member of the struct io_uring_sqe, and the fd member is set to the index of the file in the
922      * file descriptor array.
923      *
924      * Files are automatically unregistered when the io_uring instance is torn down. An application
925      * need only unregister if it wishes to register a new set of fds.
926      *
927      * `IORING_REGISTER_FILES`
928      */
929     REGISTER_FILES          = 2,
930 
931     /**
932      * This operation requires no argument, and `arg` must be passed as NULL.  All previously
933      * registered files associated with the io_uring instance will be unregistered.
934      *
935      * `IORING_UNREGISTER_FILES`
936      */
937     UNREGISTER_FILES        = 3,
938 
939     /**
940      * `IORING_REGISTER_EVENTFD`
941      *
942      * Registers eventfd that would be used to notify about completions on io_uring itself.
943      *
944      * Note: available from Linux 5.2
945      */
946     REGISTER_EVENTFD        = 4,
947 
948     /**
949      * `IORING_UNREGISTER_EVENTFD`
950      *
951      * Unregisters previously registered eventfd.
952      *
953      * Note: available from Linux 5.2
954      */
955     UNREGISTER_EVENTFD      = 5,
956 
957     /// `IORING_REGISTER_FILES_UPDATE` (from Linux 5.5)
958     REGISTER_FILES_UPDATE   = 6,
959 
960     /**
961      * `IORING_REGISTER_EVENTFD_ASYNC` (from Linux 5.6)
962      *
963      * If an application is using eventfd notifications with poll to know when new SQEs can be
     * issued, it's expecting the following reads/writes to complete inline. And with that, it
     * knows that there are events available, and doesn't want spurious wakeups on the eventfd for
     * those requests.
967      *
968      * This adds IORING_REGISTER_EVENTFD_ASYNC, which works just like IORING_REGISTER_EVENTFD,
969      * except it only triggers notifications for events that happen from async completions (IRQ, or
970      * io-wq worker completions). Any completions inline from the submission itself will not
971      * trigger notifications.
972      */
973     REGISTER_EVENTFD_ASYNC = 7,
974 
975     /**
976      * `IORING_REGISTER_PROBE` (from Linux 5.6)
977      *
978      * The application currently has no way of knowing if a given opcode is supported or not
979      * without having to try and issue one and see if we get -EINVAL or not. And even this approach
980      * is fraught with peril, as maybe we're getting -EINVAL due to some fields being missing, or
981      * maybe it's just not that easy to issue that particular command without doing some other leg
982      * work in terms of setup first.
983      *
     * This adds IORING_REGISTER_PROBE, which fills in a structure with info on what is supported
985      * or not. This will work even with sparse opcode fields, which may happen in the future or
986      * even today if someone backports specific features to older kernels.
987      */
988     REGISTER_PROBE = 8,
989 
990     /**
991      * `IORING_REGISTER_PERSONALITY` (from Linux 5.6)
992      *
993      * If an application wants to use a ring with different kinds of credentials, it can register
     * them upfront. We don't look up credentials; the credentials of the task calling
     * IORING_REGISTER_PERSONALITY are used.
996      *
     * An 'id' is returned for the application to use in subsequent requests (via the SQE's
     * `personality` field).
998      */
999     REGISTER_PERSONALITY = 9,
1000 
1001     /// `IORING_UNREGISTER_PERSONALITY` (from Linux 5.6)
1002     UNREGISTER_PERSONALITY = 10,
1003 }
1004 
1005 /// io_uring_enter(2) flags
1006 enum EnterFlags: uint
1007 {
1008     NONE        = 0,
1009     GETEVENTS   = (1 << 0), /// `IORING_ENTER_GETEVENTS`
1010     SQ_WAKEUP   = (1 << 1), /// `IORING_ENTER_SQ_WAKEUP`
1011 }
1012 
1013 /// Time specification as defined in kernel headers (used by TIMEOUT operations)
1014 struct KernelTimespec
1015 {
1016     long tv_sec; /// seconds
1017     long tv_nsec; /// nanoseconds
1018 }
1019 
1020 static assert(CompletionEntry.sizeof == 16);
1021 static assert(CompletionQueueRingOffsets.sizeof == 40);
1022 static assert(SetupParameters.sizeof == 120);
1023 static assert(SubmissionEntry.sizeof == 64);
1024 static assert(SubmissionQueueRingOffsets.sizeof == 40);
1025 
1026 /// Indicating that OP is supported by the kernel
1027 enum IO_URING_OP_SUPPORTED = 1U << 0;
1028 
1029 struct io_uring_probe_op
1030 {
1031     ubyte op;
1032     ubyte resv;
1033     ushort flags; /* IO_URING_OP_* flags */
1034     uint resv2;
1035 }
1036 
1037 struct io_uring_probe
1038 {
1039     ubyte last_op; /* last opcode supported */
1040     ubyte ops_len; /* length of ops[] array below */
1041     ushort resv;
1042     uint[3] resv2;
1043     io_uring_probe_op[0] ops;
1044 }
1045 
1046 /**
1047  * Setup a context for performing asynchronous I/O.
1048  *
1049  * The `io_uring_setup()` system call sets up a submission queue (SQ) and completion queue (CQ) with
 * at least `entries` entries, and returns a file descriptor which can be used to perform subsequent
1051  * operations on the io_uring instance. The submission and completion queues are shared between
1052  * userspace and the kernel, which eliminates the need to copy data when initiating and completing
1053  * I/O.
1054  *
1055  * See_Also: `io_uring_setup(2)`
1056  *
1057  * Params:
 *   entries = Defines how many entries the submission queue can hold.
1059  *   p = `SetupParameters`
1060  *
1061  * Returns:
1062  *     `io_uring_setup(2)` returns a new file descriptor on success. The application may then provide
1063  *     the file descriptor in a subsequent `mmap(2)` call to map the submission and completion queues,
1064  *     or to the `io_uring_register(2)` or `io_uring_enter(2)` system calls.
1065  *
1066  *     On error, -1 is returned and `errno` is set appropriately.
1067  */
1068 int io_uring_setup(uint entries, scope ref SetupParameters p) @trusted
1069 {
1070     pragma(inline);
1071     return syscall(SYS_io_uring_setup, entries, &p);
1072 }
1073 
1074 /**
1075  * Initiate and/or complete asynchronous I/O
1076  *
1077  * `io_uring_enter()` is used to initiate and complete I/O using the shared submission and
1078  * completion queues setup by a call to `io_uring_setup(2)`. A single call can both submit new I/O
 * and wait for completions of I/O initiated by this call or previous calls to `io_uring_enter()`.
1080  *
 * When the system call returns that a certain number of SQEs have been consumed and submitted, it's
1082  * safe to reuse SQE entries in the ring. This is true even if the actual IO submission had to be
1083  * punted to async context, which means that the SQE may in fact not have been submitted yet. If the
1084  * kernel requires later use of a particular SQE entry, it will have made a private copy of it.
1085  *
1086  * Note: For interrupt driven I/O (where `IORING_SETUP_IOPOLL` was not specified in the call to
1087  *     `io_uring_setup(2)`), an application may check the completion queue for event completions without
1088  *     entering the kernel at all.
1089  *
1090  * See_Also: `io_uring_enter(2)`
1091  *
1092  * Params:
1093  *   fd = the file descriptor returned by io_uring_setup(2).
1094  *   to_submit = specifies the number of I/Os to submit from the submission queue.
1095  *   min_complete = If the `IORING_ENTER_GETEVENTS` bit is set in flags, then the system call will attempt
1096  *        to wait for `min_complete` event completions before returning. If the io_uring instance was configured
1097  *        for polling, by specifying IORING_SETUP_IOPOLL in the call to io_uring_setup(2), then
1098  *        min_complete has a slightly different meaning.  Passing a value of 0 instructs the kernel to
1099  *        return any events which are already complete, without blocking. If min_complete is a non-zero
 *        value, the kernel will still return immediately if any completion events are available.  If
1101  *        no event completions are available, then the call will poll either until one or more
1102  *        completions become available, or until the process has exceeded its scheduler time slice.
1103  *   flags = Behavior modification flags - `EnterFlags`
1104  *   sig = a pointer to a signal mask (see `sigprocmask(2)`); if sig is not `null`, `io_uring_enter()`
1105  *         first replaces the current signal mask by the one pointed to by sig, then waits for events to
1106  *         become available in the completion queue, and then restores the original signal mask. The
1107  *         following `io_uring_enter()` call:
1108  *
1109  *         ```
1110  *         ret = io_uring_enter(fd, 0, 1, IORING_ENTER_GETEVENTS, &sig);
1111  *         ```
1112  *
1113  *         is equivalent to atomically executing the following calls:
1114  *
1115  *         ```
1116  *         pthread_sigmask(SIG_SETMASK, &sig, &orig);
1117  *         ret = io_uring_enter(fd, 0, 1, IORING_ENTER_GETEVENTS, NULL);
1118  *         pthread_sigmask(SIG_SETMASK, &orig, NULL);
1119  *         ```
1120  *
1121  *         See the description of `pselect(2)` for an explanation of why the sig parameter is necessary.
1122  *
 * Returns: On success, the number of I/Os successfully consumed; on error, -1 is returned and
 *     `errno` is set appropriately.
1124  */
1125 int io_uring_enter(int fd, uint to_submit, uint min_complete, EnterFlags flags, const sigset_t* sig = null)
1126 {
1127     pragma(inline);
    // Note: the kernel validates the mask size against its own sigset_t (_NSIG / 8 == 8 bytes on
    // Linux), not glibc's 128-byte sigset_t, so the size is passed explicitly here.
    return syscall(SYS_io_uring_enter, fd, to_submit, min_complete, flags, sig, 8);
1129 }
1130 
1131 /**
1132  * Register files or user buffers for asynchronous I/O.
1133  *
1134  * The `io_uring_register()` system call registers user buffers or files for use in an `io_uring(7)`
1135  * instance referenced by fd.  Registering files or user buffers allows the kernel to take long term
1136  * references to internal data structures or create long term mappings of application memory,
1137  * greatly reducing per-I/O overhead.
1138  *
 * See_Also: `io_uring_register(2)`
1140  *
1141  * Params:
1142  *   fd = the file descriptor returned by a call to io_uring_setup(2)
1143  *   opcode = code of operation to execute on args
1144  *   arg = Args used by specified operation. See `RegisterOpCode` for usage details.
1145  *   nr_args = number of provided arguments
1146  *
1147  * Returns: On success, io_uring_register() returns 0.  On error, -1 is returned, and errno is set accordingly.
1148  */
1149 int io_uring_register(int fd, RegisterOpCode opcode, const(void)* arg, uint nr_args)
1150 {
1151     pragma(inline);
1152     return syscall(SYS_io_uring_register, fd, opcode, arg, nr_args);
1153 }
1154 
1155 private:
1156 
1157 // Syscalls
1158 enum
1159 {
1160     SYS_io_uring_setup       = 425,
1161     SYS_io_uring_enter       = 426,
1162     SYS_io_uring_register    = 427
1163 }
1164 
1165 extern (C):
1166 
/// Invoke system call number `sysno`, passing it the remaining arguments.
1168 int syscall(int sysno, ...);