[LTP] [PATCH 1/3] io_uring: Redesign common helpers to follow liburing API conventions

Martin Doucha mdoucha@suse.cz
Wed Jun 10 18:08:19 CEST 2026


Hi,
as the AI review said, this patch should be split into two parts:
1) add the new helpers first
2) remove old helpers after all tests have been ported to the new ones

The other two patches updating tests look good. Two notes below.

On 6/8/26 16:36, Sebastian Chlad wrote:
> Replace coupled submit and wait helpers with a proper liburing-style API:
> - io_uring_get_sqe(): get next SQE slot from the submission ring
> - io_uring_prep_rw(): generic SQE fill for read/write operations
> - io_uring_prep_read/write/readv/writev(): operation-specific prep helpers
> - io_uring_sqe_set_data64(): set user_data explicitly on the SQE
> - io_uring_submit(): submit all pending SQEs to the kernel
> - io_uring_wait_cqe(): wait for next CQE, return pointer without consuming it
> - io_uring_cqe_seen(): advance the CQ head to mark a CQE as consumed
> 
> The old io_uring_do_io_op() and io_uring_do_vec_io_op() helpers are
> removed. They coupled SQE submission with an immediate CQE wait, making
> tests effectively synchronous, and emitted tst_res(TPASS) from inside
> the helper, hiding where test results came from. They also set user_data
> to the opcode value, which is meaningless as a per-request tag and would
> cause spurious passes in any test with multiple in-flight SQEs of the
> same opcode.
> 
> With the new API, each test controls SQE preparation, user_data
> assignment, submission, CQE validation and result reporting explicitly
> in its own body, matching how real io_uring applications are written.
> 
> Signed-off-by: Sebastian Chlad <sebastian.chlad@suse.com>
> ---
>   .../syscalls/io_uring/io_uring_common.h       | 193 ++++++++----------
>   1 file changed, 85 insertions(+), 108 deletions(-)
> 
> diff --git a/testcases/kernel/syscalls/io_uring/io_uring_common.h b/testcases/kernel/syscalls/io_uring/io_uring_common.h
> index aa31339fb..18fdfc9b6 100644
> --- a/testcases/kernel/syscalls/io_uring/io_uring_common.h
> +++ b/testcases/kernel/syscalls/io_uring/io_uring_common.h
> @@ -3,6 +3,8 @@
>    * Copyright (C) 2026 IBM
>    * Author: Sachin Sant <sachinp@linux.ibm.com>
>    *
> + * Copyright (C) 2026 Sebastian Chlad <sebastian.chlad@suse.com>
> + *
>    * Common definitions and helper functions for io_uring tests
>    */
>   
> @@ -43,6 +45,7 @@ struct io_uring_submit {
>   	size_t sq_ptr_size;
>   	void *cq_ptr;
>   	size_t cq_ptr_size;
> +	unsigned int sq_pending;
>   };
>   
>   /*
> @@ -117,162 +120,136 @@ static inline void io_uring_cleanup_queue(struct io_uring_submit *s,
>   }
>   
>   /*
> - * Internal helper to submit a single SQE to the submission queue
> - * Used by both vectored and non-vectored I/O operations
> + * Get the next available SQE slot from the submission ring.
> + * The SQE is zeroed and tracked as pending until io_uring_submit() is called.
>    */
> -static inline void io_uring_submit_sqe_internal(struct io_uring_submit *s,
> -						int fd, int opcode,
> -						unsigned long addr,
> -						unsigned int len,
> -						off_t offset,
> -						unsigned long long user_data)
> +static inline struct io_uring_sqe *io_uring_get_sqe(struct io_uring_submit *s)
>   {
>   	struct io_sq_ring *sring = &s->sq_ring;
> -	unsigned int tail, index;
> -	struct io_uring_sqe *sqe;
> -
> -	tail = *sring->tail;
> -	index = tail & *sring->ring_mask;
> -	sqe = &s->sqes[index];
> +	unsigned int index = (*sring->tail + s->sq_pending) & *sring->ring_mask;
> +	struct io_uring_sqe *sqe = &s->sqes[index];
>   
>   	memset(sqe, 0, sizeof(*sqe));
> +	sring->array[index] = index;
> +	s->sq_pending++;
> +
> +	return sqe;
> +}
> +
> +/*
> + * Generic SQE fill for read/write family operations.
> + * Does not touch user_data - caller sets it via io_uring_sqe_set_data64().
> + */
> +static inline void io_uring_prep_rw(struct io_uring_sqe *sqe, int opcode,
> +				    int fd, const void *addr, unsigned int len,
> +				    off_t offset)
> +{
>   	sqe->opcode = opcode;
>   	sqe->fd = fd;
> -	sqe->addr = addr;
> +	sqe->addr = (unsigned long)addr;
>   	sqe->len = len;
>   	sqe->off = offset;
> -	sqe->user_data = user_data;
> +}
>   
> -	sring->array[index] = index;
> -	tail++;
> +static inline void io_uring_prep_read(struct io_uring_sqe *sqe, int fd,
> +				      void *buf, size_t len, off_t offset)
> +{
> +	io_uring_prep_rw(sqe, IORING_OP_READ, fd, buf, len, offset);
> +}
>   
> -	*sring->tail = tail;
> +static inline void io_uring_prep_write(struct io_uring_sqe *sqe, int fd,
> +				       const void *buf, size_t len, off_t offset)
> +{
> +	io_uring_prep_rw(sqe, IORING_OP_WRITE, fd, buf, len, offset);
>   }
>   
> -/*
> - * Submit a single SQE to the submission queue
> - * For basic read/write operations (non-vectored)
> - */
> -static inline void io_uring_submit_sqe(struct io_uring_submit *s, int fd,
> -				       int opcode, void *buf, size_t len,
> +static inline void io_uring_prep_readv(struct io_uring_sqe *sqe, int fd,
> +				       struct iovec *iovs, int nr_vecs,
>   				       off_t offset)
>   {
> -	io_uring_submit_sqe_internal(s, fd, opcode, (unsigned long)buf,
> -				     len, offset, opcode);
> +	io_uring_prep_rw(sqe, IORING_OP_READV, fd, iovs, nr_vecs, offset);
> +}
> +
> +static inline void io_uring_prep_writev(struct io_uring_sqe *sqe, int fd,
> +					struct iovec *iovs, int nr_vecs,
> +					off_t offset)
> +{
> +	io_uring_prep_rw(sqe, IORING_OP_WRITEV, fd, iovs, nr_vecs, offset);
>   }
>   
>   /*
> - * Submit a vectored SQE to the submission queue
> - * For readv/writev operations
> + * Set the user_data field of an SQE. user_data is returned verbatim in the
> + * corresponding CQE and must be unique per in-flight request to allow correct
> + * correlation of completions.
>    */
> -static inline void io_uring_submit_sqe_vec(struct io_uring_submit *s, int fd,
> -					   int opcode, struct iovec *iovs,
> -					   int nr_vecs, off_t offset)
> +static inline void io_uring_sqe_set_data64(struct io_uring_sqe *sqe,
> +					   uint64_t data)
>   {
> -	io_uring_submit_sqe_internal(s, fd, opcode, (unsigned long)iovs,
> -				     nr_vecs, offset, opcode);
> +	sqe->user_data = data;
>   }
>   
>   /*
> - * Map io_uring operation code to human-readable name
> + * Submit all pending SQEs to the kernel.
>    */
> -static inline const char *ioring_op_name(int op)
> +static inline void io_uring_submit(struct io_uring_submit *s)
>   {
> -	switch (op) {
> -	case IORING_OP_READV:
> -		return "IORING_OP_READV";
> -	case IORING_OP_WRITEV:
> -		return "IORING_OP_WRITEV";
> -	case IORING_OP_READ:
> -		return "IORING_OP_READ";
> -	case IORING_OP_WRITE:
> -		return "IORING_OP_WRITE";
> -	default:
> -		return "UNKNOWN";
> -	}
> +	unsigned int pending = s->sq_pending;
> +
> +	if (!pending)
> +		return;
> +
> +	*s->sq_ring.tail += pending;

I think that updating the submission ring tail should be done using atomic
operations, otherwise the kernel may see an inconsistent value.

> +	s->sq_pending = 0;
> +
> +	if (io_uring_enter(s->ring_fd, pending, 0, 0, NULL) < 0)
> +		tst_brk(TBROK | TERRNO, "io_uring_enter() failed");
>   }
>   
>   /*
> - * Wait for and validate a completion queue entry
> - * Aborts test on failure using tst_brk()
> + * Wait for the next CQE and return a pointer to it.
> + * Does not advance the CQ head - call io_uring_cqe_seen() when done.
>    */
> -static inline void io_uring_wait_cqe(struct io_uring_submit *s,
> -				     int expected_res, int expected_opcode,
> -				     sigset_t *sig)
> +static inline struct io_uring_cqe *io_uring_wait_cqe(struct io_uring_submit *s,
> +						      sigset_t *sig)
>   {
>   	struct io_cq_ring *cring = &s->cq_ring;
> -	struct io_uring_cqe *cqe;
> -	unsigned int head;
>   	int ret;
>   
> -	ret = io_uring_enter(s->ring_fd, 1, 1, IORING_ENTER_GETEVENTS, sig);
> +	ret = io_uring_enter(s->ring_fd, 0, 1, IORING_ENTER_GETEVENTS, sig);
>   	if (ret < 0)
>   		tst_brk(TBROK | TERRNO, "io_uring_enter() failed");
>   
> -	head = *cring->head;
> -	if (head == *cring->tail)
> +	if (*cring->head == *cring->tail)

Reading the completion queue tail should also be done atomically.
Imagine you submit multiple I/O requests and then wait for the first one
to finish. The kernel may update the tail value while you're trying to
read it, and a non-atomic read may produce a nonsensical value.

>   		tst_brk(TBROK, "No completion event received");
>   
> -	cqe = &cring->cqes[head & *cring->ring_mask];
> -
> -	if (cqe->user_data != (uint64_t)expected_opcode) {
> -		*cring->head = head + 1;
> -		tst_brk(TBROK, "Unexpected user_data: got %llu, expected %d",
> -			(unsigned long long)cqe->user_data, expected_opcode);
> -	}
> -
> -	if (cqe->res != expected_res) {
> -		*cring->head = head + 1;
> -		tst_brk(TBROK, "Operation failed: res=%d, expected=%d",
> -			cqe->res, expected_res);
> -	}
> -
> -	*cring->head = head + 1;
> +	return &cring->cqes[*cring->head & *cring->ring_mask];
>   }
>   
>   /*
> - * Initialize buffer with a repeating character pattern
> - * Useful for creating test data with predictable patterns
> + * Mark the current CQE as consumed, advancing the CQ head.
>    */
> -static inline void io_uring_init_buffer_pattern(char *buf, size_t size,
> -						char pattern)
> +static inline void io_uring_cqe_seen(struct io_uring_submit *s)
>   {
> -	size_t i;
> -
> -	for (i = 0; i < size; i++)
> -		buf[i] = pattern;
> +	(*s->cq_ring.head)++;
>   }
>   
>   /*
> - * Submit and wait for a non-vectored I/O operation
> - * Combines io_uring_submit_sqe() and io_uring_wait_cqe() with result reporting
> - */
> -static inline void io_uring_do_io_op(struct io_uring_submit *s, int fd,
> -				     int op, void *buf, size_t len,
> -				     off_t offset, sigset_t *sig)
> -{
> -	io_uring_submit_sqe(s, fd, op, buf, len, offset);
> -	io_uring_wait_cqe(s, len, op, sig);
> -	tst_res(TPASS, "OP=%s (%02x) fd=%i buf=%p len=%zu offset=%jd",
> -		ioring_op_name(op), op, fd, buf, len, (intmax_t)offset);
> -}
> -
> -/*
> - * Submit and wait for a vectored I/O operation
> - * Combines io_uring_submit_sqe_vec() and io_uring_wait_cqe() with
> - * result reporting
> + * Map io_uring operation code to human-readable name
>    */
> -static inline void io_uring_do_vec_io_op(struct io_uring_submit *s, int fd,
> -					 int op, struct iovec *iovs,
> -					 int nvecs, off_t offset,
> -					 int expected_size, sigset_t *sig)
> +static inline const char *ioring_op_name(int op)
>   {
> -	io_uring_submit_sqe_vec(s, fd, op, iovs, nvecs, offset);
> -	io_uring_wait_cqe(s, expected_size, op, sig);
> -	tst_res(TPASS, "OP=%s (%02x) fd=%i iovs=%p nvecs=%i offset=%jd "
> -		"expected_size=%i",
> -		ioring_op_name(op), op, fd, iovs, nvecs, (intmax_t)offset,
> -		expected_size);
> +	switch (op) {
> +	case IORING_OP_READV:
> +		return "IORING_OP_READV";
> +	case IORING_OP_WRITEV:
> +		return "IORING_OP_WRITEV";
> +	case IORING_OP_READ:
> +		return "IORING_OP_READ";
> +	case IORING_OP_WRITE:
> +		return "IORING_OP_WRITE";
> +	default:
> +		return "UNKNOWN";
> +	}
>   }
>   
>   #endif /* IO_URING_COMMON_H */


-- 
Martin Doucha   mdoucha@suse.cz
SW Quality Engineer
SUSE LINUX, s.r.o.
CORSO IIa
Krizikova 148/34
186 00 Prague 8
Czech Republic


More information about the ltp mailing list