[LTP] [PATCH v4] fanotify22.c: handle multiple asynchronous error events
Jan Kara
jack@suse.cz
Wed Mar 25 16:52:47 CET 2026
On Wed 25-03-26 12:43:57, Wei Gao wrote:
> Since the introduction of the asynchronous fserror reporting framework
> (kernel commit 81d2e13a57c9), fanotify22 has encountered sporadic failures
> due to the non-deterministic nature of event delivery and merging:
>
> 1) tcase3 failure: A race condition occurs when the test reads the
> notification fd between two events. This patch uses a poll() and read()
> loop to wait until the expected error count has been accumulated.
>
> 2) tcase4 failure: The kernel may deliver errors as independent events
> instead of a single merged event, since different worker kthreads can
> end up generating each event, so they won't be merged. As suggested by
> Jan Kara, this patch introduces a consolidate_events() helper. It iterates
> through the event buffer, accumulates the error_count from all independent
> events, and updates the first event's count in-place.
>
> Reported-by: kernel test robot <oliver.sang@intel.com>
> Closes: https://lore.kernel.org/oe-lkp/202602042124.87bd00e3-lkp@intel.com
> Suggested-by: Jan Kara <jack@suse.cz>
> Signed-off-by: Wei Gao <wegao@suse.com>
...
> +static size_t consolidate_events(char *buf, size_t len, const struct test_case *ex)
> +{
> + struct fanotify_event_metadata *metadata, *first = NULL;
> + struct fanotify_event_info_error *first_info = NULL;
> + unsigned int total_count = 0;
> + int event_num = 0;
> +
> + for (metadata = (struct fanotify_event_metadata *)buf;
> + FAN_EVENT_OK(metadata, len);
> + metadata = FAN_EVENT_NEXT(metadata, len)) {
> +
> + event_num++;
> + struct fanotify_event_info_error *info = get_event_info_error(metadata);
> +
> + if (!info) {
> + tst_res(TFAIL, "%s: Event [%d] missing error info",
> + ex->name, event_num);
> + continue;
> + }
> +
> + if (info->error != ex->error && (ex->error2 == 0 || info->error != ex->error2)) {
> + tst_res(TFAIL, "%s: Event [%d] unexpected errno (%d)",
> + ex->name, event_num, info->error);
Should we add 'continue' here similarly to the failure case above? So that
we skip over the event with invalid error code... Otherwise the test looks
correct to me.
Honza
> + }
> +
> + if (!first) {
> + first = metadata;
> + first_info = info;
> + }
> + total_count += info->error_count;
> +
> + tst_res(TINFO, "Event [%d]: errno=%d, error_count=%d",
> + event_num, info->error, info->error_count);
> + }
> +
> + if (first_info)
> + first_info->error_count = total_count;
> +
> + return (first) ? first->event_len : 0;
> +}
> +
> static int check_error_event_info_fid(struct fanotify_event_info_fid *fid,
> const struct test_case *ex)
> {
> @@ -248,19 +291,54 @@ static void check_event(char *buf, size_t len, const struct test_case *ex)
> static void do_test(unsigned int i)
> {
> const struct test_case *tcase = &testcases[i];
> - size_t read_len;
> + size_t read_len = 0;
> + struct pollfd pfd;
> + unsigned int accumulated_count = 0;
>
> SAFE_FANOTIFY_MARK(fd_notify, FAN_MARK_ADD|FAN_MARK_FILESYSTEM,
> FAN_FS_ERROR, AT_FDCWD, MOUNT_PATH);
>
> tcase->trigger_error();
>
> - read_len = SAFE_READ(0, fd_notify, event_buf, BUF_SIZE);
> + pfd.fd = fd_notify;
> + pfd.events = POLLIN;
> +
> + while (accumulated_count < tcase->error_count) {
> + if (poll(&pfd, 1, 5000) <= 0) {
> + tst_res(TFAIL, "%s: Timeout waiting for events", tcase->name);
> + goto out;
> + }
> +
> + char *current_pos = event_buf + read_len;
> + int ret = read(fd_notify, current_pos, BUF_SIZE - read_len);
> +
> + if (ret < 0) {
> + tst_res(TFAIL, "%s: read failed: %s", tcase->name, strerror(errno));
> + goto out;
> + }
> +
> + struct fanotify_event_metadata *m =
> + (struct fanotify_event_metadata *)current_pos;
> + while (FAN_EVENT_OK(m, ret)) {
> + struct fanotify_event_info_error *e = get_event_info_error(m);
> +
> + if (e)
> + accumulated_count += e->error_count;
> +
> + read_len += m->event_len;
> + m = FAN_EVENT_NEXT(m, ret);
> + }
> + }
> +
> + read_len = consolidate_events(event_buf, read_len, tcase);
> +
> + check_event(event_buf, read_len, tcase);
> +
> +out:
>
> SAFE_FANOTIFY_MARK(fd_notify, FAN_MARK_REMOVE|FAN_MARK_FILESYSTEM,
> FAN_FS_ERROR, AT_FDCWD, MOUNT_PATH);
>
> - check_event(event_buf, read_len, tcase);
> /* Unmount and mount the filesystem to get it out of the error state */
> SAFE_UMOUNT(MOUNT_PATH);
> SAFE_MOUNT(tst_device->dev, MOUNT_PATH, tst_device->fs_type, 0, NULL);
> --
> 2.52.0
>
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
More information about the ltp
mailing list