[LTP] [PATCH] Add test for possible writev() issues with NULL buffer in iovec

Richard Palethorpe rpalethorpe@suse.de
Fri Feb 19 15:35:09 CET 2021


Hello,

Martin Doucha <mdoucha@suse.cz> writes:

> Fixes #790
>
> Signed-off-by: Martin Doucha <mdoucha@suse.cz>
> ---
>
> This test triggers temporary write of invalid data into test file on some
> file systems on kernel 4.4.21 and older.
>
>  runtest/syscalls                            |   1 +
>  testcases/kernel/syscalls/writev/.gitignore |   1 +
>  testcases/kernel/syscalls/writev/Makefile   |   3 +
>  testcases/kernel/syscalls/writev/writev03.c | 142 ++++++++++++++++++++
>  4 files changed, 147 insertions(+)
>  create mode 100644 testcases/kernel/syscalls/writev/writev03.c
>
> diff --git a/runtest/syscalls b/runtest/syscalls
> index ae47a6d5e..f01d94540 100644
> --- a/runtest/syscalls
> +++ b/runtest/syscalls
> @@ -1675,6 +1675,7 @@ write05 write05
>  
>  writev01 writev01
>  writev02 writev02
> +writev03 writev03
>  writev05 writev05
>  writev06 writev06
>  writev07 writev07
> diff --git a/testcases/kernel/syscalls/writev/.gitignore b/testcases/kernel/syscalls/writev/.gitignore
> index d60da0f43..167779736 100644
> --- a/testcases/kernel/syscalls/writev/.gitignore
> +++ b/testcases/kernel/syscalls/writev/.gitignore
> @@ -1,5 +1,6 @@
>  /writev01
>  /writev02
> +/writev03
>  /writev05
>  /writev06
>  /writev07
> diff --git a/testcases/kernel/syscalls/writev/Makefile b/testcases/kernel/syscalls/writev/Makefile
> index 4844a6910..6627abaed 100644
> --- a/testcases/kernel/syscalls/writev/Makefile
> +++ b/testcases/kernel/syscalls/writev/Makefile
> @@ -9,4 +9,7 @@ endif
>  
>  include $(top_srcdir)/include/mk/testcases.mk
>  
> +writev03: CFLAGS += -pthread
> +writev03: LDLIBS += -lrt
> +
>  include $(top_srcdir)/include/mk/generic_leaf_target.mk
> diff --git a/testcases/kernel/syscalls/writev/writev03.c b/testcases/kernel/syscalls/writev/writev03.c
> new file mode 100644
> index 000000000..f48efccf2
> --- /dev/null
> +++ b/testcases/kernel/syscalls/writev/writev03.c
> @@ -0,0 +1,142 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (C) 2021 SUSE LLC <mdoucha@suse.cz>
> + *
> + * Check for potential issues in writev() if the first iovec entry is NULL
> + * and the next one is not present in RAM. This can result in a brief window
> + * where writev() first writes uninitialized data into the file (possibly
> + * exposing internal kernel structures) and then overwrites it with the real
> + * iovec contents later. Bugs fixed in:
> + *
> + *  commit d4690f1e1cdabb4d61207b6787b1605a0dc0aeab
> + *  Author: Al Viro <viro@ZenIV.linux.org.uk>
> + *  Date:   Fri Sep 16 00:11:45 2016 +0100
> + *
> + *  fix iov_iter_fault_in_readable()
> + */
> +
> +#include <sys/uio.h>
> +#include "tst_test.h"
> +#include "tst_fuzzy_sync.h"
> +
> +#define CHUNK_SIZE 256
> +#define BUF_SIZE (2 * CHUNK_SIZE)
> +#define MNTPOINT "mntpoint"
> +#define TEMPFILE MNTPOINT "/test_file"
> +#define MAPFILE MNTPOINT "/map_file"
> +
> +static unsigned char buf[BUF_SIZE], *map_ptr;
> +static int mapfd = -1, writefd = -1;
> +static volatile ssize_t written;

written should be atomic type (see below)

> +static struct tst_fzsync_pair fzsync_pair;
> +struct iovec iov[5];
> +
> +static void setup(void)
> +{
> +	int i;
> +
> +	for (i = 0; i < BUF_SIZE; i++)
> +		buf[i] = i & 0xff;
> +
> +	mapfd = SAFE_OPEN(MAPFILE, O_CREAT|O_RDWR|O_TRUNC, 0644);
> +	SAFE_WRITE(1, mapfd, buf, BUF_SIZE);
> +
> +	tst_fzsync_pair_init(&fzsync_pair);
> +}
> +
> +static void *thread_run(void *arg)
> +{
> +	while (tst_fzsync_run_b(&fzsync_pair)) {
> +		writefd = SAFE_OPEN(TEMPFILE, O_CREAT|O_WRONLY|O_TRUNC, 0644);
> +		written = BUF_SIZE;
> +		tst_fzsync_wait_b(&fzsync_pair);
> +
> +		/*
> +		 * Do *NOT* preload the data using MAP_POPULATE or touching
> +		 * the mapped range. We're testing whether writev() handles
> +		 * fault-in correctly.
> +		 */
> +		map_ptr = SAFE_MMAP(NULL, BUF_SIZE, PROT_READ, MAP_SHARED,
> +			mapfd, 0);

Possibly, instead of recreating the mapping each loop you could call
madvise MADV_DONTNEED on the mapping. In which case it may also be best
to use MAP_PRIVATE as well. Of courese if it is already fast then this
does not matter.

> +		iov[1].iov_base = map_ptr;
> +		iov[1].iov_len = CHUNK_SIZE;
> +		iov[3].iov_base = map_ptr + CHUNK_SIZE;
> +		iov[3].iov_len = CHUNK_SIZE;
> +
> +		tst_fzsync_start_race_b(&fzsync_pair);
> +		written = writev(writefd, iov, ARRAY_SIZE(iov));

To be on the safe side we should write to written with the atomic
functions (see below).

> +		tst_fzsync_end_race_b(&fzsync_pair);
> +
> +		SAFE_MUNMAP(map_ptr, BUF_SIZE);
> +		map_ptr = NULL;
> +		SAFE_CLOSE(writefd);
> +	}
> +
> +	return arg;
> +}
> +
> +static void run(void)
> +{
> +	int fd, failed = 0;
> +	ssize_t total_read;
> +	unsigned char readbuf[BUF_SIZE];
> +
> +	tst_fzsync_pair_reset(&fzsync_pair, thread_run);
> +
> +	while (!failed && tst_fzsync_run_a(&fzsync_pair)) {
> +		tst_fzsync_wait_a(&fzsync_pair);
> +		fd = SAFE_OPEN(TEMPFILE, O_RDONLY);
> +		tst_fzsync_start_race_a(&fzsync_pair);
> +
> +		for (total_read = 0; total_read < written;) {

Also read from written with the tst_atomic functions. This is especially
important for weak memory models because written may not be synchronised
straight away. Then it could block on read().

There is also a small chance some architecture will update ssize_t
non-atomically so written is smaller than expected. This would lead to a
false positive.

I suppose an alternative would be to complete writing the data just using
an ordinary write() call or however you want.

> +			total_read += SAFE_READ(0, fd, readbuf + total_read,
> +				BUF_SIZE - total_read);
> +		}
> +
> +		tst_fzsync_end_race_a(&fzsync_pair);
> +		SAFE_CLOSE(fd);
> +
> +		if (total_read > BUF_SIZE)
> +			tst_brk(TBROK, "read() returned too much data");
> +
> +		if (total_read <= 0)
> +			continue;
> +
> +		if (memcmp(readbuf, buf, (size_t)total_read)) {
> +			tst_res(TFAIL, "writev() wrote invalid data");
> +			failed = 1;
> +			break;
> +		}
> +	}
> +
> +	if (!failed)
> +		tst_res(TPASS, "writev() handles page fault-in correctly");
> +}
> +
> +static void cleanup(void)
> +{
> +	if (map_ptr && map_ptr != MAP_FAILED)
> +		SAFE_MUNMAP(map_ptr, BUF_SIZE);
> +
> +	if (mapfd >= 0)
> +		SAFE_CLOSE(mapfd);
> +
> +	if (writefd >= 0)
> +		SAFE_CLOSE(writefd);
> +
> +	tst_fzsync_pair_cleanup(&fzsync_pair);
> +}
> +
> +static struct tst_test test = {
> +	.test_all = run,
> +	.needs_root = 1,
> +	.mount_device = 1,
> +	.mntpoint = MNTPOINT,
> +	.all_filesystems = 1,
> +	.setup = setup,
> +	.cleanup = cleanup,
> +	.tags = (const struct tst_tag[]) {
> +		{"linux-git", "d4690f1e1cda"},

I guess CVE is on the way?

> +		{}
> +	}
> +};
> -- 
> 2.30.0

Apart from the volatile variable looks good!

-- 
Thank you,
Richard.


More information about the ltp mailing list