[LTP] [PATCH v2 1/5] Hugetlb: Migrating libhugetlbfs counters

Tarun Sahu tsahu@linux.ibm.com
Wed Nov 9 22:26:37 CET 2022


Hi,
Thanks for reviewing the patch.
On Nov 09 2022, Cyril Hrubis wrote:
> Hi!
> >  runtest/hugetlb                               |   1 +
> >  testcases/kernel/mem/.gitignore               |   1 +
> >  .../kernel/mem/hugetlb/hugemmap/hugemmap10.c  | 438 ++++++++++++++++++
> >  3 files changed, 440 insertions(+)
> >  create mode 100644 testcases/kernel/mem/hugetlb/hugemmap/hugemmap10.c
> > 
> > diff --git a/runtest/hugetlb b/runtest/hugetlb
> > index e2ada7a97..8a56d52a3 100644
> > --- a/runtest/hugetlb
> > +++ b/runtest/hugetlb
> > @@ -6,6 +6,7 @@ hugemmap06 hugemmap06
> >  hugemmap07 hugemmap07
> >  hugemmap08 hugemmap08
> >  hugemmap09 hugemmap09
> > +hugemmap10 hugemmap10
> >  hugemmap05_1 hugemmap05 -m
> >  hugemmap05_2 hugemmap05 -s
> >  hugemmap05_3 hugemmap05 -s -m
> > diff --git a/testcases/kernel/mem/.gitignore b/testcases/kernel/mem/.gitignore
> > index 1a242ffe0..e7def68cb 100644
> > --- a/testcases/kernel/mem/.gitignore
> > +++ b/testcases/kernel/mem/.gitignore
> > @@ -7,6 +7,7 @@
> >  /hugetlb/hugemmap/hugemmap07
> >  /hugetlb/hugemmap/hugemmap08
> >  /hugetlb/hugemmap/hugemmap09
> > +/hugetlb/hugemmap/hugemmap10
> >  /hugetlb/hugeshmat/hugeshmat01
> >  /hugetlb/hugeshmat/hugeshmat02
> >  /hugetlb/hugeshmat/hugeshmat03
> > diff --git a/testcases/kernel/mem/hugetlb/hugemmap/hugemmap10.c b/testcases/kernel/mem/hugetlb/hugemmap/hugemmap10.c
> > new file mode 100644
> > index 000000000..dacfee8ce
> > --- /dev/null
> > +++ b/testcases/kernel/mem/hugetlb/hugemmap/hugemmap10.c
> > @@ -0,0 +1,438 @@
> > +// SPDX-License-Identifier: LGPL-2.1-or-later
> > +/*
> > + * Copyright (C) 2005-2007 IBM Corporation.
> > + * Author: David Gibson & Adam Litke
> > + */
> > +
> > +/*\
> > + * [Description]
> > + *
> > + * Counters:
>       ^
> Again this wouldn't render nicely in asciidoc.
> 
Oops, missed it. Will update it.
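Something like this maybe, dropping the stray label (just a sketch of the
reworded block):

	/*\
	 * [Description]
	 *
	 * Test that the hugetlb pool counters (Total, Free, Rsvd, Surp)
	 * in /proc/meminfo stay consistent across mmap, write and munmap
	 * of shared and private hugetlb file mappings.
	 */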
> > + * This test performs mmap, munmap and write operations on hugetlb
> > + * file based mappings, which can be shared or private. It checks the
> > + * hugetlb counters (Total, Free, Reserved, Surplus) in /proc/meminfo
> > + * and compares them with the expected (calculated) values. If all
> > + * checks are successful, the test passes.
> > + *
> > + */
> > +
> > +#define _GNU_SOURCE
> > +#include <unistd.h>
> > +#include <stdio.h>
> > +#include <sys/mount.h>
> > +#include <limits.h>
> > +#include <sys/param.h>
> > +#include <sys/types.h>
> > +#include <setjmp.h>
> > +
> > +#include "hugetlb.h"
> > +
> > +#define MNTPOINT "hugetlbfs/"
> > +
> > +static long hpage_size;
> > +static int private_resv;
> > +
> > +#define NR_SLOTS	2
> > +#define SL_SETUP	0
> > +#define SL_TEST		1
> > +static int map_fd[NR_SLOTS];
> > +static char *map_addr[NR_SLOTS];
> > +static unsigned long map_size[NR_SLOTS];
> > +static unsigned int touched[NR_SLOTS];
> > +
> > +static long prev_total;
> > +static long prev_free;
> > +static long prev_resv;
> > +static long prev_surp;
> > +static jmp_buf buf;
> > +
> > +static void read_meminfo_huge(long *total, long *free, long *resv, long *surp)
> > +{
> > +	*total = SAFE_READ_MEMINFO(MEMINFO_HPAGE_TOTAL);
> > +	*free = SAFE_READ_MEMINFO(MEMINFO_HPAGE_FREE);
> > +	*resv = SAFE_READ_MEMINFO(MEMINFO_HPAGE_RSVD);
> > +	*surp = SAFE_READ_MEMINFO(MEMINFO_HPAGE_SURP);
> > +}
> > +
> > +static int kernel_has_private_reservations(void)
> > +{
> > +	int fd;
> > +	long t, f, r, s;
> > +	long nt, nf, nr, ns;
> > +	void *map;
> > +
> > +	read_meminfo_huge(&t, &f, &r, &s);
> > +	fd = tst_creat_unlinked(MNTPOINT);
> > +
> > +	map = SAFE_MMAP(NULL, hpage_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
> > +
> > +	read_meminfo_huge(&nt, &nf, &nr, &ns);
> > +
> > +	SAFE_MUNMAP(map, hpage_size);
> > +	SAFE_CLOSE(fd);
> > +
> > +	/*
> > +	 * There are only three valid cases:
> > +	 * 1) If a surplus page was allocated to create a reservation, all
> > +	 *    four pool counters increment
> > +	 * 2) All counters remain the same except for Hugepages_Rsvd, then
> > +	 *    a reservation was created using an existing pool page.
> > +	 * 3) All counters remain the same, indicates that no reservation has
> > +	 *    been created
> > +	 */
> > +	if ((nt == t + 1) && (nf == f + 1) && (ns == s + 1) && (nr == r + 1))
> > +		return 1;
> > +	else if ((nt == t) && (nf == f) && (ns == s)) {
> > +		if (nr == r + 1)
> > +			return 1;
> > +		else if (nr == r)
> > +			return 0;
> > +	} else
> > +		tst_brk(TCONF, "bad counter state - "
> > +		      "T:%li F:%li R:%li S:%li -> T:%li F:%li R:%li S:%li\n",
>                                                                           ^
> 									  Do not print the newline.
> > +			  t, f, r, s, nt, nf, nr, ns);
> 
> This is confusing for no good reason. We are doing a return in the if ()
> blocks, so there is no reason to use else at all. This would be much
> more readable as:
> 
> 
> 	if (...)
> 		return 1;
> 
> 
> 	if (...) {
> 		if (...)
> 			return 1;
> 		else
> 			return 0;
> 	}
> 
> 	tst_brk(TCONF, "...");
> 
> 
Yeah. Will update it.
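Something like this, then (also dropping the trailing newline as you
pointed out below):

	if ((nt == t + 1) && (nf == f + 1) && (ns == s + 1) && (nr == r + 1))
		return 1;

	if ((nt == t) && (nf == f) && (ns == s)) {
		if (nr == r + 1)
			return 1;
		if (nr == r)
			return 0;
	}

	tst_brk(TCONF, "bad counter state - "
		"T:%li F:%li R:%li S:%li -> T:%li F:%li R:%li S:%li",
		t, f, r, s, nt, nf, nr, ns);
	return -1;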
> > +	return -1;
> > +}
> > +
> > +static void verify_counters(int line, long et, long ef, long er, long es)
> > +{
> > +	long t, f, r, s;
> > +	long c, ec;
> > +	char *huge_info_name;
> > +
> > +	read_meminfo_huge(&t, &f, &r, &s);
> > +
> > +	if (t != et) {
> > +		c = t;
> > +		ec = et;
> > +		huge_info_name = MEMINFO_HPAGE_TOTAL;
> > +	} else if (f != ef) {
> > +		c = f;
> > +		ec = ef;
> > +		huge_info_name = MEMINFO_HPAGE_FREE;
> > +	} else if (r != er) {
> > +		c = r;
> > +		ec = er;
> > +		huge_info_name = MEMINFO_HPAGE_RSVD;
> > +	} else if (s != es) {
> > +		c = s;
> > +		ec = es;
> > +		huge_info_name = MEMINFO_HPAGE_SURP;
> > +	} else {
> 
> I think that it would be better to actually print a TFAIL for each of the
> values, not just the first one that is different.
> 
> Something as:
> 
> 	int fail = 0;
> 
> 	if (t != et) {
> 		tst_res(TFAIL, ...);
> 		fail++;
> 	}
> 
> 	if (f != ef) {
> 		...
> 	}
> 
> 	...
> 
> 
> 	if (fail)
> 		return 1;
> 
> 	...
> 
Yeah. That way the user will have more information. Will update it.
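Something like this then, following your sketch (and returning the result
so the caller can react):

	static int verify_counters(int line, long et, long ef, long er, long es)
	{
		long t, f, r, s;
		int fail = 0;

		read_meminfo_huge(&t, &f, &r, &s);

		if (t != et) {
			tst_res(TFAIL, "Line %i: Bad %s: expected %li, actual %li",
				line, MEMINFO_HPAGE_TOTAL, et, t);
			fail++;
		}

		if (f != ef) {
			tst_res(TFAIL, "Line %i: Bad %s: expected %li, actual %li",
				line, MEMINFO_HPAGE_FREE, ef, f);
			fail++;
		}

		if (r != er) {
			tst_res(TFAIL, "Line %i: Bad %s: expected %li, actual %li",
				line, MEMINFO_HPAGE_RSVD, er, r);
			fail++;
		}

		if (s != es) {
			tst_res(TFAIL, "Line %i: Bad %s: expected %li, actual %li",
				line, MEMINFO_HPAGE_SURP, es, s);
			fail++;
		}

		if (fail)
			return 1;

		prev_total = t;
		prev_free = f;
		prev_resv = r;
		prev_surp = s;
		return 0;
	}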
> > +		prev_total = t;
> > +		prev_free = f;
> > +		prev_resv = r;
> > +		prev_surp = s;
> > +		return;
> > +	}
> > +
> > +	tst_res(TFAIL, "Failure Line %i: Bad %s expected %li, actual %li",
>                         ^
> 			Never print "Fail/Pass" with tst_res(); it's
> 			printed based on the flag passed to it.
> 
> The output would contain Fail and Failed at the same time.
> 
This doesn't say "Failed".
It gives the line from which the failure originated, like:

hugemmap10.c:63: FAIL: Failure Line 321: Bad HugePages_Free expected 5, actual 4

> > +			line, huge_info_name, ec, c);
> > +	longjmp(buf, 1);
> > +}
> > +
> > +/* Memory operations:
> > + * Each of these has a predefined effect on the counters
> > + */
> > +static void set_nr_hugepages_(long count, int line)
> > +{
> > +	long min_size;
> > +	long et, ef, er, es;
> > +
> > +	SAFE_FILE_PRINTF(PATH_NR_HPAGES, "%lu", count);
> > +
> > +	/* The code below is based on set_max_huge_pages in mm/hugetlb.c */
> > +	es = prev_surp;
> > +	et = prev_total;
> > +	ef = prev_free;
> > +	er = prev_resv;
> > +
> > +	/*
> > +	 * Increase the pool size
> > +	 * First take pages out of surplus state.  Then make up the
> > +	 * remaining difference by allocating fresh huge pages.
> > +	 */
> > +	while (es && count > et - es)
> > +		es--;
> > +	while (count > et - es) {
> > +		et++;
> > +		ef++;
> > +	}
> > +	if (count >= et - es)
> > +		goto out;
> > +
> > +	/*
> > +	 * Decrease the pool size
> > +	 * First return free pages to the buddy allocator (being careful
> > +	 * to keep enough around to satisfy reservations).  Then place
> > +	 * pages into surplus state as needed so the pool will shrink
> > +	 * to the desired size as pages become free.
> > +	 */
> > +	min_size = MAX(count, er + et - ef);
> > +	while (min_size < et - es) {
> > +		ef--;
> > +		et--;
> > +	}
> > +	while (count < et - es)
> > +		es++;
> > +
> > +out:
> > +	verify_counters(line, et, ef, er, es);
> > +}
> > +#define set_nr_hugepages(c) set_nr_hugepages_(c, __LINE__)
> 
> I think that instead of the __LINE__ it would make more sense to pass
> the test description as a string as we do with test_counters()
> 
That would require a unique string description for every map, touch, unmap
and set_nr_hugepages call inside test_counters(), and similarly inside the
for loop. That would make it hard for the user to find the origin of an
issue, unless they search for a string match.

like,

	/* untouched, private mmap */
	map(SL_TEST, 1, MAP_PRIVATE, "mmap private no touch");
	unmap(SL_TEST, 1, MAP_PRIVATE, "unmap memory mmaped private no touched");

	/* touched, private mmap */
	map(SL_TEST, 1, MAP_PRIVATE, "mmap private followed by touch");
	
	touch(SL_TEST, 1, MAP_PRIVATE, "touch memory mmaped private");
	unmap(SL_TEST, 1, MAP_PRIVATE, "unmap memory touched mmaped private");

But I agree, a unique description will give more information in the test
run logs.

What do you think?
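Or maybe we could carry both: a short description for the log and __LINE__
to locate the exact call site. A rough sketch (hypothetical, just to
illustrate):

	#define map(s, h, f, d) map_(s, h, f, d, __LINE__)

	static void map_(int s, int hpages, int flags, const char *desc,
			 int line)
	{
		...
		verify_counters(desc, line, et, ef, er, es);
	}

and in verify_counters():

	tst_res(TFAIL, "%s (line %i): Bad %s: expected %li, actual %li",
		desc, line, huge_info_name, ec, c);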
> > +static void map_(int s, int hpages, int flags, int line)
> > +{
> > +	long et, ef, er, es;
> > +
> > +	map_fd[s] = tst_creat_unlinked(MNTPOINT);
> > +	map_size[s] = hpages * hpage_size;
> > +	map_addr[s] = SAFE_MMAP(NULL, map_size[s], PROT_READ|PROT_WRITE, flags,
> > +				map_fd[s], 0);
> > +	touched[s] = 0;
> > +
> > +	et = prev_total;
> > +	ef = prev_free;
> > +	er = prev_resv;
> > +	es = prev_surp;
> > +	/*
> > +	 * When using MAP_SHARED, a reservation will be created to guarantee
> > +	 * pages to the process.  If not enough pages are available to
> > +	 * satisfy the reservation, surplus pages are added to the pool.
> > +	 * NOTE: This code assumes that the whole mapping needs to be
> > +	 * reserved and hence, will not work with partial reservations.
> > +	 *
> > +	 * If the kernel supports private reservations, then MAP_PRIVATE
> > +	 * mappings behave like MAP_SHARED at mmap time.  Otherwise,
> > +	 * no counter updates will occur.
> > +	 */
> > +	if ((flags & MAP_SHARED) || private_resv) {
> > +		unsigned long shortfall = 0;
> > +
> > +		if (hpages + prev_resv > prev_free)
> > +			shortfall = hpages - prev_free + prev_resv;
> > +		et += shortfall;
> > +		ef += shortfall;
> > +		er += hpages;
> > +		es += shortfall;
> > +	}
> > +
> > +	verify_counters(line, et, ef, er, es);
> > +}
> > +#define map(s, h, f) map_(s, h, f, __LINE__)
> > +
> > +static void unmap_(int s, int hpages, int flags, int line)
> > +{
> > +	long et, ef, er, es;
> > +	unsigned long i;
> > +
> > +	SAFE_MUNMAP(map_addr[s], map_size[s]);
> > +	SAFE_CLOSE(map_fd[s]);
> > +	map_addr[s] = NULL;
> > +	map_size[s] = 0;
> > +
> > +	et = prev_total;
> > +	ef = prev_free;
> > +	er = prev_resv;
> > +	es = prev_surp;
> > +
> > +	/*
> > +	 * When a VMA is unmapped, the instantiated (touched) pages are
> > +	 * freed.  If the pool is in a surplus state, pages are freed to the
> > +	 * buddy allocator, otherwise they go back into the hugetlb pool.
> > +	 * NOTE: This code assumes touched pages have only one user.
> > +	 */
> > +	for (i = 0; i < touched[s]; i++) {
> > +		if (es) {
> > +			et--;
> > +			es--;
> > +		} else
> > +			ef++;
> > +	}
> > +
> > +	/*
> > +	 * mmap may have created some surplus pages to accommodate a
> > +	 * reservation.  If those pages were not touched, then they will
> > +	 * not have been freed by the code above.  Free them here.
> > +	 */
> > +	if ((flags & MAP_SHARED) || private_resv) {
> > +		int unused_surplus = MIN(hpages - touched[s], es);
> > +
> > +		et -= unused_surplus;
> > +		ef -= unused_surplus;
> > +		er -= hpages - touched[s];
> > +		es -= unused_surplus;
> > +	}
> > +
> > +	verify_counters(line, et, ef, er, es);
> > +}
> > +#define unmap(s, h, f) unmap_(s, h, f, __LINE__)
> > +
> > +static void touch_(int s, int hpages, int flags, int line)
> > +{
> > +	long et, ef, er, es;
> > +	int nr;
> > +	char *c;
> > +
> > +	for (c = map_addr[s], nr = hpages;
> > +			hpages && c < map_addr[s] + map_size[s];
> > +			c += hpage_size, nr--)
> > +		*c = (char) (nr % 2);
> > +	/*
> > +	 * Keep track of how many pages were touched since we can't easily
> > +	 * detect that from user space.
> > +	 * NOTE: Calling this function more than once for a mmap may yield
> > +	 * results you don't expect.  Be careful :)
> > +	 */
> > +	touched[s] = MAX(touched[s], hpages);
> > +
> > +	/*
> > +	 * Shared (and private when supported) mappings consume resv pages
> > +	 * that were previously allocated. Also deduct them from the free count.
> > +	 *
> > +	 * Unreserved private mappings may need to allocate surplus pages to
> > +	 * satisfy the fault.  The surplus pages become part of the pool
> > +	 * which could elevate total, free, and surplus counts.  resv is
> > +	 * unchanged but free must be decreased.
> > +	 */
> > +	if (flags & MAP_SHARED || private_resv) {
> > +		et = prev_total;
> > +		ef = prev_free - hpages;
> > +		er = prev_resv - hpages;
> > +		es = prev_surp;
> > +	} else {
> > +		if (hpages + prev_resv > prev_free)
> > +			et = prev_total + (hpages - prev_free + prev_resv);
> > +		else
> > +			et = prev_total;
> > +		er = prev_resv;
> > +		es = prev_surp + et - prev_total;
> > +		ef = prev_free - hpages + et - prev_total;
> > +	}
> > +	verify_counters(line, et, ef, er, es);
> > +}
> > +#define touch(s, h, f) touch_(s, h, f, __LINE__)
> 
> 
> > +static void test_counters(char *desc, int base_nr)
> > +{
> > +	tst_res(TINFO, "%s...", desc);
> > +	set_nr_hugepages(base_nr);
> > +
> > +	/* untouched, shared mmap */
> > +	map(SL_TEST, 1, MAP_SHARED);
> > +	unmap(SL_TEST, 1, MAP_SHARED);
> > +
> > +	/* untouched, private mmap */
> > +	map(SL_TEST, 1, MAP_PRIVATE);
> > +	unmap(SL_TEST, 1, MAP_PRIVATE);
> > +
> > +	/* touched, shared mmap */
> > +	map(SL_TEST, 1, MAP_SHARED);
> > +	touch(SL_TEST, 1, MAP_SHARED);
> > +	unmap(SL_TEST, 1, MAP_SHARED);
> > +
> > +	/* touched, private mmap */
> > +	map(SL_TEST, 1, MAP_PRIVATE);
> > +	touch(SL_TEST, 1, MAP_PRIVATE);
> > +	unmap(SL_TEST, 1, MAP_PRIVATE);
> > +
> > +	/* Explicit resizing during outstanding surplus */
> > +	/* Consume surplus when growing pool */
> > +	map(SL_TEST, 2, MAP_SHARED);
> > +	set_nr_hugepages(MAX(base_nr, 1));
> > +
> > +	/* Add pages once surplus is consumed */
> > +	set_nr_hugepages(MAX(base_nr, 3));
> > +
> > +	/* Release free huge pages first */
> > +	set_nr_hugepages(MAX(base_nr, 2));
> > +
> > +	/* When shrinking beyond committed level, increase surplus */
> > +	set_nr_hugepages(base_nr);
> > +
> > +	/* Upon releasing the reservation, reduce surplus counts */
> > +	unmap(SL_TEST, 2, MAP_SHARED);
> > +	tst_res(TINFO, "OK");
> > +}
> > +
> > +static void per_iteration_cleanup(void)
> > +{
> > +	int nr = 0;
> 
> This is confusing for no good reason, the nr would be better
> initialized in the for loop instead, as is usually done.
> 
Ok, will update it.
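I.e. something like:

	int nr;
	...
	for (nr = 0; nr < NR_SLOTS; nr++) {
		if (map_fd[nr] > 0)
			SAFE_CLOSE(map_fd[nr]);
	}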
> > +	prev_total = 0;
> > +	prev_free = 0;
> > +	prev_resv = 0;
> > +	prev_surp = 0;
> > +	for (; nr < NR_SLOTS; nr++) {
> > +		if (map_fd[nr] > 0)
> > +			SAFE_CLOSE(map_fd[nr]);
> > +	}
> > +}
> > +
> > +static void run_test(void)
> > +{
> > +	int base_nr = 0;
> > +
> > +	SAFE_FILE_PRINTF(PATH_OC_HPAGES, "%lu", tst_hugepages);
> > +	private_resv = kernel_has_private_reservations();
> 
> This should be done once in the test setup().
> 
Yeah, will move it.
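E.g. (sketch; assuming the overcommit knob is also set up there so the
probe keeps working):

	static void setup(void)
	{
		hpage_size = SAFE_READ_MEMINFO(MEMINFO_HPAGE_SIZE) * 1024;
		SAFE_FILE_PRINTF(PATH_OC_HPAGES, "%lu", tst_hugepages);
		private_resv = kernel_has_private_reservations();
	}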
> > +	if (setjmp(buf))
> > +		goto cleanup;
> 
> This is way beyond ugly. I guess that it would be cleaner to actually
> return a pass/fail from the test_counters() function and break the for()
> loop based on that value instead of this longjmp trickery.
> 
> Also I do not think that the current code is correct anyway, because we
> skip the unmap() call. So I suppose the correct way would be:
> 
> 
> 	res = test_counters("Untouched, shared", base_nr);
> 	unmap(SL_SETUP, 1, MAP_SHARED);
> 
> 	if (res)
> 		break;
> 

I was thinking the same at first, but adding checks at each line in
test_counters(...) and inside the for loop would make the code unclean.
Hence I chose the setjmp/longjmp mechanism. The only drawback is that the
mapping was not getting cleaned up (unmap); that we can add in
per_iteration_cleanup.

What do you think?

> Or eventually we can make the design better by unmapping any leftover
> mappings in the per_iteration_cleanup(). Then we can just do:
> 
> 	map()
> 	if (test_counters(...))
> 		break;
> 	unmap()
> 
map and unmap also require return checks, as they also run
verify_counters() on the expected and actual counters.

Yeah, I will add unmap in per_iteration_cleanup.
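So run_test() could look something like this (rough sketch, assuming
map()/touch()/unmap()/test_counters() all propagate the verify_counters()
result):

	for (base_nr = 0; base_nr <= 3; base_nr++) {
		tst_res(TINFO, "Base pool size: %i", base_nr);

		if (test_counters("Clean", base_nr))
			break;

		if (map(SL_SETUP, 1, MAP_SHARED) ||
		    test_counters("Untouched, shared", base_nr) ||
		    unmap(SL_SETUP, 1, MAP_SHARED))
			break;

		...
	}
	per_iteration_cleanup();

with per_iteration_cleanup() munmapping and closing whatever is left in
map_addr[]/map_fd[].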
> > +	for (base_nr = 0; base_nr <= 3; base_nr++) {
> > +		tst_res(TINFO, "Base pool size: %i", base_nr);
> > +		/* Run the tests with a clean slate */
> > +		test_counters("Clean", base_nr);
> > +
> > +		/* Now with a pre-existing untouched, shared mmap */
> > +		map(SL_SETUP, 1, MAP_SHARED);
> > +		test_counters("Untouched, shared", base_nr);
> > +		unmap(SL_SETUP, 1, MAP_SHARED);
> > +
> > +		/* Now with a pre-existing untouched, private mmap */
> > +		map(SL_SETUP, 1, MAP_PRIVATE);
> > +		test_counters("Untouched, private", base_nr);
> > +		unmap(SL_SETUP, 1, MAP_PRIVATE);
> > +
> > +		/* Now with a pre-existing touched, shared mmap */
> > +		map(SL_SETUP, 1, MAP_SHARED);
> > +		touch(SL_SETUP, 1, MAP_SHARED);
> > +		test_counters("Touched, shared", base_nr);
> > +		unmap(SL_SETUP, 1, MAP_SHARED);
> > +
> > +		/* Now with a pre-existing touched, private mmap */
> > +		map(SL_SETUP, 1, MAP_PRIVATE);
> > +		touch(SL_SETUP, 1, MAP_PRIVATE);
> > +		test_counters("Touched, private", base_nr);
> > +		unmap(SL_SETUP, 1, MAP_PRIVATE);
> > +	}
> > +	tst_res(TPASS, "Hugepages counters work as expected.");
> > +cleanup:
> > +	per_iteration_cleanup();
> > +}
> > +
> > +static void setup(void)
> > +{
> > +	hpage_size = SAFE_READ_MEMINFO(MEMINFO_HPAGE_SIZE)*1024;
> > +}
> > +
> > +static void cleanup(void)
> > +{
> > +	per_iteration_cleanup();
> > +}
> > +
> > +static struct tst_test test = {
> > +	.needs_root = 1,
> > +	.mntpoint = MNTPOINT,
> > +	.needs_hugetlbfs = 1,
> > +	.save_restore = (const struct tst_path_val[]) {
> > +		{PATH_OC_HPAGES, NULL},
> > +		{PATH_NR_HPAGES, NULL},
> > +		{}
> > +	},
> > +	.setup = setup,
> > +	.cleanup = cleanup,
> > +	.test_all = run_test,
> > +	.hugepages = {3, TST_NEEDS},
> > +};
> > +
> > -- 
> > 2.31.1
> > 
> 
> -- 
> Cyril Hrubis
> chrubis@suse.cz
> 