[LTP] [PATCH] Migrating the libhugetlbfs/testcases/alloc-instantiate-race.c test

Samir Mulani samir@linux.vnet.ibm.com
Tue Sep 12 08:41:26 CEST 2023


This test is designed to detect a kernel allocation race introduced
with hugepage demand-faulting.  The problem is that no lock is held
between allocating a hugepage and instantiating it in the
pagetables or page cache index.  In between the two, the (huge)
page is cleared, so there's substantial time.  Thus two processes
can race instantiating the (same) last available hugepage - one
will fail on the allocation, and thus cause an OOM fault even
though the page it actually wants is being instantiated by the
other racing process.

Signed-off-by: Samir Mulani <samir@linux.vnet.ibm.com>
---
 .../kernel/mem/hugetlb/hugemmap/hugemmap36.c  | 286 ++++++++++++++++++
 1 file changed, 286 insertions(+)
 create mode 100644 testcases/kernel/mem/hugetlb/hugemmap/hugemmap36.c

diff --git a/testcases/kernel/mem/hugetlb/hugemmap/hugemmap36.c b/testcases/kernel/mem/hugetlb/hugemmap/hugemmap36.c
new file mode 100644
index 000000000..7135af8a4
--- /dev/null
+++ b/testcases/kernel/mem/hugetlb/hugemmap/hugemmap36.c
@@ -0,0 +1,286 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2005-2006 IBM Corporation
+ * Author: David Gibson & Adam Litke
+ */
+
+/*\
+ * [Description]
+ *
+ * Test Name: alloc-instantiate-race.c
+ *
+ * This test is designed to detect a kernel allocation race introduced
+ * with hugepage demand-faulting.  The problem is that no lock is held
+ * between allocating a hugepage and instantiating it in the
+ * pagetables or page cache index.  In between the two, the (huge)
+ * page is cleared, so there's substantial time.  Thus two processes
+ * can race instantiating the (same) last available hugepage - one
+ * will fail on the allocation, and thus cause an OOM fault even
+ * though the page it actually wants is being instantiated by the
+ * other racing process.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <pthread.h>
+#include "tst_safe_pthread.h"
+#include "hugetlb.h"
+#define SYSFS_CPU_ONLINE_FMT	"/sys/devices/system/cpu/cpu%d/online"
+#define MNTPOINT "hugetlbfs/"
+unsigned long totpages;
+static long hpage_size;
+char *str_op;
+int race_type;
+static int child1, child2;
+static pthread_t thread1, thread2;
+
+struct racer_info {
+	void *p; /* instantiation address */
+	int cpu;
+	int race_type;
+	volatile int *mytrigger;
+	volatile int *othertrigger;
+	int status;
+};
+
+static int one_racer(void *p, int cpu,
+		     volatile int *mytrigger, volatile int *othertrigger)
+{
+	volatile int *pi = p;
+	cpu_set_t cpuset;
+	int err;
+
+	/* Split onto different cpus to encourage the race */
+	CPU_ZERO(&cpuset);
+	CPU_SET(cpu, &cpuset);
+
+	err = sched_setaffinity(gettid(), CPU_SETSIZE/8, &cpuset);
+	if (err != 0)
+		tst_res(TFAIL,"sched_setaffinity(cpu%d): %s", cpu, strerror(errno));
+
+	/* Ready.. */
+	*mytrigger = 1;
+	/* Set.. */
+	while (! *othertrigger)
+		;
+
+	/* Instantiate! */
+	*pi = 1;
+
+	return 0;
+}
+
+static void proc_racer(void *p, int cpu,
+		       volatile int *mytrigger, volatile int *othertrigger)
+{
+	exit(one_racer(p, cpu, mytrigger, othertrigger));
+}
+
+static void *thread_racer(void *info)
+{
+	struct racer_info *ri = info;
+	one_racer(ri->p, ri->cpu, ri->mytrigger, ri->othertrigger);
+	return ri;
+}
+
+void check_online_cpus(int online_cpus[], int nr_cpus_needed)
+{
+	char cpu_state, path_buf[64];
+	int total_cpus, cpu_idx, fd, ret, i;
+
+	total_cpus = get_nprocs_conf();
+	cpu_idx = 0;
+
+	if (get_nprocs() < nr_cpus_needed)
+		tst_res(TFAIL,"Atleast online %d cpus are required", nr_cpus_needed);
+
+	for (i = 0; i < total_cpus && cpu_idx < nr_cpus_needed; i++) {
+		errno = 0;
+		sprintf(path_buf, SYSFS_CPU_ONLINE_FMT, i);
+		fd = open(path_buf, O_RDONLY);
+		if (fd < 0) {
+			/* If 'online' is absent, the cpu cannot be offlined */
+			if (errno == ENOENT) {
+				online_cpus[cpu_idx] = i;
+				cpu_idx++;
+				continue;
+			} else {
+				tst_res(TFAIL,"Unable to open %s: %s", path_buf,
+				     strerror(errno));
+			}
+		}
+
+		ret = read(fd, &cpu_state, 1);
+		if (ret < 1)
+			tst_res(TFAIL,"Unable to read %s: %s", path_buf,
+			     strerror(errno));
+
+		if (cpu_state == '1') {
+			online_cpus[cpu_idx] = i;
+			cpu_idx++;
+		}
+
+		if (fd >= 0)
+			SAFE_CLOSE(fd);
+	}
+
+	if (cpu_idx < nr_cpus_needed)
+		tst_res(TFAIL,"Atleast %d online cpus were not found", nr_cpus_needed);
+}
+
+static void run_race(void *syncarea, int race_type)
+{
+	volatile int *trigger1, *trigger2;
+	int fd;
+	void *p, *tret1, *tret2;
+	int status1, status2;
+	int online_cpus[2];
+
+	check_online_cpus(online_cpus, 2);
+	memset(syncarea, 0, sizeof(*trigger1) + sizeof(*trigger2));
+	trigger1 = syncarea;
+	trigger2 = trigger1 + 1;
+
+	/* Get a new file for the final page */
+	fd = tst_creat_unlinked(MNTPOINT, 0);
+	tst_res(TINFO,"Mapping final page.. ");
+	p = SAFE_MMAP(NULL, hpage_size, PROT_READ|PROT_WRITE, race_type, fd, 0);
+	if (race_type == MAP_SHARED) {
+		child1 = SAFE_FORK();
+		if (child1 == 0)
+			proc_racer(p, online_cpus[0], trigger1, trigger2);
+
+		child2 = SAFE_FORK();
+		if (child2 == 0)
+			proc_racer(p, online_cpus[1], trigger2, trigger1);
+
+		/* wait() calls */
+		SAFE_WAITPID(child1, &status1, 0);
+		tst_res(TINFO,"Child 1 status: %x\n", status1);
+
+
+		SAFE_WAITPID(child2, &status2, 0);
+		tst_res(TINFO,"Child 2 status: %x\n", status2);
+
+		if (WIFSIGNALED(status1))
+			tst_res(TFAIL,"Child 1 killed by signal %s",
+			strsignal(WTERMSIG(status1)));
+		if (WIFSIGNALED(status2))
+			tst_res(TFAIL,"Child 2 killed by signal %s",
+			strsignal(WTERMSIG(status2)));
+
+		status1 = WEXITSTATUS(status1);
+		status2 = WEXITSTATUS(status2);
+	} else {
+		struct racer_info ri1 = {
+			.p = p,
+			.cpu = online_cpus[0],
+			.mytrigger = trigger1,
+			.othertrigger = trigger2,
+		};
+		struct racer_info ri2 = {
+			.p = p,
+			.cpu = online_cpus[1],
+			.mytrigger = trigger2,
+			.othertrigger = trigger1,
+		};
+		
+		SAFE_PTHREAD_CREATE(&thread1, NULL, thread_racer, &ri1);
+		SAFE_PTHREAD_CREATE(&thread2, NULL, thread_racer, &ri2);
+		SAFE_PTHREAD_JOIN(thread1, &tret1);
+		if (tret1 != &ri1)
+			tst_res(TFAIL,"Thread 1 returned %p not %p, killed?\n",
+			     tret1, &ri1);
+		SAFE_PTHREAD_JOIN(thread2, &tret2);
+		if (tret2 != &ri2)
+			tst_res(TFAIL,"Thread 2 returned %p not %p, killed?\n",
+			     tret2, &ri2);
+		status1 = ri1.status;
+		status2 = ri2.status;
+	}
+
+	if (status1 != 0)
+		tst_res(TFAIL,"Racer 1 terminated with code %d", status1);
+
+	if (status2 != 0)
+		tst_res(TFAIL,"Racer 2 terminated with code %d", status2);
+
+	if (fd >= 0)
+		SAFE_CLOSE(fd);
+}
+
+
+static void run_test(void)
+{
+	int fd;
+	long unsigned int i;
+	void *p, *q;
+	fd = tst_creat_unlinked(MNTPOINT, 0);
+	/* Get a shared normal page for synchronization */
+	tst_res(TINFO, "Mapping synchronization area..");
+	q = SAFE_MMAP(NULL, getpagesize(), PROT_READ|PROT_WRITE,
+		 MAP_SHARED|MAP_ANONYMOUS, -1, 0);
+	
+	tst_res(TINFO,"done\n");
+	tst_res(TINFO,"Mapping %ld/%ld pages.. ", totpages-1, totpages);
+	
+	p = SAFE_MMAP(NULL, (totpages-1)*hpage_size, PROT_READ|PROT_WRITE,
+		 MAP_SHARED, fd, 0);
+
+	/* Allocate all save one of the pages up front */
+	tst_res(TINFO,"instantiating.. ");
+	for (i = 0; i < (totpages - 1); i++)
+		memset(p + (i * hpage_size), 0, sizeof(int));
+
+	tst_res(TINFO,"done\n");
+
+	run_race(q, race_type);
+	tst_res(TPASS,"Test passed..!!");
+
+	if (fd >= 0)
+		SAFE_CLOSE(fd);
+}
+
+void setup(void)
+{
+	totpages = SAFE_READ_MEMINFO(MEMINFO_HPAGE_FREE);
+	hpage_size = tst_get_hugepage_size();
+	if (str_op)
+	{	
+		if (strcmp(str_op, "shared") == 0) {
+			race_type = MAP_SHARED;
+			} else if (strcmp(str_op, "private") == 0) {
+				race_type = MAP_PRIVATE;
+				} else {
+					tst_res(TFAIL,"Usage: alloc-instantiate-race <private|shared>");
+		}
+	}
+	else{
+		tst_res(TFAIL,"Usage: alloc-instantiate-race <private|shared>");
+	}	
+}
+
+void cleanup(void)
+{
+	if (child1)
+		SAFE_KILL(child1, SIGKILL);
+	if (child2)
+		SAFE_KILL(child2, SIGKILL);
+}
+
+
+static struct tst_test test = {
+	.options = (struct  tst_option[]){
+		{"m:", &str_op, "Usage: alloc-instantiate-race <private|shared>"},
+		{}
+	},
+	.needs_root = 1,
+	.mntpoint = MNTPOINT,
+	.needs_hugetlbfs = 1,
+	.needs_tmpdir = 1,
+	.setup = setup,
+	.cleanup = cleanup,
+	.test_all = run_test,
+	.hugepages = {2, TST_NEEDS},
+	.forks_child = 1,
+};
-- 
2.39.3



More information about the ltp mailing list