[LTP] [PATCH v4] thermal: add new test group

Petr Vorel pvorel@suse.cz
Fri Jan 23 21:25:21 CET 2026


Hi Piotr,

> This is a new test for checking thermal interrupt events.
> Corrects issues pointed out by Petr Vorel for v3.

> +++ b/testcases/kernel/thermal/thermal_interrupt_events.c
> @@ -0,0 +1,204 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +
> +/*
> + * Copyright (C) 2025-2026 Intel - http://www.intel.com/
> + */
> +
> +/*\
> + * Tests the CPU package thermal sensor interface for Intel platforms.
> +
> + * Works by checking the initial count of thermal interrupts. Then it
> + * decreases the threshold for sending a thermal interrupt to just above
> + * the current temperature and runs a workload on the CPU. Finally, it restores
> + * the original thermal threshold and checks whether the number of thermal
> + * interrupts increased.
> + */
> +
> +#include "tst_safe_stdio.h"
> +#include "tst_test.h"
very nit: blank line between includes and defines helps readability
> +#define	PATH_LEN	69
very nit: you mix tabs and spaces after #define.
I'd be ok to use NAME_MAX (255) from <limits.h> than to have an extra constant
just to save little bit of memory.
But maybe worth to define 8192 (/proc/interrupts line), which is used on 2 places.

> +#define RUNTIME		30
> +#define SLEEP		10
> +#define	STRING_LEN	23
You don't use STRING_LEN.

> +#define TEMP_INCREMENT	10
> +
> +static char trip_path[PATH_LEN];
> +static int nproc, trip;
> +
> +static void setup(void)
> +{
> +	nproc = tst_ncpus();
> +	tst_res(TDEBUG, "Number of logical cores: %d", nproc);
> +}
> +
> +static void cleanup(void)
> +{
> +	tst_res(TDEBUG, "Restoring original trip_point_1_temp value: %d", trip);
I don't think this message is useful. It's just a cleanup. Also if it fails,
you'll get message from LTP library.

> +	SAFE_FILE_PRINTF(trip_path, "%d", trip);
> +}
> +
> +static void *cpu_workload(double run_time)
> +{
> +	time_t start_time = time(NULL);
> +	int num = 2;
> +
> +	while (difftime(time(NULL), start_time) < run_time) {
> +		for (int i = 2; i * i <= num; i++) {
> +			if (num % i == 0)
> +				break;
> +		}
> +		num++;
> +	}
> +	return NULL;
> +}
> +
> +static void read_interrupts(uint64_t *interrupt_array, const int nproc)
very nit: maybe just interrupts? Variable names can be meaningful and yet short
enough :).

> +{
> +	bool interrupts_found = 0;
very nit: sure it works, but why not using true and false?

> +	char line[8192];
> +
> +	memset(interrupt_array, 0, nproc * sizeof(*interrupt_array));
> +	FILE *fp = SAFE_FOPEN("/proc/interrupts", "r");
> +
> +	while (fgets(line, sizeof(line), fp)) {
> +		if (strstr(line, "Thermal event interrupts")) {
> +			interrupts_found = 1;
> +			char *token = strtok(line, " ");
> +
> +			token = strtok(NULL, " ");
> +			int i = 0;
> +
> +			while (!!strncmp(token, "Thermal", 7)) {
> +				interrupt_array[i++] = atoll(token);
> +				token = strtok(NULL, " ");
> +				tst_res(TDEBUG, "Current value of interrupt_array[%d]: %ld", i - 1, interrupt_array[i - 1]);
nit: It's just a debug info, why not just (shorten long lines):
				tst_res(TDEBUG, "interrupts[%d]: %ld", i-1, interrupts[i-1]);

> +			}
We don't need processing any other line after TRM: right?

	while (fgets(line, sizeof(line), fp)) {
		if (!strstr(line, "Thermal event interrupts"))
			continue;

		interrupts_found = 1;
		char *token = strtok(line, " ");

		token = strtok(NULL, " ");
		int i = 0;

		while (!!strncmp(token, "Thermal", 7)) {
			interrupt_array[i++] = atoll(token);
			token = strtok(NULL, " ");
			tst_res(TDEBUG, "Current value of interrupt_array[%d]: %ld", i - 1, interrupt_array[i - 1]);
		}
		break;
	}

> +		}
> +	}
> +	SAFE_FCLOSE(fp);
> +	if (!interrupts_found)
> +		tst_brk(TCONF, "No Thermal event interrupts line in /proc/interrupts");
> +}
> +

> +static void run(void)
> +{
> +	bool status = 1;
> +	char line[8192];
> +	uint64_t interrupt_init[nproc], interrupt_later[nproc];
> +
> +	read_interrupts(interrupt_init, nproc);
> +
> +	DIR *dir = SAFE_OPENDIR("/sys/class/thermal/");
> +	struct dirent *entry;
> +	int tz_counter = 0;
> +
> +	while ((entry = SAFE_READDIR(dir))) {
> +		if ((strncmp(entry->d_name, "thermal_zone", sizeof("thermal_zone"))) > 0)
> +			tz_counter++;
> +	}
> +	SAFE_CLOSEDIR(dir);
> +	tst_res(TDEBUG, "Found %d thermal zone(s)", tz_counter);
As I noted previously, at least this part will not change if you run test more
times, does it? Why not to move it to the setup()?

Imagine running test 1000x iterations:
./thermal_interrupt_events -i 1000

Why to waste time with reading it again?

The only exception might be reading interrupts. I would expect it's ok to have
only the initial state (read in the setup() as well), but maybe (when test run
with more iterations via -i x) it needs to have the updated state (from the
previous iteration).

> +
> +	bool x86_pkg_temp_tz[tz_counter], x86_pkg_temp_tz_found = 0;
> +
> +	memset(x86_pkg_temp_tz, 0, sizeof(x86_pkg_temp_tz));
> +
> +	for (int i = 0; i < tz_counter; i++) {
> +		char path[PATH_LEN];
> +
> +		snprintf(path, PATH_LEN, "/sys/class/thermal/thermal_zone%d/type", i);
> +		tst_res(TDEBUG, "Checking whether %s is x86_pkg_temp", path);
> +
> +		SAFE_FILE_SCANF(path, "%s", line);
> +		if (strstr(line, "x86_pkg_temp")) {
> +			tst_res(TDEBUG, "Thermal zone %d uses x86_pkg_temp", i);
> +			x86_pkg_temp_tz[i] = 1;
> +			x86_pkg_temp_tz_found = 1;
> +		}
> +	}
> +	if (!x86_pkg_temp_tz_found) {
> +		tst_res(TINFO, "No thermal zone uses x86_pkg_temp");
And probably this part will not happen when you run more iterations (-i1000).

> +		status = 0;

If this happen, test fail, right? (you never set status = 1 later). Why to do
the rest of the testing?

I would really expect the whole part of run() up here is in the setup() and test
quits in this case:

if (!x86_pkg_temp_tz_found)
	tst_brk(TCONF, "No thermal zone uses x86_pkg_temp");

> +	}


> +
> +	for (int i = 0; i < tz_counter; i++) {
> +		if (x86_pkg_temp_tz[i]) {

run() is quite long. Maybe move content of of this loop would help.
Something like this (use whatever function name) would help the readability.

	for (int i = 0; i < tz_counter; i++) {
		if (x86_pkg_temp_tz[i])
			test_zone(x86_pkg_temp_tz[i]);
	}
Maybe even split the while part into it's own function.

> +			char path[PATH_LEN], temp_path[PATH_LEN];
> +			int sleep_time = SLEEP, temp_high, temp;
> +			double run_time = RUNTIME;
> +
> +			snprintf(path, PATH_LEN, "/sys/class/thermal/thermal_zone%d/", i);
> +			strncpy(temp_path, path, PATH_LEN);
> +			strncat(temp_path, "temp", 4);
> +			tst_res(TINFO, "Testing %s", temp_path);
> +			SAFE_FILE_SCANF(temp_path, "%d", &temp);
> +			if (temp < 0) {
> +				tst_brk(TINFO, "Unexpected zone temperature value %d", temp);
tst_brk(TINFO, ...) is wrong, because tst_brk() quits testing. TINFO is always
used with tst_res() (normal message).

I guess it should be either tst_brk(TCONF), as wrong temperature looks to me as
a bug.

> +				status = 0;
> +			}
> +			tst_res(TDEBUG, "Current temperature for %s: %d", path, temp);
> +
> +			temp_high = temp + TEMP_INCREMENT;
> +
> +			strncpy(trip_path, path, PATH_LEN);
> +			strncat(trip_path, "trip_point_1_temp", 17);
> +
> +			tst_res(TDEBUG, "Setting new trip_point_1_temp value: %d", temp_high);
> +			SAFE_FILE_SCANF(trip_path, "%d", &trip);
> +			SAFE_FILE_PRINTF(trip_path, "%d", temp_high);
> +
> +			while (sleep_time > 0) {
> +				tst_res(TDEBUG, "Running for %f seconds, then sleeping for %d seconds", run_time, sleep_time);
nit: %f should be %d, right?

				tst_res(TDEBUG, "Running for %d seconds, then sleeping for %d seconds",
					(int)run_time, sleep_time);
> +
> +				for (int j = 0; j < nproc; j++) {
> +					if (!SAFE_FORK()) {
> +						cpu_workload(run_time);
> +						exit(0);
> +					}
> +				}
> +
> +				for (int j = 0; j < nproc; j++)
> +					tst_reap_children();

tst_reap_children() should be called only once (not for all cpus).

Quoting doc:

	The 'tst_reap_children()' function makes the process wait for all of its
	children and exits with 'tst_brk(TBROK, ...)' if any of them returned
	a non zero exit code.

See function itself in lib/tst_test.c and "Test 3" in lib/newlib_tests/tst_checkpoint.c.

> +
> +				SAFE_FILE_SCANF(temp_path, "%d", &temp);
> +				tst_res(TDEBUG, "Temperature for %s after a test: %d", path, temp);
> +
> +				if (temp > temp_high)
> +					break;
> +				sleep(sleep_time--);
> +				run_time -= 3;
> +			}
> +			if (temp <= temp_high) {
> +				tst_res(TINFO, "Zone temperature is not rising as expected");
I'm not the expert on the topic, but IMHO how about have this as TFAIL
and print at the end only TPASS if no error found?

				tst_res(TFAIL, "CPU %d: Zone temperature is not rising as expected", i);

> +				status = 0;
> +			}
> +		}
> +	}
> +	read_interrupts(interrupt_later, nproc);
> +
> +	for (int i = 0; i < nproc; i++) {
> +		if (interrupt_later[i] < interrupt_init[i]) {
> +			tst_res(TINFO, "For CPU %d interrupt counter is currently %ld, while it was %ld before the test", i, interrupt_later[i], interrupt_init[i]);
very nit: this line is really too long.

			tst_res(TFAIL, "CPU %d interrupt counter: %ld (previous: %ld)",
				i, interrupt_later[i], interrupt_init[i]);

> +			status = 0;
> +		}
> +	}
> +
> +	if (status)
> +		tst_res(TPASS, "x86 package thermal interrupt triggered");
> +	else
> +		tst_res(TFAIL, "x86 package thermal interrupt did not trigger");

	if (status)
		tst_res(TPASS, "All interrupts triggered");

(Don't print final TFAIL when errors were printed previously.)
...

Kind regards,
Petr


More information about the ltp mailing list