[LTP] [PATCH v4] thermal: add new test group
Petr Vorel
pvorel@suse.cz
Fri Jan 23 21:25:21 CET 2026
Hi Piotr,
> This is a new test for checking thermal interrupt events.
> Corrects issues pointed out by Petr Vorel for v3.
> +++ b/testcases/kernel/thermal/thermal_interrupt_events.c
> @@ -0,0 +1,204 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +
> +/*
> + * Copyright (C) 2025-2026 Intel - http://www.intel.com/
> + */
> +
> +/*\
> + * Tests the CPU package thermal sensor interface for Intel platforms.
> +
> + * Works by checking the initial count of thermal interrupts. Then it
> + * decreases the threshold for sending a thermal interrupt to just above
> + * the current temperature and runs a workload on the CPU. Finally, it restores
> + * the original thermal threshold and checks whether the number of thermal
> + * interrupts increased.
> + */
> +
> +#include "tst_safe_stdio.h"
> +#include "tst_test.h"
very nit: blank line between includes and defines helps readability
> +#define PATH_LEN 69
very nit: you mix tabs and spaces after #define.
I'd be ok to use NAME_MAX (255) from <limits.h> than to have an extra constant
just to save little bit of memory.
But maybe worth to define 8192 (/proc/interrupts line), which is used on 2 places.
> +#define RUNTIME 30
> +#define SLEEP 10
> +#define STRING_LEN 23
You don't use STRING_LEN.
> +#define TEMP_INCREMENT 10
> +
> +static char trip_path[PATH_LEN];
> +static int nproc, trip;
> +
> +static void setup(void)
> +{
> + nproc = tst_ncpus();
> + tst_res(TDEBUG, "Number of logical cores: %d", nproc);
> +}
> +
> +static void cleanup(void)
> +{
> + tst_res(TDEBUG, "Restoring original trip_point_1_temp value: %d", trip);
I don't think this message is useful. It's just a cleanup. Also if it fails,
you'll get message from LTP library.
> + SAFE_FILE_PRINTF(trip_path, "%d", trip);
> +}
> +
> +static void *cpu_workload(double run_time)
> +{
> + time_t start_time = time(NULL);
> + int num = 2;
> +
> + while (difftime(time(NULL), start_time) < run_time) {
> + for (int i = 2; i * i <= num; i++) {
> + if (num % i == 0)
> + break;
> + }
> + num++;
> + }
> + return NULL;
> +}
> +
> +static void read_interrupts(uint64_t *interrupt_array, const int nproc)
very nit: maybe just interrupts? Variable names can be meaningful and yet short
enough :).
> +{
> + bool interrupts_found = 0;
very nit: sure it works, but why not using true and false?
> + char line[8192];
> +
> + memset(interrupt_array, 0, nproc * sizeof(*interrupt_array));
> + FILE *fp = SAFE_FOPEN("/proc/interrupts", "r");
> +
> + while (fgets(line, sizeof(line), fp)) {
> + if (strstr(line, "Thermal event interrupts")) {
> + interrupts_found = 1;
> + char *token = strtok(line, " ");
> +
> + token = strtok(NULL, " ");
> + int i = 0;
> +
> + while (!!strncmp(token, "Thermal", 7)) {
> + interrupt_array[i++] = atoll(token);
> + token = strtok(NULL, " ");
> + tst_res(TDEBUG, "Current value of interrupt_array[%d]: %ld", i - 1, interrupt_array[i - 1]);
nit: It's just a debug info, why not just (shorten long lines):
tst_res(TDEBUG, "interrupts[%d]: %ld", i-1, interrupts[i-1]);
> + }
We don't need processing any other line after TRM: right?
while (fgets(line, sizeof(line), fp)) {
if (!strstr(line, "Thermal event interrupts"))
continue;
interrupts_found = 1;
char *token = strtok(line, " ");
token = strtok(NULL, " ");
int i = 0;
while (!!strncmp(token, "Thermal", 7)) {
interrupt_array[i++] = atoll(token);
token = strtok(NULL, " ");
tst_res(TDEBUG, "Current value of interrupt_array[%d]: %ld", i - 1, interrupt_array[i - 1]);
}
break;
}
> + }
> + }
> + SAFE_FCLOSE(fp);
> + if (!interrupts_found)
> + tst_brk(TCONF, "No Thermal event interrupts line in /proc/interrupts");
> +}
> +
> +static void run(void)
> +{
> + bool status = 1;
> + char line[8192];
> + uint64_t interrupt_init[nproc], interrupt_later[nproc];
> +
> + read_interrupts(interrupt_init, nproc);
> +
> + DIR *dir = SAFE_OPENDIR("/sys/class/thermal/");
> + struct dirent *entry;
> + int tz_counter = 0;
> +
> + while ((entry = SAFE_READDIR(dir))) {
> + if ((strncmp(entry->d_name, "thermal_zone", sizeof("thermal_zone"))) > 0)
> + tz_counter++;
> + }
> + SAFE_CLOSEDIR(dir);
> + tst_res(TDEBUG, "Found %d thermal zone(s)", tz_counter);
As I noted previously, at least this part will not change if you run test more
times, does it? Why not to move it to the setup()?
Imagine running test 1000x iterations:
./thermal_interrupt_events -i 1000
Why to waste time with reading it again?
The only exception might be reading interrupts. I would expect it's ok to have
only the initial state (read in the setup() as well), but maybe (when test run
with more iterations via -i x) it needs to have the updated state (from the
previous iteration).
> +
> + bool x86_pkg_temp_tz[tz_counter], x86_pkg_temp_tz_found = 0;
> +
> + memset(x86_pkg_temp_tz, 0, sizeof(x86_pkg_temp_tz));
> +
> + for (int i = 0; i < tz_counter; i++) {
> + char path[PATH_LEN];
> +
> + snprintf(path, PATH_LEN, "/sys/class/thermal/thermal_zone%d/type", i);
> + tst_res(TDEBUG, "Checking whether %s is x86_pkg_temp", path);
> +
> + SAFE_FILE_SCANF(path, "%s", line);
> + if (strstr(line, "x86_pkg_temp")) {
> + tst_res(TDEBUG, "Thermal zone %d uses x86_pkg_temp", i);
> + x86_pkg_temp_tz[i] = 1;
> + x86_pkg_temp_tz_found = 1;
> + }
> + }
> + if (!x86_pkg_temp_tz_found) {
> + tst_res(TINFO, "No thermal zone uses x86_pkg_temp");
And probably this part will not happen when you run more iterations (-i1000).
> + status = 0;
If this happen, test fail, right? (you never set status = 1 later). Why to do
the rest of the testing?
I would really expect the whole part of run() up here is in the setup() and test
quits in this case:
if (!x86_pkg_temp_tz_found)
tst_brk(TCONF, "No thermal zone uses x86_pkg_temp");
> + }
> +
> + for (int i = 0; i < tz_counter; i++) {
> + if (x86_pkg_temp_tz[i]) {
run() is quite long. Maybe move content of of this loop would help.
Something like this (use whatever function name) would help the readability.
for (int i = 0; i < tz_counter; i++) {
if (x86_pkg_temp_tz[i])
test_zone(x86_pkg_temp_tz[i]);
}
Maybe even split the while part into it's own function.
> + char path[PATH_LEN], temp_path[PATH_LEN];
> + int sleep_time = SLEEP, temp_high, temp;
> + double run_time = RUNTIME;
> +
> + snprintf(path, PATH_LEN, "/sys/class/thermal/thermal_zone%d/", i);
> + strncpy(temp_path, path, PATH_LEN);
> + strncat(temp_path, "temp", 4);
> + tst_res(TINFO, "Testing %s", temp_path);
> + SAFE_FILE_SCANF(temp_path, "%d", &temp);
> + if (temp < 0) {
> + tst_brk(TINFO, "Unexpected zone temperature value %d", temp);
tst_brk(TINFO, ...) is wrong, because tst_brk() quits testing. TINFO is always
used with tst_res() (normal message).
I guess it should be either tst_brk(TCONF), as wrong temperature looks to me as
a bug.
> + status = 0;
> + }
> + tst_res(TDEBUG, "Current temperature for %s: %d", path, temp);
> +
> + temp_high = temp + TEMP_INCREMENT;
> +
> + strncpy(trip_path, path, PATH_LEN);
> + strncat(trip_path, "trip_point_1_temp", 17);
> +
> + tst_res(TDEBUG, "Setting new trip_point_1_temp value: %d", temp_high);
> + SAFE_FILE_SCANF(trip_path, "%d", &trip);
> + SAFE_FILE_PRINTF(trip_path, "%d", temp_high);
> +
> + while (sleep_time > 0) {
> + tst_res(TDEBUG, "Running for %f seconds, then sleeping for %d seconds", run_time, sleep_time);
nit: %f should be %d, right?
tst_res(TDEBUG, "Running for %d seconds, then sleeping for %d seconds",
(int)run_time, sleep_time);
> +
> + for (int j = 0; j < nproc; j++) {
> + if (!SAFE_FORK()) {
> + cpu_workload(run_time);
> + exit(0);
> + }
> + }
> +
> + for (int j = 0; j < nproc; j++)
> + tst_reap_children();
tst_reap_children() should be called only once (not for all cpus).
Quoting doc:
The 'tst_reap_children()' function makes the process wait for all of its
children and exits with 'tst_brk(TBROK, ...)' if any of them returned
a non zero exit code.
See function itself in lib/tst_test.c and "Test 3" in lib/newlib_tests/tst_checkpoint.c.
> +
> + SAFE_FILE_SCANF(temp_path, "%d", &temp);
> + tst_res(TDEBUG, "Temperature for %s after a test: %d", path, temp);
> +
> + if (temp > temp_high)
> + break;
> + sleep(sleep_time--);
> + run_time -= 3;
> + }
> + if (temp <= temp_high) {
> + tst_res(TINFO, "Zone temperature is not rising as expected");
I'm not the expert on the topic, but IMHO how about have this as TFAIL
and print at the end only TPASS if no error found?
tst_res(TFAIL, "CPU %d: Zone temperature is not rising as expected", i);
> + status = 0;
> + }
> + }
> + }
> + read_interrupts(interrupt_later, nproc);
> +
> + for (int i = 0; i < nproc; i++) {
> + if (interrupt_later[i] < interrupt_init[i]) {
> + tst_res(TINFO, "For CPU %d interrupt counter is currently %ld, while it was %ld before the test", i, interrupt_later[i], interrupt_init[i]);
very nit: this line is really too long.
tst_res(TFAIL, "CPU %d interrupt counter: %ld (previous: %ld)",
i, interrupt_later[i], interrupt_init[i]);
> + status = 0;
> + }
> + }
> +
> + if (status)
> + tst_res(TPASS, "x86 package thermal interrupt triggered");
> + else
> + tst_res(TFAIL, "x86 package thermal interrupt did not trigger");
if (status)
tst_res(TPASS, "All interrupts triggered");
(Don't print final TFAIL when errors were printed previously.)
...
Kind regards,
Petr
More information about the ltp
mailing list