[LTP] [PATCH] misc: rewrite crash02 test
Andrea Cervesato
andrea.cervesato@suse.com
Mon Jun 23 10:15:27 CEST 2025
Hi!
On 6/19/25 4:34 PM, Martin Doucha wrote:
> Hi,
> some suggestions below.
>
> On 11. 06. 25 15:24, Andrea Cervesato wrote:
>> From: Andrea Cervesato <andrea.cervesato@suse.com>
>>
>> Rewrite the crash02 test, introducing new API but maintaining the logic
>> behind it. The test generates random syscall executions with random data
>> and it verifies that system didn't crash.
>>
>> Signed-off-by: Andrea Cervesato <andrea.cervesato@suse.com>
>> ---
>> testcases/misc/crash/crash02.c | 577
>> +++++++++--------------------------------
>> 1 file changed, 123 insertions(+), 454 deletions(-)
>>
>> diff --git a/testcases/misc/crash/crash02.c
>> b/testcases/misc/crash/crash02.c
>> index
>> c68f580ef62ad3b3c644093f72646a8908e55076..417c2aa2b8e4facf9ddcde358fe59a7f4419e859
>> 100644
>> --- a/testcases/misc/crash/crash02.c
>> +++ b/testcases/misc/crash/crash02.c
>> @@ -1,497 +1,166 @@
>> +// SPDX-License-Identifier: GPL-2.0-or-later
>> /*
>> - * crash02.c - Test OS robustness by executing syscalls with random
>> args.
>> - *
>> - * Copyright (C) 2001 Stephane Fillod <f4cfe@free.fr>
>> - *
>> - * This test program was inspired from crashme, by GEORGE J. CARRETT.
>> - *
>> - * This program is free software; you can redistribute it and/or
>> - * modify it under the terms of the GNU General Public License
>> - * as published by the Free Software Foundation; either version 2
>> - * of the License, or (at your option) any later version.
>> - *
>> - * This program is distributed in the hope that it will be useful,
>> - * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
>> - * GNU General Public License for more details.
>> - *
>> - * You should have received a copy of the GNU General Public License
>> - * along with this program; if not, write to the Free Software
>> - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
>> 02111-1307, USA.
>> + * Copyright (C) 2025 SUSE LLC <andrea.cervesato@suse.com>
>> */
>> -/*
>> -A signal handler is set up so that in most cases the machine exception
>> -generated by the illegal syscall, bad operands, etc in the procedure
>> -made up of random data are caught; and another round of randomness may
>> -be tried. Eventually a random syscall may corrupt the program or
>> -the machine state in such a way that the program must halt. This is
>> -a test of the robustness of the hardware/software for instruction
>> -fault handling.
>> -
>> -Note: Running this program just a few times, using total CPU time of
>> -less than a few seconds SHOULD NOT GIVE YOU ANY CONFIDENCE in system
>> -robustness. Having it run for hours, with tens of thousands of cases
>> -would be a different thing. It would also make sense to run this
>> -stress test at the same time you run other tests, like a multi-user
>> -benchmark.
>> -
>> -CAUTION: running this program may crash your system, your disk and all
>> - your data along! DO NOT RUN IT ON PRODUCTION SYSTEMS!
>> - CONSIDER YOUR DISK FRIED.
>> - REMEMBER THE DISCLAIMER PART OF THE LICENSE.
>> -
>> - Running as user nobody and with all your filesystems
>> - remounted to readonly may be wise..
>> -
>> -TODO:
>> - * in rand_long(), stuff in some real pointers to random data
>> - * Does a syscall is supposed to send SIGSEGV?
>> -*/
>> -
>> -#define _GNU_SOURCE
>> -#include <sys/syscall.h>
>> -#include <stdio.h>
>> -#include <stdlib.h>
>> -#include <string.h>
>> -#include <signal.h>
>> -#include <setjmp.h>
>> -#include <time.h>
>> -#include <unistd.h>
>> -#include <errno.h>
>> -#include <sys/types.h>
>> -#include <sys/wait.h>
>> -
>> -#include "test.h"
>> -
>> -char *TCID = "crash02";
>> -int TST_TOTAL = 1;
>> -
>> -static int x_opt = 0;
>> -static int v_opt = 0;
>> -static char *v_copt;
>> -static int s_opt = 0;
>> -static char *s_copt;
>> -static int l_opt = 0;
>> -static char *l_copt;
>> -static int n_opt = 0;
>> -static char *n_copt;
>> -
>> -int verbose_level = 2;
>> -
>> -/* depends on architecture.. */
>> -unsigned int sysno_max = 127;
>> -
>> -int nseed;
>> -int ntries = 100;
>> -
>> -/* max time allowed per try, in seconds */
>> -#define MAX_TRY_TIME 5
>> -
>> -void cleanup(void)
>> -{
>> -
>> - tst_rmdir();
>> -
>> -}
>> -
>> -void setup(void)
>> -{
>> - /*
>> - * setup a default signal hander and a
>> - * temporary working directory.
>> - */
>> - tst_sig(FORK, DEF_HANDLER, cleanup);
>> +/*\
>> + * Test the robustness of the system generating random syscalls
>> execution
>> + * with random data and expecting that the current system is not
>> crashing.
>> + */
>> - TEST_PAUSE;
>> +#include <limits.h>
>> +#include "tst_test.h"
>> +#include "lapi/syscalls.h"
>> +#include "lapi/getrandom.h"
>> - tst_tmpdir();
>> -}
>> +#define MAX_SYSCALLS 465
>
> Maybe this constant should be defined in lapi/syscalls.h?
This constant should be obtained by adding 1 to the highest syscall number.
It would have to be done by the shell script that generates lapi/syscalls.h,
which is actually quite sub-optimal, and the value would change according
to the architecture. Not really an easy task, but I can try to do it
after this patch is merged.
>
>> -void help(void)
>> -{
>> - printf
>> - (" -x dry run, hexdump random code instead\n");
>> - printf(" -l x max syscall no\n");
>> - printf(" -v x verbose level\n");
>> - printf(" -s x random seed\n");
>> - printf(" -n x ntries\n");
>> -}
>> +static int *num_errors;
>> +static char *str_num_tries;
>> +static int num_tries = 1000;
>> -/*
>> - */
>> -option_t options[] = {
>> - {"v:", &v_opt, &v_copt},
>> - {"l:", &l_opt, &l_copt},
>> - {"s:", &s_opt, &s_copt},
>> - {"n:", &n_opt, &n_copt},
>> - {"x", &x_opt, NULL},
>> -
>> - {NULL, NULL, NULL}
>> +static int blacklist[] = {
>> +#if defined(__ia64__)
>> + __NR_clone2, /* IA-64 uses clone2 instead of fork/vfork */
>> +#else
>> +# if defined(__NR_vfork)
>> + __NR_vfork,
>> +# endif
>> +# if defined(__NR_fork)
>> + __NR_fork,
>> +# endif
>> +#endif /* __ia64__ */
>> +#if defined(__NR_clone)
>> + __NR_clone,
>> +#endif
>> +#if defined(__NR_clone3)
>> + __NR_clone3,
>> +#endif
>> +#if defined(__NR_vhangup)
>> + __NR_vhangup, /* terminal logout */
>> +#endif
>> +#if defined(__NR_pause)
>> + __NR_pause, /* sleep indefinitely */
>> +#endif
>> +#if defined(__NR_read)
>> + __NR_read, /* sleep indefinitely if the first argument is 0 */
>> +#endif
>> + __LTP__NR_INVALID_SYSCALL,
>> };
>
> Since you #include "lapi/syscalls.h", the #ifdef checks will always be
> true, except for the __ia64__ check which should be removed anyway.
> Just make an unconditional list of all the blacklisted syscalls. Also
> remove __LTP__NR_INVALID_SYSCALL at the end of the list since you use
> ARRAY_SIZE() to find array bounds.
>
> I'd also recommend a follow-up patch to add __NR_kill and
> __NR_restart_syscall because both cause expected test failures with
> certain arguments.
+1
>
>> -void badboy_fork();
>> -void badboy_loop();
>> -
>> -void summarize_errno();
>> -void record_errno(unsigned int n);
>> -
>> -int main(int argc, char *argv[])
>> +static inline long rand_number(void)
>> {
>> - int lc;
>> -
>> - tst_parse_opts(argc, argv, options, help);
>> -
>> - if (v_opt)
>> - verbose_level = atoi(v_copt);
>> + int64_t num = 0;
>> + char buff[4];
>> - if (n_opt)
>> - ntries = atoi(n_copt);
>> + if (getrandom(buff, 4, 0) == -1)
>> + tst_brk(TBROK | TERRNO, "getrandom error");
>> - if (l_opt)
>> - sysno_max = atoi(l_copt);
>> + num = (buff[0] << 24) | (buff[1] << 16) | (buff[2] << 8) | buff[3];
>> + if (num < 0)
>> + num *= -1;
>> + num = (num % MAX_SYSCALLS) - 1;
>
> Why do you subtract on the last line above?
Some syscalls sometimes need one of their arguments to be -1, so the
subtraction makes -1 a possible return value.
> Also, this would be much simpler and the truncation to MAX_SYSCALLS
> should be done by caller when needed.
>
> static inline long rand_long(void)
> {
> long ret;
>
> if (getrandom(&ret, sizeof(ret), 0) < 0)
> tst_brk(...);
>
> return ret;
> }
>
> Although it'd be great to support setting a random seed like in the
> original test so that crashes can be reproduced.
I don't think I understand this sentence. This code takes into account
the architecture-specific word size by casting a 64-bit number to the
"long" type. The size of long varies between architectures, so the value
is eventually truncated during the cast.
The crash can be reproduced because we print, via TDEBUG, the syscall
number and all the arguments which are given to it.
>
>> - if (s_opt)
>> - nseed = atoi(s_copt);
>> - else
>> - nseed = time(NULL);
>> -
>> - setup();
>> -
>> - for (lc = 0; TEST_LOOPING(lc); lc++) {
>> - tst_count = 0;
>> -
>> - tst_resm(TINFO, "crashme02 %d %d %d", sysno_max, nseed,
>> ntries);
>> -
>> - srand(nseed);
>> - badboy_fork();
>> -
>> - /* still there? */
>> - tst_resm(TPASS, "we're still here, OS seems to be robust");
>> -
>> - nseed++;
>> - }
>> - cleanup();
>> - tst_exit();
>> + return (long)num;
>> }
>> -/* ************************* */
>> -int badboy_pid;
>> -
>> -void my_signal(int sig, void (*func) ());
>> -
>> -void monitor_fcn(int sig)
>> -{
>> - int status;
>> -
>> - if (verbose_level >= 3)
>> - printf("time limit reached on pid. using kill.\n");
>> -
>> - status = kill(badboy_pid, SIGKILL);
>> - if (status < 0) {
>> - if (verbose_level >= 3)
>> - printf("failed to kill process\n");
>> - }
>> -}
>> -
>> -void badboy_fork(void)
>> +static void try_crash(const int num)
>> {
>> - int status, pid;
>> - pid_t child = fork();
>> + long sysno, arg0, arg1, arg2, arg3, arg4, arg5, arg6;
>> + int invalid;
>> + int ret;
>> - switch (child) {
>> - case -1:
>> - perror("fork");
>> - case 0:
>> -#ifdef DEBUG_LATE_BADBOY
>> - sleep(ntries * MAX_TRY_TIME + 10);
>> -#else
>> - badboy_loop();
>> -#endif
>> - exit(0);
>> - default:
>> - badboy_pid = child;
>> - if (verbose_level > 3)
>> - printf("badboy pid = %d\n", badboy_pid);
>> -
>> - /* don't trust the child to return at night */
>> - my_signal(SIGALRM, monitor_fcn);
>> - alarm(ntries * MAX_TRY_TIME);
>> -
>> - pid = waitpid(-1, &status, WUNTRACED);
>> - if (pid <= 0)
>> - perror("wait");
>> - else {
>> - if (verbose_level > 3)
>> - printf("pid %d exited with status %d\n",
>> - pid, status);
>> -#if 0
>> - record_status(status);
>> -#endif
>> + do {
>> + invalid = 0;
>> + sysno = rand_number() % MAX_SYSCALLS;
>> +
>> + for (size_t i = 0; i < ARRAY_SIZE(blacklist); i++) {
>> + if (blacklist[i] == sysno) {
>> + invalid = 1;
>> + break;
>> + }
>> }
>
> The original approach with in_blacklist() helper function was cleaner.
> Also note that both your rand_number() and my rand_long() can return
> negative values.
They can only return -1. All negative values below that are
converted to positive numbers.
>
>> - }
>> - alarm(0);
>> -}
>> -
>> -/* *************** status recording ************************* */
>> -/* errno status table (max is actually around 127) */
>> -#define STATUS_MAX 256
>> -static int errno_table[STATUS_MAX];
>> -
>> -void record_errno(unsigned int n)
>> -{
>> - if (n >= STATUS_MAX)
>> - return;
>> + if (!invalid)
>> + break;
>> + } while (1);
>> - errno_table[n]++;
>> -}
>> + arg0 = rand_number();
>> + arg1 = rand_number();
>> + arg2 = rand_number();
>> + arg3 = rand_number();
>> + arg4 = rand_number();
>> + arg5 = rand_number();
>> + arg6 = rand_number();
>> -/* may not work with -c option */
>> -void summarize_errno(void)
>> -{
>> - int i;
>> + tst_res(TDEBUG,
>> + "%d: syscall(%ld, %#lx, %#lx, %#lx, %#lx, %#lx, %#lx, %#lx)",
>> + num, sysno, arg0, arg1, arg2, arg3, arg4, arg5, arg6);
>> - if (x_opt || verbose_level < 2)
>> - return;
>> + ret = syscall(sysno, arg0, arg1, arg2, arg3, arg4, arg5, arg6);
>> + if (ret == -1) {
>> + (*num_errors)++;
>> - printf("errno status ... number of cases\n");
>> - for (i = 0; i < STATUS_MAX; i++) {
>> - if (errno_table[i])
>> - printf("%12d ... %5d\n", i, errno_table[i]);
>> + tst_res(TDEBUG, "syscall error: %s", strerror(errno));
>> }
>> }
>> -/* ************* badboy
>> ******************************************* */
>> -
>> -jmp_buf again_buff;
>> -
>> -unsigned char *bad_malloc(int n);
>> -void my_signal(int sig, void (*func) ());
>> -void again_handler(int sig);
>> -void try_one_crash(int try_num);
>> -void set_up_signals();
>> -int in_blacklist(int sysno);
>> -
>> -/* badboy "entry" point */
>> -
>> -/*
>> - * Unlike crashme, faulty syscalls are not supposed to barf
>> - */
>> -void badboy_loop(void)
>> +static void run(void)
>> {
>> - int i;
>> -
>> - for (i = 0; i < ntries; ++i) {
>> - /* level 5 */
>> + pid_t pid;
>> + int status;
>> + int num_signals = 0;
>> - if (!x_opt && verbose_level >= 5) {
>> - printf("try %d\n", i);
>> + for (int i = 0; i < num_tries; i++) {
>> + pid = SAFE_FORK();
>> + if (!pid) {
>> + try_crash(i);
>> + exit(0);
>> }
>> - if (setjmp(again_buff) == 3) {
>> - if (verbose_level >= 5)
>> - printf("Barfed\n");
>> - } else {
>> - set_up_signals();
>> - alarm(MAX_TRY_TIME);
>> - try_one_crash(i);
>> - }
>> - }
>> - summarize_errno();
>> -}
>> + SAFE_WAITPID(pid, &status, 0);
>> -void again_handler(int sig)
>> -{
>> - char *ss;
>> + if (WIFSIGNALED(status)) {
>> + num_signals++;
>> - switch (sig) {
>> - case SIGILL:
>> - ss = " illegal instruction";
>> - break;
>> -#ifdef SIGTRAP
>> - case SIGTRAP:
>> - ss = " trace trap";
>> - break;
>> -#endif
>> - case SIGFPE:
>> - ss = " arithmetic exception";
>> - break;
>> -#ifdef SIGBUS
>> - case SIGBUS:
>> - ss = " bus error";
>> - break;
>> -#endif
>> - case SIGSEGV:
>> - ss = " segmentation violation";
>> - break;
>> -#ifdef SIGIOT
>> - case SIGIOT:
>> - ss = " IOT instruction";
>> - break;
>> -#endif
>> -#ifdef SIGEMT
>> - case SIGEMT:
>> - ss = " EMT instruction";
>> - break;
>> -#endif
>> -#ifdef SIGALRM
>> - case SIGALRM:
>> - ss = " alarm clock";
>> - break;
>> -#endif
>> - case SIGINT:
>> - ss = " interrupt";
>> - break;
>> - default:
>> - ss = "";
>> + tst_res(TDEBUG, "syscall signaled: %s",
>> + strsignal(WTERMSIG(status)));
>> + }
>> }
>> - if (verbose_level >= 5)
>> - printf("Got signal %d%s\n", sig, ss);
>> - longjmp(again_buff, 3);
>> -}
>> -
>> -void my_signal(int sig, void (*func) ())
>> -{
>> - struct sigaction act;
>> + tst_res(TINFO, "Detected errors: %d", *num_errors);
>> + tst_res(TINFO, "Detected signals: %d", num_signals);
>> - act.sa_handler = func;
>> - memset(&act.sa_mask, 0x00, sizeof(sigset_t));
>> - act.sa_flags = SA_NOMASK | SA_RESTART;
>> - sigaction(sig, &act, 0);
>> -}
>> -
>> -void set_up_signals(void)
>> -{
>> - my_signal(SIGILL, again_handler);
>> -#ifdef SIGTRAP
>> - my_signal(SIGTRAP, again_handler);
>> -#endif
>> - my_signal(SIGFPE, again_handler);
>> -#ifdef SIGBUS
>> - my_signal(SIGBUS, again_handler);
>> -#endif
>> - my_signal(SIGSEGV, again_handler);
>> -#ifdef SIGIOT
>> - my_signal(SIGIOT, again_handler);
>> -#endif
>> -#ifdef SIGEMT
>> - my_signal(SIGEMT, again_handler);
>> -#endif
>> -#ifdef SIGALRM
>> - my_signal(SIGALRM, again_handler);
>> -#endif
>> - my_signal(SIGINT, again_handler);
>> + tst_res(TPASS, "System is still up and running");
>> }
>> -/*
>> - * NB: rand() (ie. RAND_MAX) might be on 31bits only!
>> - *
>> - * FIXME: 64-bit systems
>> - *
>> - * TODO: improve arg mixing (16bits and 8bits values, NULLs, etc.).
>> - * big values as returned by rand() are no so interresting
>> - * (except when used as pointers) because they may fall too
>> - * quickly in the invalid parameter sieve. Smaller values,
>> - * will be more insidious because they may refer to existing
>> - * objects (pids, fd, etc.).
>> - */
>> -long int rand_long(void)
>> +static void setup(void)
>> {
>> - int r1, r2;
>> -
>> - r1 = rand();
>> - r2 = rand();
>> -
>> - if (r1 & 0x10000L)
>> - r1 = 0;
>> - if (!r1 && (r2 & 0x50000L))
>> - r2 = 0;
>> - else if (!r1 && (r2 & 0x20000L))
>> - r2 &= 0x00ffL;
>> -
>> - return (long int)((r1 & 0xffffL) << 16) | (r2 & 0xffffL);
>> + num_errors = SAFE_MMAP(
>> + NULL, sizeof(int),
>> + PROT_READ | PROT_WRITE,
>> + MAP_SHARED | MAP_ANONYMOUS,
>> + -1, 0);
>> +
>> + if (tst_parse_int(str_num_tries, &num_tries, 1, INT_MAX))
>> + tst_brk(TBROK, "Invalid number of entries '%s'",
>> str_num_tries);
>> }
>> -void try_one_crash(int try_num)
>> +static void cleanup(void)
>> {
>> - long int sysno, arg1, arg2, arg3, arg4, arg5, arg6, arg7;
>> -
>> - do {
>> - sysno = rand() % sysno_max;
>> - } while (in_blacklist(sysno));
>> -
>> - arg1 = rand_long();
>> - arg2 = rand_long();
>> - arg3 = rand_long();
>> - arg4 = rand_long();
>> - arg5 = rand_long();
>> - arg6 = rand_long();
>> - arg7 = rand_long();
>> -
>> - if (x_opt || verbose_level >= 1)
>> - printf("%04d: syscall(%ld, %#lx, %#lx, %#lx, %#lx, %#lx, "
>> - "%#lx, %#lx)\n", try_num, sysno, arg1, arg2, arg3,
>> - arg4, arg5, arg6, arg7);
>> -
>> - if (!x_opt) {
>> - syscall(sysno, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
>> - record_errno(errno);
>> - }
>> + if (num_errors)
>> + SAFE_MUNMAP(num_errors, sizeof(int));
>> }
>> -/* The following syscalls create new processes which may cause the
>> test
>> - unable to finish. */
>> -int in_blacklist(int sysno)
>> -{
>> - int i;
>> - const int list[] = {
>> -#if defined(__ia64__)
>> - SYS_clone2,
>> -#else
>> - /*
>> - * No SYS_fork(vfork) on IA-64. Instead, it uses,
>> - * clone(child_stack=0, flags=CLONE_VM|CLONE_VFORK|SIGCHLD)
>> - * clone2()
>> - */
>> -
>> - /*
>> - * NOTE (garrcoop):
>> - * Could not find reference to SYS_fork(vfork) on mips32
>> - * with the Montavista / Octeon toolchain. Need to develop an
>> - * autoconf check for this item.
>> - */
>> -#if defined(__NR_vfork) && __NR_vfork
>> - SYS_vfork,
>> -#endif
>> -#if defined(__NR_fork) && __NR_fork
>> - SYS_fork,
>> -#endif
>> -#endif /* __ia64__ */
>> -#if defined(__NR_clone) && __NR_clone
>> - SYS_clone,
>> -#endif
>> -#if defined(__NR_vhangup) && __NR_vhangup
>> - __NR_vhangup, /* int vhangup(void); - terminal logout */
>> -#endif
>> -#if defined(__NR_pause) && __NR_pause
>> - __NR_pause, /* int pause(void); - sleep indefinitely */
>> -#endif
>> -#if defined(__NR_read) && __NR_read
>> - /*
>> - * ssize_t read(int fd, void *buf, size_t count); - will sleep
>> - * indefinitely if the first argument is 0
>> - */
>> - __NR_read,
>> -#endif
>> - -1
>> - };
>> -
>> - for (i = 0; list[i] != -1; i++) {
>> - if (sysno == list[i])
>> - return 1;
>> - }
>> -
>> - return 0;
>> -}
>> +static struct tst_test test = {
>> + .test_all = run,
>> + .setup = setup,
>> + .cleanup = cleanup,
>> + .needs_root = 1,
>
> Why does this test need root?
It was an experiment that I forgot to remove.
>
>> + .forks_child = 1,
>> + .runtime = 40,
>> + .options = (struct tst_option []) {
>> + {"n:", &str_num_tries, "Number of retries (default: 1000)"},
>> + {}
>> + },
>> +};
>>
>> ---
>> base-commit: bf9589d5bdeef15b3dbb03f896793306552d0d0f
>> change-id: 20250611-crash02_rewrite-b84ec3d0d22a
>>
>> Best regards,
>
>
- Andrea
More information about the ltp
mailing list