[LTP] [PATCH] lapi/tls: reserve pre-TCB space to avoid undefined behavior in clone10.c
Li Wang
liwang@redhat.com
Thu Feb 26 07:39:00 CET 2026
Hi Changwei,
These two patches make sense, can you send another one separately to the
mailing list?
On Thu, Feb 12, 2026 at 9:33 AM Changwei Zou <changwei.zou@canonical.com>
wrote:
> Hi Team,
> With the following two patches (also available at
> https://github.com/sheisc/ltp.git),
> I ran clone10 -i 1000000 on three machines (s390x, aarch64, and x86_64)
> using different glibc versions (2.35, 2.31, and 2.39, respectively).
> The test passed on all of these machines.
> If you have any feedback or suggestions, please feel free to let me know.
> Thank you very much.
> Kind regards,
> Changwei
> // patch for clone10.c
> ```sh
> $ git diff HEAD~1 HEAD
> diff --git a/testcases/kernel/syscalls/clone/clone10.c
> b/testcases/kernel/syscalls/clone/clone10.c
> index 9ffb49c37..96de811ad 100644
> --- a/testcases/kernel/syscalls/clone/clone10.c
> +++ b/testcases/kernel/syscalls/clone/clone10.c
> @@ -20,6 +20,7 @@
> #include "tst_test.h"
> #include "clone_platform.h"
> #include "lapi/syscalls.h"
> +#include "tst_atomic.h"
> #include "lapi/tls.h"
> #define TLS_EXP 100
> @@ -34,21 +35,15 @@ struct user_desc *tls_desc;
> static __thread int tls_var;
> static char *child_stack;
> -static volatile int child_done;
> +static tst_atomic_t child_done;
> static int flags = CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES |
> CLONE_SIGHAND | CLONE_SETTLS;
> static int touch_tls_in_child(void *arg LTP_ATTRIBUTE_UNUSED)
> {
> -#if defined(__x86_64__)
> - if (syscall(SYS_arch_prctl, ARCH_SET_FS, tls_ptr) == -1)
> - exit(EXIT_FAILURE);
> -#endif
> tls_var = TLS_EXP + 1;
> - tst_res(TINFO, "Child (PID: %d, TID: %d): TLS value set to: %d", getpid
> (), (pid_t)syscall(SYS_gettid), tls_var);
> + tst_atomic_store(1, &child_done);
> - TST_CHECKPOINT_WAKE(0);
> - free_tls();
> tst_syscall(__NR_exit, 0);
> return 0;
> }
> @@ -56,13 +51,16 @@ static int touch_tls_in_child(void *arg
> LTP_ATTRIBUTE_UNUSED)
> static void verify_tls(void)
> {
> tls_var = TLS_EXP;
> + tst_atomic_store(0, &child_done);
> TEST(ltp_clone7(flags, touch_tls_in_child, NULL, CHILD_STACK_SIZE,
> child_stack, NULL, tls_ptr, NULL));
> if (TST_RET == -1)
> tst_brk(TBROK | TTERRNO, "clone() failed");
> - TST_CHECKPOINT_WAIT(0);
> + while (tst_atomic_load(&child_done) == 0) {
> + usleep(10);
> + }
> if (tls_var == TLS_EXP) {
> tst_res(TPASS,
> @@ -84,6 +82,7 @@ static void setup(void)
> static void cleanup(void)
> {
> free(child_stack);
> + free_tls();
> }
> static struct tst_test test = {
> ```
> // current patch for tls.h
> ```sh
> $ git diff HEAD~2 HEAD~1
> diff --git a/include/lapi/tls.h b/include/lapi/tls.h
> index 468fe3086..7f2fa18a1 100644
> --- a/include/lapi/tls.h
> +++ b/include/lapi/tls.h
> @@ -22,6 +22,15 @@
> #define TLS_SIZE 4096
> #define TLS_ALIGN 16
> +/*
> + * Space allocated large enough to hold a struct pthread.
> + *
> + * Zero-initialized to ensure THREAD_SELF->cancelhandling starts at 0,
> + * avoiding undefined behavior (e.g., in clone10.c) in
> __pthread_disable_asynccancel(),
> + * which is called at thread cancellation points such as write().
> + */
> +#define TLS_PRE_TCB_SIZE (TLS_ALIGN * 256)
> +
> #if defined(__x86_64__)
> typedef struct {
> void *tcb;
> @@ -36,10 +45,11 @@ extern void *tls_ptr;
> static inline void *allocate_tls_area(void)
> {
> - void *tls_area = aligned_alloc(TLS_ALIGN, TLS_SIZE);
> + char *tls_area = aligned_alloc(TLS_ALIGN, TLS_PRE_TCB_SIZE + TLS_SIZE);
> if (!tls_area)
> tst_brk(TBROK | TERRNO, "aligned_alloc failed");
> - memset(tls_area, 0, TLS_SIZE);
> + memset(tls_area, 0, TLS_PRE_TCB_SIZE + TLS_SIZE);
> + tls_area += TLS_PRE_TCB_SIZE;
> #if defined(__x86_64__)
> tcb_t *tcb = (tcb_t *)tls_area;
> @@ -59,7 +69,7 @@ static inline void free_tls(void)
> {
> usleep(10000);
> if (tls_ptr) {
> - free(tls_ptr);
> + free(((char *)tls_ptr) - TLS_PRE_TCB_SIZE);
> tls_ptr = NULL;
> }
> }
> ```
>
>
> On 2/10/26 23:40, Changwei Zou wrote:
>
> Hi Petr,
> I have rewritten the touch_tls_in_child() function and pushed it to my
> GitHub account.
> https://github.com/sheisc/ltp
> The motivation was to avoid interacting with pthread’s memory model.
> I have tested it on both x86_64 and arm64.
> The command clone10 -i 100000 passed successfully on both platforms.
> Could you please take a look at it?
> Thank you very much.
> Kind regards,
> Changwei
> 1 On x86_64
> ```sh
> x86_64_ltp$ ./testcases/kernel/syscalls/clone/clone10 -i 100000
> clone10.c:66: TPASS: Parent (PID: 655699, TID: 655699): TLS value
> correct: 100
> clone10.c:66: TPASS: Parent (PID: 655699, TID: 655699): TLS value
> correct: 100
> Summary:
> passed 100000
> failed 0
> broken 0
> skipped 0
> warnings 0
> ```
> 2 On arm64
> ```sh
> arm64_ltp$ ./testcases/kernel/syscalls/clone/clone10 -i 100000
> clone10.c:66: TPASS: Parent (PID: 222184, TID: 222184): TLS value
> correct: 100
> clone10.c:66: TPASS: Parent (PID: 222184, TID: 222184): TLS value
> correct: 100
> Summary:
> passed 100000
> failed 0
> broken 0
> skipped 0
> warnings 0
> ```
>
>
> On 2/10/26 18:03, Changwei Zou wrote:
>
> Hi Petr,
> LTP uses glibc.
> Even a simple library function like write(),
> which wraps the sys_write system call,
> is involved in the memory model of pthreads.
> Therefore, touch_tls_in_child() must behave almost exactly like a pthread.
> Otherwise, memory corruption can occur when executing functions inside
> glibc.
> In the current version of clone10.c, according to the assembly code,
> the statement tls_var = 0x65; already constitutes a buffer overflow on
> x86_64.
> Unfortunately, the struct pthread is opaque and may vary between different
> versions of glibc.
> I assume the purpose of clone10.c is to test whether the CLONE_SETTLS flag
> works.
> Making touch_tls_in_child() behave exactly like a pthread, however, is
> extremely difficult.
> static __thread int tls_var;
> static int touch_tls_in_child(void *arg)
> {
> // 0xfffffffffffffffc is -4
> // movl $0x65,%fs:0xfffffffffffffffc
> tls_var = 0x65;
> }
> ```sh
> (gdb) disassemble touch_tls_in_child
> Dump of assembler code for function touch_tls_in_child:
> 0x000055555555be40 <+0>: endbr64
> 0x000055555555be44 <+4>: push %rbx
> 0x000055555555be45 <+5>: mov 0x33c0c(%rip),%rdx # 0x55555558fa58 <tls_ptr>
> 0x000055555555be4c <+12>: xor %eax,%eax
> 0x000055555555be4e <+14>: mov $0x1002,%esi
> 0x000055555555be53 <+19>: mov $0x9e,%edi
> 0x000055555555be58 <+24>: call 0x55555555b500 <syscall@plt>
> 0x000055555555be5d <+29>: cmp $0xffffffffffffffff,%rax
> 0x000055555555be61 <+33>: je 0x55555555bf1d <touch_tls_in_child+221>
> 0x000055555555be67 <+39>: movl $0x65,%fs:0xfffffffffffffffc //buffer
> overflow ?
> ```
> On 2/9/26 22:47, Petr Vorel wrote:
>
> Hi Changwei,
>
>
> Hi Petr,
> With the original upstream LTP,
> I ran clone10 -i 1000 on three machines (including AArch64 and AMD64), and
> it failed on all of them.
>
> This suggests there may be another bug that we still need to identify.
>
> Yes, it's a separate bug, not relevant to your fix. I trigger it on x86_64.
>
> Kind regards,
> Petr
>
>
> Thank you very much for your invaluable information.
> Kind regards,
> Changwei
> *1. On an AArch64 cloud instance*
> ```sh
> azure@clone10-aarch64-kcp:~/orig/ltp$
> ./testcases/kernel/syscalls/clone/clone10 -i 1000
> clone10.c:68: TPASS: Parent (PID: 106163, TID: 106163): TLS value correct: 100
> clone10.c:48: TINFO: Child (PID: 106163, TID: 106200): TLS value set to: 101
> tst_test.c:1953: TBROK: Test killed by SIGBUS!
> Summary:
> passed 36
> failed 0
> broken 1
> skipped 0
> warnings 0
> ```
> *2. On an AMD64 machine*
> ```sh
> ubuntu@ZBook:~/orig/ltp$ ./testcases/kernel/syscalls/clone/clone10 -i 1000
> clone10.c:48: TINFO: Child (PID: 125560, TID: 125870): TLS value set to: 101
> clone10.c:68: TPASS: Parent (PID: 125560, TID: 125560): TLS value correct: 100
> double free or corruption (out)
> clone10.c:48: TINFO: Child (PID: 125560, TID: 125871): TLS value set to: 101
> clone10.c:68: TPASS: Parent (PID: 125560, TID: 125560): TLS value correct: 100
> tst_test.c:1953: TBROK: Test killed by SIGIOT/SIGABRT!
> Summary:
> passed 311
> failed 0
> broken 1
> skipped 0
> warnings 0
> ```
> *3. On an AArch64 machine*
> ```sh
> ubuntu@asus-pe100a:~/orig/ltp$
> ./testcases/kernel/syscalls/clone/clone10 -i 1000
> clone10.c:68: TPASS: Parent (PID: 158953, TID: 158953): TLS value correct: 100
> clone10.c:48: TINFO: Child (PID: 158953, TID: 159029): TLS value set to: 101
> tst_test.c:1953: TBROK: Test killed by SIGSEGV!
> Summary:
> passed 75
> failed 0
> broken 1
> skipped 0
> warnings 0
> ```
>
> On 2/9/26 18:51, Petr Vorel wrote:
>
> Hi Changwei,
>
> Allocate extra space before the TLS area to hold a struct pthread, ensuring
> THREAD_SELF->cancelhandling is initialized to 0. This prevents undefined
> behavior in __pthread_disable_asynccancel(), which is called at thread
> cancellation points such as write().
> Without this, touch_tls_in_child() could get stuck in tst_res().
>
> LGTM, but I'd prefer others had a look on it.
> Acked-by: Petr Vorel <pvorel@suse.cz>
>
> BTW clone10.c segfaults w/a the patch when run with more iterations:
>
> ./clone10 -i200
> clone10.c:48: TINFO: Child (PID: 4271, TID: 4285): TLS value set to: 101
> clone10.c:68: TPASS: Parent (PID: 4271, TID: 4271): TLS value correct: 100
> clone10.c:48: TINFO: Child (PID: 4271, TID: 4286): TLS value set to: 101
> clone10.c:68: TPASS: Parent (PID: 4271, TID: 4271): TLS value correct: 100
> tst_test.c:1953: TBROK: Test killed by SIGSEGV!
>
> Summary:
> passed 15
> failed 0
> broken 1
> skipped 0
> warnings 0
>
> Kind regards,
> Petr
>
> (gdb) bt
> 0 futex_wait () at ../sysdeps/nptl/futex-internal.h:141
> 1 futex_wait_simple () at ../sysdeps/nptl/futex-internal.h:172
> 2 __libc_disable_asynccancel () at ../nptl/cancellation.c:100
> 3 __GI___libc_write () at ../sysdeps/unix/sysv/linux/write.c:26
> 4 __GI___libc_write () at ../sysdeps/unix/sysv/linux/write.c:24
> 5 print_result () at tst_test.c:387
> 6 tst_vres_ () at tst_test.c:401
> 7 tst_res_ () at tst_test.c:512
> 8 touch_tls_in_child (arg=<optimized out>) at clone10.c:48
> 9 thread_start () at ../sysdeps/unix/sysv/linux/aarch64/clone.S:78
> Signed-off-by: Changwei Zou <changwei.zou@canonical.com>
> ---
> include/lapi/tls.h | 16 +++++++++++++---
> 1 file changed, 13 insertions(+), 3 deletions(-)
> diff --git a/include/lapi/tls.h b/include/lapi/tls.h
> index 468fe3086..7f2fa18a1 100644
> --- a/include/lapi/tls.h
> +++ b/include/lapi/tls.h
> @@ -22,6 +22,15 @@
> #define TLS_SIZE 4096
> #define TLS_ALIGN 16
> +/*
> + * Space allocated large enough to hold a struct pthread.
> + *
> + * Zero-initialized to ensure THREAD_SELF->cancelhandling starts at 0,
> + * avoiding undefined behavior (e.g., in clone10.c) in __pthread_disable_asynccancel(),
> + * which is called at thread cancellation points such as write().
> + */
> +#define TLS_PRE_TCB_SIZE (TLS_ALIGN * 256)
> +
> #if defined(__x86_64__)
> typedef struct {
> void *tcb;
> @@ -36,10 +45,11 @@ extern void *tls_ptr;
> static inline void *allocate_tls_area(void)
> {
> - void *tls_area = aligned_alloc(TLS_ALIGN, TLS_SIZE);
> + char *tls_area = aligned_alloc(TLS_ALIGN, TLS_PRE_TCB_SIZE + TLS_SIZE);
> if (!tls_area)
> tst_brk(TBROK | TERRNO, "aligned_alloc failed");
> - memset(tls_area, 0, TLS_SIZE);
> + memset(tls_area, 0, TLS_PRE_TCB_SIZE + TLS_SIZE);
> + tls_area += TLS_PRE_TCB_SIZE;
> #if defined(__x86_64__)
> tcb_t *tcb = (tcb_t *)tls_area;
> @@ -59,7 +69,7 @@ static inline void free_tls(void)
> {
> usleep(10000);
> if (tls_ptr) {
> - free(tls_ptr);
> + free(((char *)tls_ptr) - TLS_PRE_TCB_SIZE);
> tls_ptr = NULL;
> }
> }
>
>
--
Regards,
Li Wang
More information about the ltp
mailing list