mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2026-01-04 22:54:56 +01:00
Add "extern" to the glibc-defined weak rseq symbols to convert the rseq
selftest's usage from weak symbol definitions to weak symbol _references_.
Effectively re-defining the glibc symbols wreaks havoc when building with
-fno-common, e.g. generates segfaults when running multi-threaded programs,
as dynamically linked applications end up with multiple versions of the
symbols.
Building with -fcommon, which until recently has been the default for
GCC and clang, papers over the bug by allowing the linker to resolve the
weak/tentative definition to glibc's "real" definition.
Note, the symbol itself (or rather its address), not the value of the
symbol, is set to 0/NULL for unresolved weak symbol references, as the
symbol doesn't exist and thus can't have a value. Check for a NULL rseq
size pointer to handle the scenario where the test is statically linked
against a libc that doesn't support rseq in any capacity.
Fixes: 3bcbc20942 ("selftests/rseq: Play nice with binaries statically linked against glibc 2.35+")
Reported-by: Thomas Gleixner <tglx@linutronix.de>
Suggested-by: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: stable@vger.kernel.org
Closes: https://lore.kernel.org/all/87frdoybk4.ffs@tglx
309 lines
7.4 KiB
C
309 lines
7.4 KiB
C
// SPDX-License-Identifier: LGPL-2.1
|
|
/*
|
|
* rseq.c
|
|
*
|
|
* Copyright (C) 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; only
|
|
* version 2.1 of the License.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*/
|
|
|
|
#define _GNU_SOURCE
|
|
#include <errno.h>
|
|
#include <sched.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <unistd.h>
|
|
#include <syscall.h>
|
|
#include <assert.h>
|
|
#include <signal.h>
|
|
#include <limits.h>
|
|
#include <dlfcn.h>
|
|
#include <stddef.h>
|
|
#include <sys/auxv.h>
|
|
#include <linux/auxvec.h>
|
|
|
|
#include <linux/compiler.h>
|
|
|
|
#include "../kselftest.h"
|
|
#include "rseq.h"
|
|
|
|
/*
|
|
* Define weak versions to play nice with binaries that are statically linked
|
|
* against a libc that doesn't support registering its own rseq.
|
|
*/
|
|
extern __weak ptrdiff_t __rseq_offset;
|
|
extern __weak unsigned int __rseq_size;
|
|
extern __weak unsigned int __rseq_flags;
|
|
|
|
static const ptrdiff_t *libc_rseq_offset_p = &__rseq_offset;
|
|
static const unsigned int *libc_rseq_size_p = &__rseq_size;
|
|
static const unsigned int *libc_rseq_flags_p = &__rseq_flags;
|
|
|
|
/* Distance from the thread pointer to the rseq area. */
ptrdiff_t rseq_offset;

/*
 * Size of the registered rseq area.  Starts at -1U; rseq_init() replaces it
 * with either the libc-provided size or 0, and 0 means that no registration
 * has succeeded (yet).
 */
unsigned int rseq_size = -1U;

/* Flags used during rseq registration. */
unsigned int rseq_flags;

/*
 * Non-zero when this library, rather than libc, is responsible for the rseq
 * registration of each thread.
 */
static int rseq_ownership;
|
|
|
|
/* Number of bytes reserved per thread for the rseq TLS area (deliberately large). */
#define RSEQ_THREAD_AREA_ALLOC_SIZE	1024

/* Feature size of the original struct rseq: 20 bytes. */
#define ORIG_RSEQ_FEATURE_SIZE		20

/* Allocation size of the original struct rseq: 32 bytes. */
#define ORIG_RSEQ_ALLOC_SIZE		32
|
|
|
|
/*
|
|
* Use a union to ensure we allocate a TLS area of 1024 bytes to accomodate an
|
|
* rseq registration that is larger than the current rseq ABI.
|
|
*/
|
|
union rseq_tls {
|
|
struct rseq_abi abi;
|
|
char dummy[RSEQ_THREAD_AREA_ALLOC_SIZE];
|
|
};
|
|
|
|
static
|
|
__thread union rseq_tls __rseq __attribute__((tls_model("initial-exec"))) = {
|
|
.abi = {
|
|
.cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED,
|
|
},
|
|
};
|
|
|
|
/*
 * Issue the raw rseq system call.
 *
 * All four arguments are forwarded unchanged to syscall(__NR_rseq, ...) and
 * the result of syscall() is handed back to the caller.
 */
static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len,
		    int flags, uint32_t sig)
{
	long ret = syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);

	return ret;
}
|
|
|
|
/*
 * Issue the raw getcpu system call.
 *
 * @cpu and @node are forwarded as the first two arguments; the third
 * argument is always NULL.  Returns the result of syscall().
 */
static int sys_getcpu(unsigned *cpu, unsigned *node)
{
	long ret = syscall(__NR_getcpu, cpu, node, NULL);

	return ret;
}
|
|
|
|
/*
 * Probe whether the rseq system call is available.
 *
 * The probe passes a NULL area with zero length, flags and signature, and is
 * required to fail: any return value other than -1 aborts the process.
 *
 * Returns false when errno is ENOSYS and true when errno is EINVAL.  Any
 * other errno value aborts the process.
 */
bool rseq_available(void)
{
	if (sys_rseq(NULL, 0, 0, 0) != -1)
		abort();

	if (errno == ENOSYS)
		return false;
	if (errno == EINVAL)
		return true;

	abort();
}
|
|
|
|
/* The rseq areas need to be at least 32 bytes. */
|
|
static
|
|
unsigned int get_rseq_min_alloc_size(void)
|
|
{
|
|
unsigned int alloc_size = rseq_size;
|
|
|
|
if (alloc_size < ORIG_RSEQ_ALLOC_SIZE)
|
|
alloc_size = ORIG_RSEQ_ALLOC_SIZE;
|
|
return alloc_size;
|
|
}
|
|
|
|
/*
|
|
* Return the feature size supported by the kernel.
|
|
*
|
|
* Depending on the value returned by getauxval(AT_RSEQ_FEATURE_SIZE):
|
|
*
|
|
* 0: Return ORIG_RSEQ_FEATURE_SIZE (20)
|
|
* > 0: Return the value from getauxval(AT_RSEQ_FEATURE_SIZE).
|
|
*
|
|
* It should never return a value below ORIG_RSEQ_FEATURE_SIZE.
|
|
*/
|
|
static
|
|
unsigned int get_rseq_kernel_feature_size(void)
|
|
{
|
|
unsigned long auxv_rseq_feature_size, auxv_rseq_align;
|
|
|
|
auxv_rseq_align = getauxval(AT_RSEQ_ALIGN);
|
|
assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE);
|
|
|
|
auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE);
|
|
assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE);
|
|
if (auxv_rseq_feature_size)
|
|
return auxv_rseq_feature_size;
|
|
else
|
|
return ORIG_RSEQ_FEATURE_SIZE;
|
|
}
|
|
|
|
int rseq_register_current_thread(void)
|
|
{
|
|
int rc;
|
|
|
|
if (!rseq_ownership) {
|
|
/* Treat libc's ownership as a successful registration. */
|
|
return 0;
|
|
}
|
|
rc = sys_rseq(&__rseq.abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG);
|
|
if (rc) {
|
|
/*
|
|
* After at least one thread has registered successfully
|
|
* (rseq_size > 0), the registration of other threads should
|
|
* never fail.
|
|
*/
|
|
if (RSEQ_READ_ONCE(rseq_size) > 0) {
|
|
/* Incoherent success/failure within process. */
|
|
abort();
|
|
}
|
|
return -1;
|
|
}
|
|
assert(rseq_current_cpu_raw() >= 0);
|
|
|
|
/*
|
|
* The first thread to register sets the rseq_size to mimic the libc
|
|
* behavior.
|
|
*/
|
|
if (RSEQ_READ_ONCE(rseq_size) == 0) {
|
|
RSEQ_WRITE_ONCE(rseq_size, get_rseq_kernel_feature_size());
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int rseq_unregister_current_thread(void)
|
|
{
|
|
int rc;
|
|
|
|
if (!rseq_ownership) {
|
|
/* Treat libc's ownership as a successful unregistration. */
|
|
return 0;
|
|
}
|
|
rc = sys_rseq(&__rseq.abi, get_rseq_min_alloc_size(), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
|
|
if (rc)
|
|
return -1;
|
|
return 0;
|
|
}
|
|
|
|
static __attribute__((constructor))
|
|
void rseq_init(void)
|
|
{
|
|
/*
|
|
* If the libc's registered rseq size isn't already valid, it may be
|
|
* because the binary is dynamically linked and not necessarily due to
|
|
* libc not having registered a restartable sequence. Try to find the
|
|
* symbols if that's the case.
|
|
*/
|
|
if (!libc_rseq_size_p || !*libc_rseq_size_p) {
|
|
libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
|
|
libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
|
|
libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
|
|
}
|
|
if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p &&
|
|
*libc_rseq_size_p != 0) {
|
|
unsigned int libc_rseq_size;
|
|
|
|
/* rseq registration owned by glibc */
|
|
rseq_offset = *libc_rseq_offset_p;
|
|
libc_rseq_size = *libc_rseq_size_p;
|
|
rseq_flags = *libc_rseq_flags_p;
|
|
|
|
/*
|
|
* Previous versions of glibc expose the value
|
|
* 32 even though the kernel only supported 20
|
|
* bytes initially. Therefore treat 32 as a
|
|
* special-case. glibc 2.40 exposes a 20 bytes
|
|
* __rseq_size without using getauxval(3) to
|
|
* query the supported size, while still allocating a 32
|
|
* bytes area. Also treat 20 as a special-case.
|
|
*
|
|
* Special-cases are handled by using the following
|
|
* value as active feature set size:
|
|
*
|
|
* rseq_size = min(32, get_rseq_kernel_feature_size())
|
|
*/
|
|
switch (libc_rseq_size) {
|
|
case ORIG_RSEQ_FEATURE_SIZE:
|
|
fallthrough;
|
|
case ORIG_RSEQ_ALLOC_SIZE:
|
|
{
|
|
unsigned int rseq_kernel_feature_size = get_rseq_kernel_feature_size();
|
|
|
|
if (rseq_kernel_feature_size < ORIG_RSEQ_ALLOC_SIZE)
|
|
rseq_size = rseq_kernel_feature_size;
|
|
else
|
|
rseq_size = ORIG_RSEQ_ALLOC_SIZE;
|
|
break;
|
|
}
|
|
default:
|
|
/* Otherwise just use the __rseq_size from libc as rseq_size. */
|
|
rseq_size = libc_rseq_size;
|
|
break;
|
|
}
|
|
return;
|
|
}
|
|
rseq_ownership = 1;
|
|
|
|
/* Calculate the offset of the rseq area from the thread pointer. */
|
|
rseq_offset = (void *)&__rseq.abi - rseq_thread_pointer();
|
|
|
|
/* rseq flags are deprecated, always set to 0. */
|
|
rseq_flags = 0;
|
|
|
|
/*
|
|
* Set the size to 0 until at least one thread registers to mimic the
|
|
* libc behavior.
|
|
*/
|
|
rseq_size = 0;
|
|
}
|
|
|
|
static __attribute__((destructor))
|
|
void rseq_exit(void)
|
|
{
|
|
if (!rseq_ownership)
|
|
return;
|
|
rseq_offset = 0;
|
|
rseq_size = -1U;
|
|
rseq_ownership = 0;
|
|
}
|
|
|
|
/*
 * Return the CPU number reported by sched_getcpu().
 *
 * A negative result from sched_getcpu() is treated as fatal: the error is
 * reported with perror() and the process is aborted.
 */
int32_t rseq_fallback_current_cpu(void)
{
	int32_t cpu = sched_getcpu();

	if (cpu >= 0)
		return cpu;

	perror("sched_getcpu()");
	abort();
}
|
|
|
|
/*
 * Return the node id reported by the getcpu system call.
 *
 * When sys_getcpu() returns non-zero, the error is reported with perror()
 * and that return value is passed back to the caller instead.
 */
int32_t rseq_fallback_current_node(void)
{
	uint32_t cpu_id, node_id;
	int ret = sys_getcpu(&cpu_id, &node_id);

	if (!ret)
		return (int32_t) node_id;

	perror("sys_getcpu()");
	return ret;
}
|