mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2026-03-03 18:28:01 +01:00
Before rseq became extensible, its original size was 32 bytes even though the active rseq area was only 20 bytes. This had the following impact in terms of userspace ecosystem evolution:

* The GNU libc between 2.35 and 2.39 exposes a __rseq_size symbol set to 32, even though the size of the active rseq area is really 20.
* The GNU libc 2.40 changes this __rseq_size to 20, thus making it express the active rseq area.
* Starting from glibc 2.41, __rseq_size corresponds to the AT_RSEQ_FEATURE_SIZE from getauxval(3).

This means that users of __rseq_size can always expect it to correspond to the active rseq area, except for the value 32, for which the active rseq area is 20 bytes.

Exposing a 32 bytes feature size would make life needlessly painful for userspace. Therefore, add a reserved field at the end of the rseq area to bump the feature size to 33 bytes. This reserved field is expected to be replaced with whatever field will come next, expecting that this field will be larger than 1 byte.

The effect of this change is to increase the size from 32 to 64 bytes before we actually have fields using that memory.

Clarify the allocation size and alignment requirements in the struct rseq uapi comment.

Change the value returned by getauxval(AT_RSEQ_ALIGN) to return the value of the active rseq area size rounded up to the next power of 2, which guarantees that the rseq structure will always be aligned on the nearest power of two large enough to contain it, even as it grows. Change the alignment check in the rseq registration accordingly.

This will minimize the amount of ABI corner-cases we need to document and require userspace to play games with. The rule stays simple when __rseq_size != 32:

  #define rseq_field_available(field) (__rseq_size >= offsetofend(struct rseq_abi, field))

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260220200642.1317826-3-mathieu.desnoyers@efficios.com
208 lines
6.9 KiB
C
208 lines
6.9 KiB
C
/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
#ifndef _UAPI_LINUX_RSEQ_H
#define _UAPI_LINUX_RSEQ_H

/*
 * linux/rseq.h
 *
 * Restartable sequences system call API
 *
 * Copyright (c) 2015-2018 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 */

#include <linux/types.h>
#include <asm/byteorder.h>

/*
 * Special (negative) values of the struct rseq cpu_id field:
 * RSEQ_CPU_ID_UNINITIALIZED means "rseq uninitialized" and
 * RSEQ_CPU_ID_REGISTRATION_FAILED means "rseq initialization failed".
 */
enum rseq_cpu_id_state {
	RSEQ_CPU_ID_UNINITIALIZED		= -1,
	RSEQ_CPU_ID_REGISTRATION_FAILED		= -2,
};

/*
 * Single-bit rseq flags; each enumerator occupies its own bit so they
 * can be OR-ed together.
 *
 * NOTE(review): presumably these are the values accepted in the flags
 * argument of the rseq system call -- confirm against the syscall
 * implementation.
 */
enum rseq_flags {
	RSEQ_FLAG_UNREGISTER			= (1 << 0),
	RSEQ_FLAG_SLICE_EXT_DEFAULT_ON		= (1 << 1),
};

/*
 * Bit positions of the rseq critical section flags. The corresponding
 * bit masks are defined by enum rseq_cs_flags.
 */
enum rseq_cs_flags_bit {
	/* Historical and unsupported bits */
	RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT	= 0,
	RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT	= 1,
	RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT	= 2,
	/* (3) Intentional gap to put new bits into a separate byte */

	/* User read only feature flags */
	RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE_BIT	= 4,
	RSEQ_CS_FLAG_SLICE_EXT_ENABLED_BIT	= 5,
};

enum rseq_cs_flags {
|
|
RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT =
|
|
(1U << RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT),
|
|
RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL =
|
|
(1U << RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT),
|
|
RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE =
|
|
(1U << RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT),
|
|
|
|
RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE =
|
|
(1U << RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE_BIT),
|
|
RSEQ_CS_FLAG_SLICE_EXT_ENABLED =
|
|
(1U << RSEQ_CS_FLAG_SLICE_EXT_ENABLED_BIT),
|
|
};
|
|
|
|
/*
 * struct rseq_cs is aligned on 4 * 8 bytes to ensure it is always
 * contained within a single cache-line. It is usually declared as
 * link-time constant data.
 *
 * The four fields add up to exactly 32 bytes, so the structure has no
 * padding and its size equals its alignment.
 */
struct rseq_cs {
	/* Version of this structure. */
	__u32 version;
	/* enum rseq_cs_flags */
	__u32 flags;
	/* Start of the critical section; post_commit_offset is relative to it. */
	__u64 start_ip;
	/* Offset from start_ip. */
	__u64 post_commit_offset;
	/* NOTE(review): presumably the abort handler address -- confirm. */
	__u64 abort_ip;
} __attribute__((aligned(4 * sizeof(__u64))));

/**
 * struct rseq_slice_ctrl - Time slice extension control structure
 * @all:	Compound value
 * @request:	Request for a time slice extension
 * @granted:	Granted time slice extension
 * @__reserved:	Reserved; pads the byte-wise view to the size of @all
 *
 * @request is set by user space and can be cleared by user space or kernel
 * space. @granted is set and cleared by the kernel and must only be read
 * by user space.
 *
 * @all overlays @request, @granted and @__reserved, so the whole control
 * word can be accessed as a single 32-bit value.
 */
struct rseq_slice_ctrl {
	union {
		__u32		all;
		struct {
			__u8	request;
			__u8	granted;
			__u16	__reserved;
		};
	};
};

/*
|
|
* The original size and alignment of the allocation for struct rseq is
|
|
* 32 bytes.
|
|
*
|
|
* The allocation size needs to be greater or equal to
|
|
* max(getauxval(AT_RSEQ_FEATURE_SIZE), 32), and the allocation needs to
|
|
* be aligned on max(getauxval(AT_RSEQ_ALIGN), 32).
|
|
*
|
|
* As an alternative, userspace is allowed to use both the original size
|
|
* and alignment of 32 bytes for backward compatibility.
|
|
*
|
|
* A single active struct rseq registration per thread is allowed.
|
|
*/
|
|
struct rseq {
|
|
/*
|
|
* Restartable sequences cpu_id_start field. Updated by the
|
|
* kernel. Read by user-space with single-copy atomicity
|
|
* semantics. This field should only be read by the thread which
|
|
* registered this data structure. Aligned on 32-bit. Always
|
|
* contains a value in the range of possible CPUs, although the
|
|
* value may not be the actual current CPU (e.g. if rseq is not
|
|
* initialized). This CPU number value should always be compared
|
|
* against the value of the cpu_id field before performing a rseq
|
|
* commit or returning a value read from a data structure indexed
|
|
* using the cpu_id_start value.
|
|
*/
|
|
__u32 cpu_id_start;
|
|
/*
|
|
* Restartable sequences cpu_id field. Updated by the kernel.
|
|
* Read by user-space with single-copy atomicity semantics. This
|
|
* field should only be read by the thread which registered this
|
|
* data structure. Aligned on 32-bit. Values
|
|
* RSEQ_CPU_ID_UNINITIALIZED and RSEQ_CPU_ID_REGISTRATION_FAILED
|
|
* have a special semantic: the former means "rseq uninitialized",
|
|
* and latter means "rseq initialization failed". This value is
|
|
* meant to be read within rseq critical sections and compared
|
|
* with the cpu_id_start value previously read, before performing
|
|
* the commit instruction, or read and compared with the
|
|
* cpu_id_start value before returning a value loaded from a data
|
|
* structure indexed using the cpu_id_start value.
|
|
*/
|
|
__u32 cpu_id;
|
|
/*
|
|
* Restartable sequences rseq_cs field.
|
|
*
|
|
* Contains NULL when no critical section is active for the current
|
|
* thread, or holds a pointer to the currently active struct rseq_cs.
|
|
*
|
|
* Updated by user-space, which sets the address of the currently
|
|
* active rseq_cs at the beginning of assembly instruction sequence
|
|
* block, and set to NULL by the kernel when it restarts an assembly
|
|
* instruction sequence block, as well as when the kernel detects that
|
|
* it is preempting or delivering a signal outside of the range
|
|
* targeted by the rseq_cs. Also needs to be set to NULL by user-space
|
|
* before reclaiming memory that contains the targeted struct rseq_cs.
|
|
*
|
|
* Read and set by the kernel. Set by user-space with single-copy
|
|
* atomicity semantics. This field should only be updated by the
|
|
* thread which registered this data structure. Aligned on 64-bit.
|
|
*
|
|
* 32-bit architectures should update the low order bits of the
|
|
* rseq_cs field, leaving the high order bits initialized to 0.
|
|
*/
|
|
__u64 rseq_cs;
|
|
|
|
/*
|
|
* Restartable sequences flags field.
|
|
*
|
|
* This field was initially intended to allow event masking for
|
|
* single-stepping through rseq critical sections with debuggers.
|
|
* The kernel does not support this anymore and the relevant bits
|
|
* are checked for being always false:
|
|
* - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT
|
|
* - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL
|
|
* - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE
|
|
*/
|
|
__u32 flags;
|
|
|
|
/*
|
|
* Restartable sequences node_id field. Updated by the kernel. Read by
|
|
* user-space with single-copy atomicity semantics. This field should
|
|
* only be read by the thread which registered this data structure.
|
|
* Aligned on 32-bit. Contains the current NUMA node ID.
|
|
*/
|
|
__u32 node_id;
|
|
|
|
/*
|
|
* Restartable sequences mm_cid field. Updated by the kernel. Read by
|
|
* user-space with single-copy atomicity semantics. This field should
|
|
* only be read by the thread which registered this data structure.
|
|
* Aligned on 32-bit. Contains the current thread's concurrency ID
|
|
* (allocated uniquely within a memory map).
|
|
*/
|
|
__u32 mm_cid;
|
|
|
|
/*
|
|
* Time slice extension control structure. CPU local updates from
|
|
* kernel and user space.
|
|
*/
|
|
struct rseq_slice_ctrl slice_ctrl;
|
|
|
|
/*
|
|
* Before rseq became extensible, its original size was 32 bytes even
|
|
* though the active rseq area was only 20 bytes.
|
|
* Exposing a 32 bytes feature size would make life needlessly painful
|
|
* for userspace. Therefore, add a reserved byte after byte 32
|
|
* to bump the rseq feature size from 32 to 33.
|
|
* The next field to be added to the rseq area will be larger
|
|
* than one byte, and will replace this reserved byte.
|
|
*/
|
|
__u8 __reserved;
|
|
|
|
/*
|
|
* Flexible array member at end of structure, after last feature field.
|
|
*/
|
|
char end[];
|
|
} __attribute__((aligned(32)));
|
|
|
|
#endif /* _UAPI_LINUX_RSEQ_H */