Files
linux-stable-mirror/include/linux/uprobes.h
T
Liao Chang 83179cd678 uprobes: Remove the spinlock within handle_singlestep()
This patch introduces a flag to track TIF_SIGPENDING is suppress
temporarily during the uprobe single-step. Upon uprobe singlestep is
handled and the flag is confirmed, it could resume the TIF_SIGPENDING
directly without acquiring the siglock in most case, then reducing
contention and improving overall performance.

I've use the script developed by Andrii in [1] to run benchmark. The CPU
used was Kunpeng916 (Hi1616), 4 NUMA nodes, 64 cores@2.4GHz running the
kernel on next tree + the optimization for get_xol_insn_slot() [2].

before-opt
----------
uprobe-nop      ( 1 cpus):    0.907 ± 0.003M/s  (  0.907M/s/cpu)
uprobe-nop      ( 2 cpus):    1.676 ± 0.008M/s  (  0.838M/s/cpu)
uprobe-nop      ( 4 cpus):    3.210 ± 0.003M/s  (  0.802M/s/cpu)
uprobe-nop      ( 8 cpus):    4.457 ± 0.003M/s  (  0.557M/s/cpu)
uprobe-nop      (16 cpus):    3.724 ± 0.011M/s  (  0.233M/s/cpu)
uprobe-nop      (32 cpus):    2.761 ± 0.003M/s  (  0.086M/s/cpu)
uprobe-nop      (64 cpus):    1.293 ± 0.015M/s  (  0.020M/s/cpu)

uprobe-push     ( 1 cpus):    0.883 ± 0.001M/s  (  0.883M/s/cpu)
uprobe-push     ( 2 cpus):    1.642 ± 0.005M/s  (  0.821M/s/cpu)
uprobe-push     ( 4 cpus):    3.086 ± 0.002M/s  (  0.771M/s/cpu)
uprobe-push     ( 8 cpus):    3.390 ± 0.003M/s  (  0.424M/s/cpu)
uprobe-push     (16 cpus):    2.652 ± 0.005M/s  (  0.166M/s/cpu)
uprobe-push     (32 cpus):    2.713 ± 0.005M/s  (  0.085M/s/cpu)
uprobe-push     (64 cpus):    1.313 ± 0.009M/s  (  0.021M/s/cpu)

uprobe-ret      ( 1 cpus):    1.774 ± 0.000M/s  (  1.774M/s/cpu)
uprobe-ret      ( 2 cpus):    3.350 ± 0.001M/s  (  1.675M/s/cpu)
uprobe-ret      ( 4 cpus):    6.604 ± 0.000M/s  (  1.651M/s/cpu)
uprobe-ret      ( 8 cpus):    6.706 ± 0.005M/s  (  0.838M/s/cpu)
uprobe-ret      (16 cpus):    5.231 ± 0.001M/s  (  0.327M/s/cpu)
uprobe-ret      (32 cpus):    5.743 ± 0.003M/s  (  0.179M/s/cpu)
uprobe-ret      (64 cpus):    4.726 ± 0.016M/s  (  0.074M/s/cpu)

after-opt
---------
uprobe-nop      ( 1 cpus):    0.985 ± 0.002M/s  (  0.985M/s/cpu)
uprobe-nop      ( 2 cpus):    1.773 ± 0.005M/s  (  0.887M/s/cpu)
uprobe-nop      ( 4 cpus):    3.304 ± 0.001M/s  (  0.826M/s/cpu)
uprobe-nop      ( 8 cpus):    5.328 ± 0.002M/s  (  0.666M/s/cpu)
uprobe-nop      (16 cpus):    6.475 ± 0.002M/s  (  0.405M/s/cpu)
uprobe-nop      (32 cpus):    4.831 ± 0.082M/s  (  0.151M/s/cpu)
uprobe-nop      (64 cpus):    2.564 ± 0.053M/s  (  0.040M/s/cpu)

uprobe-push     ( 1 cpus):    0.964 ± 0.001M/s  (  0.964M/s/cpu)
uprobe-push     ( 2 cpus):    1.766 ± 0.002M/s  (  0.883M/s/cpu)
uprobe-push     ( 4 cpus):    3.290 ± 0.009M/s  (  0.823M/s/cpu)
uprobe-push     ( 8 cpus):    4.670 ± 0.002M/s  (  0.584M/s/cpu)
uprobe-push     (16 cpus):    5.197 ± 0.004M/s  (  0.325M/s/cpu)
uprobe-push     (32 cpus):    5.068 ± 0.161M/s  (  0.158M/s/cpu)
uprobe-push     (64 cpus):    2.605 ± 0.026M/s  (  0.041M/s/cpu)

uprobe-ret      ( 1 cpus):    1.833 ± 0.001M/s  (  1.833M/s/cpu)
uprobe-ret      ( 2 cpus):    3.384 ± 0.003M/s  (  1.692M/s/cpu)
uprobe-ret      ( 4 cpus):    6.677 ± 0.004M/s  (  1.669M/s/cpu)
uprobe-ret      ( 8 cpus):    6.854 ± 0.005M/s  (  0.857M/s/cpu)
uprobe-ret      (16 cpus):    6.508 ± 0.006M/s  (  0.407M/s/cpu)
uprobe-ret      (32 cpus):    5.793 ± 0.009M/s  (  0.181M/s/cpu)
uprobe-ret      (64 cpus):    4.743 ± 0.016M/s  (  0.074M/s/cpu)

Above benchmark results demonstrates a obivious improvement in the
scalability of trig-uprobe-nop and trig-uprobe-push, the peak throughput
of which are from 4.5M/s to 6.4M/s and 3.3M/s to 5.1M/s individually.

[1] https://lore.kernel.org/all/20240731214256.3588718-1-andrii@kernel.org
[2] https://lore.kernel.org/all/20240727094405.1362496-1-liaochang1@huawei.com

Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Liao Chang <liaochang1@huawei.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20250124093826.2123675-3-liaochang1@huawei.com
2025-02-05 10:29:12 +01:00

290 lines
9.4 KiB
C

/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _LINUX_UPROBES_H
#define _LINUX_UPROBES_H
/*
* User-space Probes (UProbes)
*
* Copyright (C) IBM Corporation, 2008-2012
* Authors:
* Srikar Dronamraju
* Jim Keniston
* Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra
*/
#include <linux/errno.h>
#include <linux/rbtree.h>
#include <linux/types.h>
#include <linux/wait.h>
#include <linux/timer.h>
#include <linux/seqlock.h>
struct uprobe;
struct vm_area_struct;
struct mm_struct;
struct inode;
struct notifier_block;
struct page;
/*
* Allowed return values from uprobe consumer's handler callback
* with following meaning:
*
* UPROBE_HANDLER_REMOVE
* - Remove the uprobe breakpoint from current->mm.
* UPROBE_HANDLER_IGNORE
* - Ignore ret_handler callback for this consumer.
*/
#define UPROBE_HANDLER_REMOVE 1
#define UPROBE_HANDLER_IGNORE 2
#define MAX_URETPROBE_DEPTH 64
struct uprobe_consumer {
/*
* handler() can return UPROBE_HANDLER_REMOVE to signal the need to
* unregister uprobe for current process. If UPROBE_HANDLER_REMOVE is
* returned, filter() callback has to be implemented as well and it
* should return false to "confirm" the decision to uninstall uprobe
* for the current process. If filter() is omitted or returns true,
* UPROBE_HANDLER_REMOVE is effectively ignored.
*/
int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs, __u64 *data);
int (*ret_handler)(struct uprobe_consumer *self,
unsigned long func,
struct pt_regs *regs, __u64 *data);
bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm);
struct list_head cons_node;
__u64 id; /* set when uprobe_consumer is registered */
};
#ifdef CONFIG_UPROBES
#include <asm/uprobes.h>
enum uprobe_task_state {
UTASK_RUNNING,
UTASK_SSTEP,
UTASK_SSTEP_ACK,
UTASK_SSTEP_TRAPPED,
};
/* The state of hybrid-lifetime uprobe inside struct return_instance */
enum hprobe_state {
HPROBE_LEASED, /* uretprobes_srcu-protected uprobe */
HPROBE_STABLE, /* refcounted uprobe */
HPROBE_GONE, /* NULL uprobe, SRCU expired, refcount failed */
HPROBE_CONSUMED, /* uprobe "consumed" by uretprobe handler */
};
/*
* Hybrid lifetime uprobe. Represents a uprobe instance that could be either
* SRCU protected (with SRCU protection eventually potentially timing out),
* refcounted using uprobe->ref, or there could be no valid uprobe (NULL).
*
* hprobe's internal state is setup such that background timer thread can
* atomically "downgrade" temporarily RCU-protected uprobe into refcounted one
* (or no uprobe, if refcounting failed).
*
* *stable* pointer always point to the uprobe (or could be NULL if there is
* was no valid underlying uprobe to begin with).
*
* *leased* pointer is the key to achieving race-free atomic lifetime state
* transition and can have three possible states:
* - either the same non-NULL value as *stable*, in which case uprobe is
* SRCU-protected;
* - NULL, in which case uprobe (if there is any) is refcounted;
* - special __UPROBE_DEAD value, which represents an uprobe that was SRCU
* protected initially, but SRCU period timed out and we attempted to
* convert it to refcounted, but refcount_inc_not_zero() failed, because
* uprobe effectively went away (the last consumer unsubscribed). In this
* case it's important to know that *stable* pointer (which still has
* non-NULL uprobe pointer) shouldn't be used, because lifetime of
* underlying uprobe is not guaranteed anymore. __UPROBE_DEAD is just an
* internal marker and is handled transparently by hprobe_fetch() helper.
*
* When uprobe is SRCU-protected, we also record srcu_idx value, necessary for
* SRCU unlocking.
*
* See hprobe_expire() and hprobe_fetch() for details of race-free uprobe
* state transitioning details. It all hinges on atomic xchg() over *leaded*
* pointer. *stable* pointer, once initially set, is not modified concurrently.
*/
struct hprobe {
enum hprobe_state state;
int srcu_idx;
struct uprobe *uprobe;
};
/*
* uprobe_task: Metadata of a task while it singlesteps.
*/
struct uprobe_task {
enum uprobe_task_state state;
unsigned int depth;
struct return_instance *return_instances;
struct return_instance *ri_pool;
struct timer_list ri_timer;
seqcount_t ri_seqcount;
union {
struct {
struct arch_uprobe_task autask;
unsigned long vaddr;
};
struct {
struct callback_head dup_xol_work;
unsigned long dup_xol_addr;
};
};
struct uprobe *active_uprobe;
unsigned long xol_vaddr;
bool signal_denied;
struct arch_uprobe *auprobe;
};
struct return_consumer {
__u64 cookie;
__u64 id;
};
struct return_instance {
struct hprobe hprobe;
unsigned long func;
unsigned long stack; /* stack pointer */
unsigned long orig_ret_vaddr; /* original return address */
bool chained; /* true, if instance is nested */
int cons_cnt; /* total number of session consumers */
struct return_instance *next; /* keep as stack */
struct rcu_head rcu;
/* singular pre-allocated return_consumer instance for common case */
struct return_consumer consumer;
/*
* extra return_consumer instances for rare cases of multiple session consumers,
* contains (cons_cnt - 1) elements
*/
struct return_consumer *extra_consumers;
} ____cacheline_aligned;
enum rp_check {
RP_CHECK_CALL,
RP_CHECK_CHAIN_CALL,
RP_CHECK_RET,
};
struct xol_area;
struct uprobes_state {
struct xol_area *xol_area;
};
extern void __init uprobes_init(void);
extern int set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
extern int set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
extern bool is_swbp_insn(uprobe_opcode_t *insn);
extern bool is_trap_insn(uprobe_opcode_t *insn);
extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs);
extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs);
extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t);
extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc);
extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool);
extern void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc);
extern void uprobe_unregister_sync(void);
extern int uprobe_mmap(struct vm_area_struct *vma);
extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end);
extern void uprobe_start_dup_mmap(void);
extern void uprobe_end_dup_mmap(void);
extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm);
extern void uprobe_free_utask(struct task_struct *t);
extern void uprobe_copy_process(struct task_struct *t, unsigned long flags);
extern int uprobe_post_sstep_notifier(struct pt_regs *regs);
extern int uprobe_pre_sstep_notifier(struct pt_regs *regs);
extern void uprobe_notify_resume(struct pt_regs *regs);
extern bool uprobe_deny_signal(void);
extern bool arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs);
extern void uprobe_clear_state(struct mm_struct *mm);
extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
extern int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
extern bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx, struct pt_regs *regs);
extern bool arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs);
extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
void *src, unsigned long len);
extern void uprobe_handle_trampoline(struct pt_regs *regs);
extern void *arch_uprobe_trampoline(unsigned long *psize);
extern unsigned long uprobe_get_trampoline_vaddr(void);
#else /* !CONFIG_UPROBES */
struct uprobes_state {
};
static inline void uprobes_init(void)
{
}
#define uprobe_get_trap_addr(regs) instruction_pointer(regs)
static inline struct uprobe *
uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc)
{
return ERR_PTR(-ENOSYS);
}
static inline int
uprobe_apply(struct uprobe* uprobe, struct uprobe_consumer *uc, bool add)
{
return -ENOSYS;
}
static inline void
uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc)
{
}
static inline void uprobe_unregister_sync(void)
{
}
static inline int uprobe_mmap(struct vm_area_struct *vma)
{
return 0;
}
static inline void
uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
}
static inline void uprobe_start_dup_mmap(void)
{
}
static inline void uprobe_end_dup_mmap(void)
{
}
static inline void
uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm)
{
}
static inline void uprobe_notify_resume(struct pt_regs *regs)
{
}
static inline bool uprobe_deny_signal(void)
{
return false;
}
static inline void uprobe_free_utask(struct task_struct *t)
{
}
static inline void uprobe_copy_process(struct task_struct *t, unsigned long flags)
{
}
static inline void uprobe_clear_state(struct mm_struct *mm)
{
}
#endif /* !CONFIG_UPROBES */
#endif /* _LINUX_UPROBES_H */