Files
linux-torvalds-mirror/include/linux/filelock.h
Linus Torvalds 9e355113f0 Merge tag 'vfs-7.0-rc1.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull misc vfs updates from Christian Brauner:
 "This contains a mix of VFS cleanups, performance improvements, API
  fixes, documentation, and a deprecation notice.

  Scalability and performance:

   - Rework pid allocation to only take pidmap_lock once instead of
     twice during alloc_pid(), improving thread creation/teardown
     throughput by 10-16% depending on false-sharing luck. Pad the
     namespace refcount to reduce false-sharing

   - Track file lock presence via a flag in ->i_opflags instead of
     reading ->i_flctx, avoiding false-sharing with ->i_readcount on
     open/close hot paths. Measured 4-16% improvement on 24-core
     open-in-a-loop benchmarks

   - Use a consume fence in locks_inode_context() to match the
     store-release/load-consume idiom, eliminating a hardware fence on
     some architectures

   - Annotate cdev_lock with __cacheline_aligned_in_smp to prevent
     false-sharing

   - Remove a redundant DCACHE_MANAGED_DENTRY check in
     __follow_mount_rcu() that never fires since the caller already
     verifies it, eliminating a 100% mispredicted branch

   - Fix a 100% mispredicted likely() in devcgroup_inode_permission()
     that became wrong after a prior code reorder

  Bug fixes and correctness:

   - Make insert_inode_locked() wait for inode destruction instead of
     skipping, fixing a corner case where two matching inodes could
     exist in the hash

   - Move f_mode initialization before file_ref_init() in alloc_file()
     to respect the SLAB_TYPESAFE_BY_RCU ordering contract

   - Add a WARN_ON_ONCE guard in try_to_free_buffers() for folios with
     no buffers attached, preventing a null pointer dereference when
     AS_RELEASE_ALWAYS is set but no release_folio op exists

   - Fix select restart_block to store end_time as timespec64, avoiding
     truncation of tv_sec on 32-bit architectures

   - Make dump_inode() use get_kernel_nofault() to safely access inode
     and superblock fields, matching the dump_mapping() pattern

  API modernization:

   - Make posix_acl_to_xattr() allocate the buffer internally since
     every single caller was doing it anyway. Reduces boilerplate and
     unnecessary error checking across ~15 filesystems

   - Replace deprecated simple_strtoul() with kstrtoul() for the
     ihash_entries, dhash_entries, mhash_entries, and mphash_entries
     boot parameters, adding proper error handling

   - Convert chardev code to use guard(mutex) and __free(kfree) cleanup
     patterns

   - Replace min_t() with min() or umin() in VFS code to avoid silently
     truncating unsigned long to unsigned int

   - Gate LOOKUP_RCU assertions behind CONFIG_DEBUG_VFS since callers
     already check the flag

  Deprecation:

   - Begin deprecating legacy BSD process accounting (acct(2)). The
     interface has numerous footguns and better alternatives exist
     (eBPF)

  Documentation:

   - Fix and complete kernel-doc for struct export_operations, removing
     duplicated documentation between ReST and source

   - Fix kernel-doc warnings for __start_dirop() and ilookup5_nowait()

  Testing:

   - Add a kunit test for initramfs cpio handling of entries with
     filesize > PATH_MAX

  Misc:

   - Add missing <linux/init_task.h> include in fs_struct.c"

* tag 'vfs-7.0-rc1.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (28 commits)
  posix_acl: make posix_acl_to_xattr() alloc the buffer
  fs: make insert_inode_locked() wait for inode destruction
  initramfs_test: kunit test for cpio.filesize > PATH_MAX
  fs: improve dump_inode() to safely access inode fields
  fs: add <linux/init_task.h> for 'init_fs'
  docs: exportfs: Use source code struct documentation
  fs: move initializing f_mode before file_ref_init()
  exportfs: Complete kernel-doc for struct export_operations
  exportfs: Mark struct export_operations functions at kernel-doc
  exportfs: Fix kernel-doc output for get_name()
  acct(2): begin the deprecation of legacy BSD process accounting
  device_cgroup: remove branch hint after code refactor
  VFS: fix __start_dirop() kernel-doc warnings
  fs: Describe @isnew parameter in ilookup5_nowait()
  fs/namei: Remove redundant DCACHE_MANAGED_DENTRY check in __follow_mount_rcu
  fs: only assert on LOOKUP_RCU when built with CONFIG_DEBUG_VFS
  select: store end_time as timespec64 in restart block
  chardev: Switch to guard(mutex) and __free(kfree)
  namespace: Replace simple_strtoul with kstrtoul to parse boot params
  dcache: Replace simple_strtoul with kstrtoul in set_dhash_entries
  ...
2026-02-09 15:13:05 -08:00

596 lines
15 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_FILELOCK_H
#define _LINUX_FILELOCK_H
#include <linux/fs.h>
/*
 * Lock flag bits. Every value is a distinct power of two so that flags can
 * be OR-ed together, as FL_CLOSE_POSIX does below.
 */
#define FL_POSIX		0x0001
#define FL_FLOCK		0x0002
#define FL_DELEG		0x0004	/* NFSv4 delegation */
#define FL_ACCESS		0x0008	/* not trying to lock, just looking */
#define FL_EXISTS		0x0010	/* when unlocking, test for existence */
#define FL_LEASE		0x0020	/* lease held on this file */
#define FL_CLOSE		0x0040	/* unlock on close */
#define FL_SLEEP		0x0080	/* A blocking lock */
#define FL_DOWNGRADE_PENDING	0x0100	/* Lease is being downgraded */
#define FL_UNLOCK_PENDING	0x0200	/* Lease is being broken */
#define FL_OFDLCK		0x0400	/* lock is "owned" by struct file */
#define FL_LAYOUT		0x0800	/* outstanding pNFS layout */
#define FL_RECLAIM		0x1000	/* reclaiming from a reboot server */

#define FL_CLOSE_POSIX		(FL_POSIX | FL_CLOSE)

/*
 * Special return value from posix_lock_file() and vfs_lock_file() for
 * asynchronous locking.
 */
#define FILE_LOCK_DEFERRED	1

/* Forward declarations for the callback tables that follow. */
struct file_lock;
struct file_lease;
/*
* Callback table that struct file_lock points at through its fl_ops member
* ("Callbacks for filesystems").
*/
struct file_lock_operations {
/* NOTE(review): presumably called when one lock is copied into another - confirm in fs/locks.c */
void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
/* NOTE(review): presumably releases filesystem-private state attached to the lock - confirm */
void (*fl_release_private)(struct file_lock *);
};
/*
* Callback table that struct file_lock points at through its fl_lmops member
* ("Callbacks for lockmanagers").
*/
struct lock_manager_operations {
void *lm_mod_owner;
/* Owner get/put pair; both take and/or return an fl_owner_t. */
fl_owner_t (*lm_get_owner)(fl_owner_t);
void (*lm_put_owner)(fl_owner_t);
void (*lm_notify)(struct file_lock *); /* unblock callback */
int (*lm_grant)(struct file_lock *, int);
/* NOTE(review): the two hooks below look related to expiring a conflicting lock - confirm semantics */
bool (*lm_lock_expirable)(struct file_lock *cfl);
void (*lm_expire_lock)(void);
};
/*
* Callback table that struct file_lease points at through its fl_lmops
* member ("Callbacks for lease managers").
*/
struct lease_manager_operations {
bool (*lm_break)(struct file_lease *);
/* Same parameter list as lease_modify(). */
int (*lm_change)(struct file_lease *, int, struct list_head *);
void (*lm_setup)(struct file_lease *, void **);
bool (*lm_breaker_owns_lease)(struct file_lease *);
int (*lm_open_conflict)(struct file *, int);
};
/*
* Handle passed to locks_start_grace() and locks_end_grace().
*/
struct lock_manager {
/* List linkage. NOTE(review): presumably a per-net list of active managers - confirm */
struct list_head list;
/*
* NFSv4 and up also want opens blocked during the grace period;
* NLM doesn't care:
*/
bool block_opens;
};
struct net;

/* Grace-period interface operating on a struct lock_manager and a struct net. */
void locks_start_grace(struct net *net, struct lock_manager *lm);
void locks_end_grace(struct lock_manager *lm);
bool locks_in_grace(struct net *net);
bool opens_in_grace(struct net *net);
/*
* struct file_lock has a union that some filesystems use to track
* their own private info. The NFS side of things is defined here:
*/
#include <linux/nfs_fs_i.h>
/*
* struct file_lock represents a generic "file lock". It's used to represent
* POSIX byte range locks, BSD (flock) locks, and leases. It's important to
* note that the same struct is used to represent both a request for a lock and
* the lock itself, but the same object is never used for both.
*
* FIXME: should we create a separate "struct lock_request" to help distinguish
* these two uses?
*
* The various i_flctx lists are ordered by:
*
* 1) lock owner
* 2) lock range start
* 3) lock range end
*
* Obviously, the last two criteria only matter for POSIX locks.
*/
/*
* Fields shared by struct file_lock and struct file_lease; both embed a
* struct file_lock_core as their member "c".
*/
struct file_lock_core {
struct file_lock_core *flc_blocker; /* The lock that is blocking us */
struct list_head flc_list; /* link into file_lock_context */
struct hlist_node flc_link; /* node in global lists */
struct list_head flc_blocked_requests; /* list of requests with
* ->flc_blocker pointing here
*/
struct list_head flc_blocked_member; /* node in
* ->flc_blocker->flc_blocked_requests
*/
fl_owner_t flc_owner;
unsigned int flc_flags;
unsigned char flc_type; /* compared against F_RDLCK/F_WRLCK/F_UNLCK by lock_is_*() */
pid_t flc_pid;
int flc_link_cpu; /* what cpu's list is this on? */
wait_queue_head_t flc_wait; /* woken by locks_wake_up_waiter() */
struct file *flc_file;
};
/*
* A file lock: the common core plus a byte range, two callback tables and a
* union of per-filesystem private data.
*/
struct file_lock {
struct file_lock_core c;
/* Lock range start and end. NOTE(review): presumably fl_end is inclusive - confirm */
loff_t fl_start;
loff_t fl_end;
const struct file_lock_operations *fl_ops; /* Callbacks for filesystems */
const struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */
/* Filesystem-private info; one variant per user (NFS, NFSv4, AFS, Ceph). */
union {
struct nfs_lock_info nfs_fl;
struct nfs4_lock_info nfs4_fl;
struct {
struct list_head link; /* link in AFS vnode's pending_locks list */
int state; /* state of grant or error if -ve */
unsigned int debug_id;
} afs;
struct {
struct inode *inode;
} ceph;
} fl_u;
} __randomize_layout;
/*
* A lease: the common core plus lease-break bookkeeping and the lease
* manager's callback table.
*/
struct file_lease {
struct file_lock_core c;
struct fasync_struct * fl_fasync; /* for lease break notifications */
/* for lease breaks: */
/* NOTE(review): both times are presumably in jiffies - confirm */
unsigned long fl_break_time;
unsigned long fl_downgrade_time;
const struct lease_manager_operations *fl_lmops; /* Callbacks for lease managers */
} __randomize_layout;
/*
* Lock context of an inode, as returned by locks_inode_context(): a
* spinlock and three list heads.
*/
struct file_lock_context {
/* NOTE(review): presumably protects the three lists below - confirm */
spinlock_t flc_lock;
struct list_head flc_flock;
struct list_head flc_posix;
/* Checked locklessly with list_empty_careful() by break_lease() and friends. */
struct list_head flc_lease;
};
#ifdef CONFIG_FILE_LOCKING
/*
 * fcntl() locking, lease and delegation entry points. Inline fallbacks are
 * provided further down for kernels built without CONFIG_FILE_LOCKING.
 */
int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock *flock);
int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
		struct flock *flock);

#if BITS_PER_LONG == 32
int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 *flock);
int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
		  struct flock64 *flock);
#endif

int fcntl_setlease(unsigned int fd, struct file *filp, int arg);
int fcntl_getlease(struct file *filp);
int fcntl_setdeleg(unsigned int fd, struct file *filp,
		   struct delegation *deleg);
int fcntl_getdeleg(struct file *filp, struct delegation *deleg);
/* Return true when @fl's type is F_UNLCK. */
static inline bool lock_is_unlock(struct file_lock *fl)
{
	const unsigned char type = fl->c.flc_type;

	return type == F_UNLCK;
}
/* Return true when @fl's type is F_RDLCK. */
static inline bool lock_is_read(struct file_lock *fl)
{
	const unsigned char type = fl->c.flc_type;

	return type == F_RDLCK;
}
/* Return true when @fl's type is F_WRLCK. */
static inline bool lock_is_write(struct file_lock *fl)
{
	const unsigned char type = fl->c.flc_type;

	return type == F_WRLCK;
}
static inline void locks_wake_up_waiter(struct file_lock_core *flc)
{
wake_up(&flc->flc_wait);
}
/* Convenience wrapper: wake up waiters on @fl's embedded core. */
static inline void locks_wake_up(struct file_lock *fl)
{
	struct file_lock_core *core = &fl->c;

	locks_wake_up_waiter(core);
}
/*
 * Return true when @fops either has no ->lock method at all or sets
 * FOP_ASYNC_LOCK in its fop_flags.
 */
static inline bool locks_can_async_lock(const struct file_operations *fops)
{
	if (!fops->lock)
		return true;

	return (fops->fop_flags & FOP_ASYNC_LOCK) != 0;
}
/* Implemented in fs/locks.c */
void locks_free_lock_context(struct inode *inode);

/* struct file_lock lifetime and copying */
struct file_lock *locks_alloc_lock(void);
void locks_init_lock(struct file_lock *fl);
void locks_free_lock(struct file_lock *fl);
void locks_copy_lock(struct file_lock *new, struct file_lock *fl);
void locks_copy_conflock(struct file_lock *new, struct file_lock *fl);
void locks_release_private(struct file_lock *fl);

/* lock removal for a struct file */
void locks_remove_posix(struct file *filp, fl_owner_t owner);
void locks_remove_file(struct file *filp);

/* testing, setting and cancelling locks */
void posix_test_lock(struct file *filp, struct file_lock *fl);
int posix_lock_file(struct file *filp, struct file_lock *fl,
		    struct file_lock *conflock);
int locks_delete_block(struct file_lock *waiter);
int vfs_test_lock(struct file *filp, struct file_lock *fl);
int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl,
		  struct file_lock *conf);
int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
bool vfs_inode_has_locks(struct inode *inode);
int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl);

/* struct file_lease lifetime */
struct file_lease *locks_alloc_lease(void);
void locks_init_lease(struct file_lease *fl);
void locks_free_lease(struct file_lease *fl);
/*
* Flag bits for the @flags argument of __break_lease(). break_lease(),
* break_deleg() and break_layout() build their flag words from these.
*/
#define LEASE_BREAK_LEASE BIT(0) // break leases and delegations
#define LEASE_BREAK_DELEG BIT(1) // break delegations only
#define LEASE_BREAK_LAYOUT BIT(2) // break layouts only
#define LEASE_BREAK_NONBLOCK BIT(3) // non-blocking break
#define LEASE_BREAK_OPEN_RDONLY BIT(4) // readonly open event
/* Lease breaking and setup; @flags of __break_lease() takes LEASE_BREAK_* bits. */
int __break_lease(struct inode *inode, unsigned int flags);
void lease_get_mtime(struct inode *inode, struct timespec64 *time);
int generic_setlease(struct file *filp, int arg, struct file_lease **flp,
		     void **priv);
int kernel_setlease(struct file *filp, int arg, struct file_lease **lease,
		    void **priv);
int vfs_setlease(struct file *filp, int arg, struct file_lease **lease,
		 void **priv);
int lease_modify(struct file_lease *fl, int arg, struct list_head *dispose);

/* Lease notifier registration */
struct notifier_block;
int lease_register_notifier(struct notifier_block *nb);
void lease_unregister_notifier(struct notifier_block *nb);

struct files_struct;
void show_fd_locks(struct seq_file *f, struct file *filp,
		   struct files_struct *files);
bool locks_owner_has_blockers(struct file_lock_context *flctx,
			      fl_owner_t owner);
/*
* locks_inode_context - look up the lock context attached to an inode
* @inode: inode to inspect
*
* Tests the IOP_FLCTX bit of ->i_opflags first and only reads ->i_flctx when
* that bit is set. Returns NULL when the bit is clear (the case the likely()
* hint marks as expected), otherwise the current ->i_flctx pointer.
*/
static inline struct file_lock_context *
locks_inode_context(const struct inode *inode)
{
/*
* Paired with smp_store_release in locks_get_lock_context().
*
* Ensures ->i_flctx will be visible if we spotted the flag.
*/
if (likely(!(smp_load_acquire(&inode->i_opflags) & IOP_FLCTX)))
return NULL;
return READ_ONCE(inode->i_flctx);
}
#else /* !CONFIG_FILE_LOCKING */
/*
 * fcntl_getlk - fallback used when CONFIG_FILE_LOCKING is disabled
 * @file: unused
 * @cmd: unused
 * @user: unused
 *
 * Always fails with -EINVAL.
 *
 * @user is a plain pointer with no __user annotation, so this fallback has
 * the same signature as the fcntl_getlk() prototype that is declared when
 * CONFIG_FILE_LOCKING is enabled.
 */
static inline int fcntl_getlk(struct file *file, unsigned int cmd,
			      struct flock *user)
{
	return -EINVAL;
}
/*
 * fcntl_setlk - fallback used when CONFIG_FILE_LOCKING is disabled
 * @fd: unused
 * @file: unused
 * @cmd: unused
 * @user: unused
 *
 * Always fails with -EACCES.
 *
 * @user is a plain pointer with no __user annotation, so this fallback has
 * the same signature as the fcntl_setlk() prototype that is declared when
 * CONFIG_FILE_LOCKING is enabled.
 */
static inline int fcntl_setlk(unsigned int fd, struct file *file,
			      unsigned int cmd, struct flock *user)
{
	return -EACCES;
}
#if BITS_PER_LONG == 32
/* !CONFIG_FILE_LOCKING fallback: ignores its arguments, always -EINVAL. */
static inline int fcntl_getlk64(struct file *file, unsigned int cmd,
				struct flock64 *user)
{
	return -EINVAL;
}
/* !CONFIG_FILE_LOCKING fallback: ignores its arguments, always -EACCES. */
static inline int fcntl_setlk64(unsigned int fd, struct file *file,
				unsigned int cmd, struct flock64 *user)
{
	return -EACCES;
}
#endif
/* !CONFIG_FILE_LOCKING fallback: leases cannot be set, always -EINVAL. */
static inline int fcntl_setlease(unsigned int fd, struct file *filp, int arg)
{
	return -EINVAL;
}
/* !CONFIG_FILE_LOCKING fallback: always reports F_UNLCK. */
static inline int fcntl_getlease(struct file *filp)
{
	return F_UNLCK;
}
/* !CONFIG_FILE_LOCKING fallback: ignores its arguments, always -EINVAL. */
static inline int fcntl_setdeleg(unsigned int fd, struct file *filp,
				 struct delegation *deleg)
{
	return -EINVAL;
}
/* !CONFIG_FILE_LOCKING fallback: ignores its arguments, always -EINVAL. */
static inline int fcntl_getdeleg(struct file *filp,
				 struct delegation *deleg)
{
	return -EINVAL;
}
/* !CONFIG_FILE_LOCKING fallback: never inspects @fl, always false. */
static inline bool lock_is_unlock(struct file_lock *fl)
{
	return false;
}
/* !CONFIG_FILE_LOCKING fallback: never inspects @fl, always false. */
static inline bool lock_is_read(struct file_lock *fl)
{
	return false;
}
/* !CONFIG_FILE_LOCKING fallback: never inspects @fl, always false. */
static inline bool lock_is_write(struct file_lock *fl)
{
	return false;
}
/* !CONFIG_FILE_LOCKING fallback: nothing to wake, does nothing. */
static inline void locks_wake_up(struct file_lock *fl) { }
/* !CONFIG_FILE_LOCKING fallback: does nothing. */
static inline void locks_free_lock_context(struct inode *inode) { }
/* !CONFIG_FILE_LOCKING fallback: leaves @fl untouched. */
static inline void locks_init_lock(struct file_lock *fl)
{
}
/* !CONFIG_FILE_LOCKING fallback: leaves @fl untouched. */
static inline void locks_init_lease(struct file_lease *fl)
{
}
/* !CONFIG_FILE_LOCKING fallback: copies nothing. */
static inline void locks_copy_conflock(struct file_lock *new,
				       struct file_lock *fl)
{
}
/* !CONFIG_FILE_LOCKING fallback: copies nothing. */
static inline void locks_copy_lock(struct file_lock *new,
				   struct file_lock *fl)
{
}
/* !CONFIG_FILE_LOCKING fallback: there are no locks to remove. */
static inline void locks_remove_posix(struct file *filp, fl_owner_t owner)
{
}
/* !CONFIG_FILE_LOCKING fallback: there are no locks to remove. */
static inline void locks_remove_file(struct file *filp)
{
}
/* !CONFIG_FILE_LOCKING fallback: does nothing and leaves @fl unmodified. */
static inline void posix_test_lock(struct file *filp, struct file_lock *fl)
{
}
/* !CONFIG_FILE_LOCKING fallback: no lock can be taken, always -ENOLCK. */
static inline int posix_lock_file(struct file *filp, struct file_lock *fl,
				  struct file_lock *conflock)
{
	return -ENOLCK;
}
/* !CONFIG_FILE_LOCKING fallback: ignores @waiter, always -ENOENT. */
static inline int locks_delete_block(struct file_lock *waiter)
{
	return -ENOENT;
}
/* !CONFIG_FILE_LOCKING fallback: returns 0 without touching @fl. */
static inline int vfs_test_lock(struct file *filp, struct file_lock *fl)
{
	return 0;
}
/* !CONFIG_FILE_LOCKING fallback: no lock can be taken, always -ENOLCK. */
static inline int vfs_lock_file(struct file *filp, unsigned int cmd,
				struct file_lock *fl, struct file_lock *conf)
{
	return -ENOLCK;
}
/* !CONFIG_FILE_LOCKING fallback: nothing to cancel, returns 0. */
static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
{
	return 0;
}
/* !CONFIG_FILE_LOCKING fallback: always false. */
static inline bool vfs_inode_has_locks(struct inode *inode)
{
	return false;
}
/* !CONFIG_FILE_LOCKING fallback: no lock can be taken, always -ENOLCK. */
static inline int locks_lock_inode_wait(struct inode *inode,
					struct file_lock *fl)
{
	return -ENOLCK;
}
/* !CONFIG_FILE_LOCKING fallback: nothing to break, returns 0. */
static inline int __break_lease(struct inode *inode, unsigned int flags)
{
	return 0;
}
/* !CONFIG_FILE_LOCKING fallback: does nothing and leaves @time unmodified. */
static inline void lease_get_mtime(struct inode *inode,
				   struct timespec64 *time)
{
}
/* !CONFIG_FILE_LOCKING fallback: leases cannot be set, always -EINVAL. */
static inline int generic_setlease(struct file *filp, int arg,
				   struct file_lease **flp, void **priv)
{
	return -EINVAL;
}
/* !CONFIG_FILE_LOCKING fallback: leases cannot be set, always -EINVAL. */
static inline int kernel_setlease(struct file *filp, int arg,
				  struct file_lease **lease, void **priv)
{
	return -EINVAL;
}
/* !CONFIG_FILE_LOCKING fallback: leases cannot be set, always -EINVAL. */
static inline int vfs_setlease(struct file *filp, int arg,
			       struct file_lease **lease, void **priv)
{
	return -EINVAL;
}
/* !CONFIG_FILE_LOCKING fallback: ignores its arguments, always -EINVAL. */
static inline int lease_modify(struct file_lease *fl, int arg,
			       struct list_head *dispose)
{
	return -EINVAL;
}
struct files_struct;

/* !CONFIG_FILE_LOCKING fallback: emits nothing into @f. */
static inline void show_fd_locks(struct seq_file *f, struct file *filp,
				 struct files_struct *files)
{
}
/* !CONFIG_FILE_LOCKING fallback: always false. */
static inline bool locks_owner_has_blockers(struct file_lock_context *flctx,
					    fl_owner_t owner)
{
	return false;
}
/* !CONFIG_FILE_LOCKING fallback: always returns NULL, without reading @inode. */
static inline struct file_lock_context *
locks_inode_context(const struct inode *inode)
{
	return NULL;
}
#endif /* !CONFIG_FILE_LOCKING */
/* Walk the struct file_lock entries on @_head, which are linked via c.flc_list. */
#define for_each_file_lock(_fl, _head) \
	list_for_each_entry(_fl, _head, c.flc_list)
/*
 * Resolve @filp to its inode with file_inode() and hand that plus @fl to
 * locks_lock_inode_wait(), returning its result.
 */
static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl)
{
	struct inode *inode = file_inode(filp);

	return locks_lock_inode_wait(inode, fl);
}
#ifdef CONFIG_FILE_LOCKING
/*
 * Translate open flags into LEASE_BREAK_* bits:
 *  - access mode O_RDONLY -> LEASE_BREAK_OPEN_RDONLY
 *  - O_NONBLOCK set       -> LEASE_BREAK_NONBLOCK
 * Any other bits in @mode are ignored.
 */
static inline unsigned int openmode_to_lease_flags(unsigned int mode)
{
	const bool rdonly = (mode & O_ACCMODE) == O_RDONLY;
	const bool nonblock = (mode & O_NONBLOCK) != 0;

	return (rdonly ? LEASE_BREAK_OPEN_RDONLY : 0) |
	       (nonblock ? LEASE_BREAK_NONBLOCK : 0);
}
/*
* break_lease - break leases on an inode, if there are any
* @inode: inode whose leases should be broken
* @mode: open flags, translated with openmode_to_lease_flags()
*
* Returns 0 without calling __break_lease() when the inode has no lock
* context or its flc_lease list is empty. Otherwise returns the result of
* __break_lease() called with LEASE_BREAK_LEASE plus the translated flags.
*/
static inline int break_lease(struct inode *inode, unsigned int mode)
{
struct file_lock_context *flctx;
/*
* Since this check is lockless, we must ensure that any refcounts
* taken are done before checking i_flctx->flc_lease. Otherwise, we
* could end up racing with tasks trying to set a new lease on this
* file.
*/
flctx = locks_inode_context(inode);
if (!flctx)
return 0;
smp_mb();
if (!list_empty_careful(&flctx->flc_lease))
return __break_lease(inode, LEASE_BREAK_LEASE | openmode_to_lease_flags(mode));
return 0;
}
/*
* break_deleg - break delegations on an inode, if there are any
* @inode: inode whose delegations should be broken
* @flags: LEASE_BREAK_* bits; LEASE_BREAK_DELEG is OR-ed in here
*
* Returns 0 without calling __break_lease() when the inode has no lock
* context or its flc_lease list is empty. Otherwise returns the result of
* __break_lease().
*/
static inline int break_deleg(struct inode *inode, unsigned int flags)
{
struct file_lock_context *flctx;
/*
* Since this check is lockless, we must ensure that any refcounts
* taken are done before checking i_flctx->flc_lease. Otherwise, we
* could end up racing with tasks trying to set a new lease on this
* file.
*/
flctx = locks_inode_context(inode);
if (!flctx)
return 0;
smp_mb();
if (!list_empty_careful(&flctx->flc_lease)) {
flags |= LEASE_BREAK_DELEG;
return __break_lease(inode, flags);
}
return 0;
}
/*
 * Remembers an inode whose delegation break could not complete without
 * blocking. try_break_deleg() fills in di_inode; break_deleg_wait() resets
 * it to NULL.
 */
struct delegated_inode {
	struct inode *di_inode;
};

/* Return true when @di currently records an inode. */
static inline bool is_delegated(struct delegated_inode *di)
{
	return di->di_inode != NULL;
}
/*
 * try_break_deleg - attempt a non-blocking delegation break
 * @inode: inode to break delegations on
 * @di: optional record for a deferred break; may be NULL
 *
 * Calls break_deleg() with LEASE_BREAK_NONBLOCK and returns its result. If
 * that result is -EWOULDBLOCK and @di is non-NULL, @inode is stored in @di
 * and a reference is taken with ihold(); break_deleg_wait() drops it again
 * with iput().
 */
static inline int try_break_deleg(struct inode *inode,
				  struct delegated_inode *di)
{
	int err = break_deleg(inode, LEASE_BREAK_NONBLOCK);

	if (err != -EWOULDBLOCK || !di)
		return err;

	di->di_inode = inode;
	ihold(inode);
	return err;
}
/*
 * break_deleg_wait - finish a delegation break recorded in @di
 * @di: record whose di_inode is read here
 *
 * Calls break_deleg() on the recorded inode with no extra flags (so without
 * LEASE_BREAK_NONBLOCK), then drops the inode reference with iput() and
 * resets @di->di_inode to NULL. Returns the break_deleg() result.
 */
static inline int break_deleg_wait(struct delegated_inode *di)
{
	int err = break_deleg(di->di_inode, 0);

	iput(di->di_inode);
	di->di_inode = NULL;

	return err;
}
/*
* break_layout - break layouts on an inode, if there are any
* @inode: inode whose layouts should be broken
* @wait: when false, LEASE_BREAK_NONBLOCK is added to the flags
*
* Returns 0 without calling __break_lease() when the inode has no lock
* context or its flc_lease list is empty. Otherwise returns the result of
* __break_lease() called with LEASE_BREAK_LAYOUT.
*
* Unlike break_lease() and break_deleg(), the smp_mb() here is issued before
* the lock context is looked up.
*/
static inline int break_layout(struct inode *inode, bool wait)
{
struct file_lock_context *flctx;
smp_mb();
flctx = locks_inode_context(inode);
if (flctx && !list_empty_careful(&flctx->flc_lease)) {
unsigned int flags = LEASE_BREAK_LAYOUT;
if (!wait)
flags |= LEASE_BREAK_NONBLOCK;
return __break_lease(inode, flags);
}
return 0;
}
#else /* !CONFIG_FILE_LOCKING */
/* !CONFIG_FILE_LOCKING: nothing to record, so the struct has no members. */
struct delegated_inode { };

/* !CONFIG_FILE_LOCKING fallback: never inspects @di, always false. */
static inline bool is_delegated(struct delegated_inode *di)
{
	return false;
}
/*
 * break_lease - fallback used when CONFIG_FILE_LOCKING is disabled
 * @inode: unused
 * @mode: open flags; unused
 *
 * Returns 0.
 *
 * The second parameter is an unsigned int named @mode (not a bool), so this
 * fallback has the same signature as the break_lease() that is built when
 * CONFIG_FILE_LOCKING is enabled.
 */
static inline int break_lease(struct inode *inode, unsigned int mode)
{
	return 0;
}
/* !CONFIG_FILE_LOCKING fallback: nothing to break, returns 0. */
static inline int break_deleg(struct inode *inode, unsigned int flags)
{
	return 0;
}
/* !CONFIG_FILE_LOCKING fallback: returns 0 and leaves @delegated_inode alone. */
static inline int try_break_deleg(struct inode *inode,
				  struct delegated_inode *delegated_inode)
{
	return 0;
}
/*
 * !CONFIG_FILE_LOCKING fallback: hits BUG() when called. The fallback
 * try_break_deleg() never records an inode, so there is nothing to wait for.
 */
static inline int break_deleg_wait(struct delegated_inode *delegated_inode)
{
	BUG();
	return 0;
}
/* !CONFIG_FILE_LOCKING fallback: nothing to break, returns 0. */
static inline int break_layout(struct inode *inode, bool wait)
{
	return 0;
}
#endif /* CONFIG_FILE_LOCKING */
#endif /* _LINUX_FILELOCK_H */