Files
linux-stable-mirror/include/uapi/linux/mempolicy.h
Joshua Hahn 56b060d0a1 mempolicy: clarify what zone reclaim means
The zone_reclaim_mode API controls the reclaim behavior when a node runs
out of memory.  Contrary to its user-facing name, it is internally
referred to as "node_reclaim_mode".

This can be confusing.  But because we cannot change the name of the API
since it has been in place since at least 2.6, let's try to be more
explicit about what the behavior of this API is.

Change the description to clarify what zone reclaim entails, and be
explicit about the RECLAIM_ZONE bit, whose purpose has led to some
confusion in the past already [1] [2].

While at it, also soften the warning about changing these bits.

[joshua.hahnjy@gmail.com: remove the reference to the vm.zone_reclaim_mode sysctl as an ABI]
  Link: https://lkml.kernel.org/r/20250806134404.2000234-1-joshua.hahnjy@gmail.com
Link: https://lkml.kernel.org/r/20250805205048.1518453-1-joshua.hahnjy@gmail.com
Link: https://lore.kernel.org/linux-mm/1579005573-58923-1-git-send-email-alex.shi@linux.alibaba.com/ [1]
Link: https://lore.kernel.org/linux-mm/20200626003459.D8E015CA@viggo.jf.intel.com/ [2]
Signed-off-by: Joshua Hahn <joshua.hahnjy@gmail.com>
Acked-by: SeongJae Park <sj@kernel.org>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Huang Ying <ying.huang@linux.alibaba.com>
Acked-by: Zi Yan <ziy@nvidia.com>
Acked-by: Byungchul Park <byungchul@sk.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Byungchul Park <byungchul@sk.com>
Cc: Gregory Price <gourry@gourry.net>
Cc: Mathew Brost <matthew.brost@intel.com>
Cc: Rakie Kim <rakie.kim@sk.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-09-13 16:54:41 -07:00

83 lines
2.9 KiB
C

/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* NUMA memory policies for Linux.
* Copyright 2003,2004 Andi Kleen SuSE Labs
*/
#ifndef _UAPI_LINUX_MEMPOLICY_H
#define _UAPI_LINUX_MEMPOLICY_H
#include <linux/errno.h>
/*
* Both the MPOL_* mempolicy mode and the MPOL_F_* optional mode flags are
* passed by the user to either set_mempolicy() or mbind() in an 'int' actual.
* The MPOL_MODE_FLAGS macro determines the legal set of optional mode flags.
*/
/* Policies */
enum {
MPOL_DEFAULT,
MPOL_PREFERRED,
MPOL_BIND,
MPOL_INTERLEAVE,
MPOL_LOCAL,
MPOL_PREFERRED_MANY,
MPOL_WEIGHTED_INTERLEAVE,
MPOL_MAX, /* always last member of enum */
};
/* Flags for set_mempolicy */
#define MPOL_F_STATIC_NODES (1 << 15)
#define MPOL_F_RELATIVE_NODES (1 << 14)
#define MPOL_F_NUMA_BALANCING (1 << 13) /* Optimize with NUMA balancing if possible */
/*
* MPOL_MODE_FLAGS is the union of all possible optional mode flags passed to
* either set_mempolicy() or mbind().
*/
#define MPOL_MODE_FLAGS \
(MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES | MPOL_F_NUMA_BALANCING)
/* Flags for get_mempolicy */
#define MPOL_F_NODE (1<<0) /* return next IL mode instead of node mask */
#define MPOL_F_ADDR (1<<1) /* look up vma using address */
#define MPOL_F_MEMS_ALLOWED (1<<2) /* return allowed memories */
/* Flags for mbind */
#define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */
#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform
to policy */
#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to policy */
#define MPOL_MF_LAZY (1<<3) /* UNSUPPORTED FLAG: Lazy migrate on fault */
#define MPOL_MF_INTERNAL (1<<4) /* Internal flags start here */
#define MPOL_MF_VALID (MPOL_MF_STRICT | \
MPOL_MF_MOVE | \
MPOL_MF_MOVE_ALL)
/*
* Internal flags that share the struct mempolicy flags word with
* "mode flags". These flags are allocated from bit 0 up, as they
* are never OR'ed into the mode in mempolicy API arguments.
*/
#define MPOL_F_SHARED (1 << 0) /* identify shared policies */
#define MPOL_F_MOF (1 << 3) /* this policy wants migrate on fault */
#define MPOL_F_MORON (1 << 4) /* Migrate On protnone Reference On Node */
/*
* Enabling zone reclaim means the page allocator will attempt to fulfill
* the allocation request on the current node by triggering reclaim and
* trying to shrink the current node.
* Fallback allocations on the next candidates in the zonelist are considered
* when reclaim fails to free up enough memory in the current node/zone.
*
* These bit locations are exposed in the vm.zone_reclaim_mode sysctl.
* New bits are OK, but existing bits should not be changed.
*/
#define RECLAIM_ZONE (1<<0) /* Enable zone reclaim */
#define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */
#define RECLAIM_UNMAP (1<<2) /* Unmap pages during reclaim */
#endif /* _UAPI_LINUX_MEMPOLICY_H */