Files
Maciej W. Rozycki 4cc67b0484 linux/log2.h: reduce instruction count for is_power_of_2()
Follow an observation that (n ^ (n - 1)) will only ever retain the most
significant bit set in the word operated on if that is the only bit set in
the first place, and use it to determine whether a number is a whole power
of 2, avoiding the need for an explicit check for nonzero.

This reduces the sequence produced to 3 instructions only across Alpha,
MIPS, and RISC-V targets, down from 4, 5, and 4 respectively, removing a
branch in the two latter cases.  And it's 5 instructions on POWER and
x86-64 vs 8 and 9 respectively.  There are no branches now emitted here
for targets that have a suitable conditional set operation, although an
inline expansion will often end with one, depending on what code a call to
this function is used in.

Credit goes to GCC authors for coming up with this optimisation used as
the fallback for (__builtin_popcountl(n) == 1), equivalent to this code,
for targets where the hardware population count operation is considered
expensive.

Link: https://lkml.kernel.org/r/alpine.DEB.2.21.2601111836250.30566@angie.orcam.me.uk
Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: John Garry <john.g.garry@oracle.com>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: Su Hui <suhui@nfschina.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-01-26 19:07:14 -08:00

273 lines
6.5 KiB
C

/* SPDX-License-Identifier: GPL-2.0-or-later */
/* Integer base 2 logarithm calculation
*
* Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*/
#ifndef _LINUX_LOG2_H
#define _LINUX_LOG2_H
#include <linux/types.h>
#include <linux/bitops.h>
/*
* non-constant log of base 2 calculators
* - the arch may override these in asm/bitops.h if they can be implemented
* more efficiently than using fls() and fls64()
* - the arch is not required to handle n==0 if implementing the fallback
*/
#ifndef CONFIG_ARCH_HAS_ILOG2_U32
static __always_inline __attribute__((const))
int __ilog2_u32(u32 n)
{
return fls(n) - 1;
}
#endif
#ifndef CONFIG_ARCH_HAS_ILOG2_U64
static __always_inline __attribute__((const))
int __ilog2_u64(u64 n)
{
return fls64(n) - 1;
}
#endif
/**
* is_power_of_2() - check if a value is a power of two
* @n: the value to check
*
* Determine whether some value is a power of two, where zero is
* *not* considered a power of two.
* Return: true if @n is a power of 2, otherwise false.
*/
static __always_inline __attribute__((const))
bool is_power_of_2(unsigned long n)
{
return n - 1 < (n ^ (n - 1));
}
/**
* __roundup_pow_of_two() - round up to nearest power of two
* @n: value to round up
*/
static inline __attribute__((const))
unsigned long __roundup_pow_of_two(unsigned long n)
{
return 1UL << fls_long(n - 1);
}
/**
* __rounddown_pow_of_two() - round down to nearest power of two
* @n: value to round down
*/
static inline __attribute__((const))
unsigned long __rounddown_pow_of_two(unsigned long n)
{
return 1UL << (fls_long(n) - 1);
}
/**
* const_ilog2 - log base 2 of 32-bit or a 64-bit constant unsigned value
* @n: parameter
*
* Use this where sparse expects a true constant expression, e.g. for array
* indices.
*/
#define const_ilog2(n) \
( \
__builtin_constant_p(n) ? ( \
(n) < 2 ? 0 : \
(n) & (1ULL << 63) ? 63 : \
(n) & (1ULL << 62) ? 62 : \
(n) & (1ULL << 61) ? 61 : \
(n) & (1ULL << 60) ? 60 : \
(n) & (1ULL << 59) ? 59 : \
(n) & (1ULL << 58) ? 58 : \
(n) & (1ULL << 57) ? 57 : \
(n) & (1ULL << 56) ? 56 : \
(n) & (1ULL << 55) ? 55 : \
(n) & (1ULL << 54) ? 54 : \
(n) & (1ULL << 53) ? 53 : \
(n) & (1ULL << 52) ? 52 : \
(n) & (1ULL << 51) ? 51 : \
(n) & (1ULL << 50) ? 50 : \
(n) & (1ULL << 49) ? 49 : \
(n) & (1ULL << 48) ? 48 : \
(n) & (1ULL << 47) ? 47 : \
(n) & (1ULL << 46) ? 46 : \
(n) & (1ULL << 45) ? 45 : \
(n) & (1ULL << 44) ? 44 : \
(n) & (1ULL << 43) ? 43 : \
(n) & (1ULL << 42) ? 42 : \
(n) & (1ULL << 41) ? 41 : \
(n) & (1ULL << 40) ? 40 : \
(n) & (1ULL << 39) ? 39 : \
(n) & (1ULL << 38) ? 38 : \
(n) & (1ULL << 37) ? 37 : \
(n) & (1ULL << 36) ? 36 : \
(n) & (1ULL << 35) ? 35 : \
(n) & (1ULL << 34) ? 34 : \
(n) & (1ULL << 33) ? 33 : \
(n) & (1ULL << 32) ? 32 : \
(n) & (1ULL << 31) ? 31 : \
(n) & (1ULL << 30) ? 30 : \
(n) & (1ULL << 29) ? 29 : \
(n) & (1ULL << 28) ? 28 : \
(n) & (1ULL << 27) ? 27 : \
(n) & (1ULL << 26) ? 26 : \
(n) & (1ULL << 25) ? 25 : \
(n) & (1ULL << 24) ? 24 : \
(n) & (1ULL << 23) ? 23 : \
(n) & (1ULL << 22) ? 22 : \
(n) & (1ULL << 21) ? 21 : \
(n) & (1ULL << 20) ? 20 : \
(n) & (1ULL << 19) ? 19 : \
(n) & (1ULL << 18) ? 18 : \
(n) & (1ULL << 17) ? 17 : \
(n) & (1ULL << 16) ? 16 : \
(n) & (1ULL << 15) ? 15 : \
(n) & (1ULL << 14) ? 14 : \
(n) & (1ULL << 13) ? 13 : \
(n) & (1ULL << 12) ? 12 : \
(n) & (1ULL << 11) ? 11 : \
(n) & (1ULL << 10) ? 10 : \
(n) & (1ULL << 9) ? 9 : \
(n) & (1ULL << 8) ? 8 : \
(n) & (1ULL << 7) ? 7 : \
(n) & (1ULL << 6) ? 6 : \
(n) & (1ULL << 5) ? 5 : \
(n) & (1ULL << 4) ? 4 : \
(n) & (1ULL << 3) ? 3 : \
(n) & (1ULL << 2) ? 2 : \
1) : \
-1)
/**
* ilog2 - log base 2 of 32-bit or a 64-bit unsigned value
* @n: parameter
*
* constant-capable log of base 2 calculation
* - this can be used to initialise global variables from constant data, hence
* the massive ternary operator construction
*
* selects the appropriately-sized optimised version depending on sizeof(n)
*/
#define ilog2(n) \
( \
__builtin_constant_p(n) ? \
((n) < 2 ? 0 : \
63 - __builtin_clzll(n)) : \
(sizeof(n) <= 4) ? \
__ilog2_u32(n) : \
__ilog2_u64(n) \
)
/**
* roundup_pow_of_two - round the given value up to nearest power of two
* @n: parameter
*
* round the given value up to the nearest power of two
* - the result is undefined when n == 0
* - this can be used to initialise global variables from constant data
*/
#define roundup_pow_of_two(n) \
( \
__builtin_constant_p(n) ? ( \
((n) == 1) ? 1 : \
(1UL << (ilog2((n) - 1) + 1)) \
) : \
__roundup_pow_of_two(n) \
)
/**
* rounddown_pow_of_two - round the given value down to nearest power of two
* @n: parameter
*
* round the given value down to the nearest power of two
* - the result is undefined when n == 0
* - this can be used to initialise global variables from constant data
*/
#define rounddown_pow_of_two(n) \
( \
__builtin_constant_p(n) ? ( \
(1UL << ilog2(n))) : \
__rounddown_pow_of_two(n) \
)
static inline __attribute_const__
int __order_base_2(unsigned long n)
{
return n > 1 ? ilog2(n - 1) + 1 : 0;
}
/**
* order_base_2 - calculate the (rounded up) base 2 order of the argument
* @n: parameter
*
* The first few values calculated by this routine:
* ob2(0) = 0
* ob2(1) = 0
* ob2(2) = 1
* ob2(3) = 2
* ob2(4) = 2
* ob2(5) = 3
* ... and so on.
*/
#define order_base_2(n) \
( \
__builtin_constant_p(n) ? ( \
((n) == 0 || (n) == 1) ? 0 : \
ilog2((n) - 1) + 1) : \
__order_base_2(n) \
)
static inline __attribute__((const))
int __bits_per(unsigned long n)
{
if (n < 2)
return 1;
if (is_power_of_2(n))
return order_base_2(n) + 1;
return order_base_2(n);
}
/**
* bits_per - calculate the number of bits required for the argument
* @n: parameter
*
* This is constant-capable and can be used for compile time
* initializations, e.g bitfields.
*
* The first few values calculated by this routine:
* bf(0) = 1
* bf(1) = 1
* bf(2) = 2
* bf(3) = 2
* bf(4) = 3
* ... and so on.
*/
#define bits_per(n) \
( \
__builtin_constant_p(n) ? ( \
((n) == 0 || (n) == 1) \
? 1 : ilog2(n) + 1 \
) : \
__bits_per(n) \
)
/**
* max_pow_of_two_factor - return highest power-of-2 factor
* @n: parameter
*
* find highest power-of-2 which is evenly divisible into n.
* 0 is returned for n == 0 or 1.
*/
static inline __attribute__((const))
unsigned int max_pow_of_two_factor(unsigned int n)
{
return n & -n;
}
#endif /* _LINUX_LOG2_H */