mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-02-28 19:06:51 +01:00
This patch implements bitwise tracking (tnum analysis) for BPF_END
(byte swap) operation.
Currently, the BPF verifier does not track value for BPF_END operation,
treating the result as completely unknown. This limits the verifier's
ability to prove safety of programs that perform endianness conversions,
which are common in networking code.
For example, the following code pattern for port number validation:
int test(struct pt_regs *ctx) {
__u64 x = bpf_get_prandom_u32();
x &= 0x3f00; // Range: [0, 0x3f00], var_off: (0x0; 0x3f00)
x = bswap16(x); // Should swap to range [0, 0x3f], var_off: (0x0; 0x3f)
if (x > 0x3f) goto trap;
return 0;
trap:
return *(u64 *)NULL; // Should be unreachable
}
Currently generates verifier output:
1: (54) w0 &= 16128 ; R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=16128,var_off=(0x0; 0x3f00))
2: (d7) r0 = bswap16 r0 ; R0=scalar()
3: (25) if r0 > 0x3f goto pc+2 ; R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=63,var_off=(0x0; 0x3f))
Without this patch, even though the verifier knows `x` has certain bits
set, after bswap16, it loses all tracking information and treats port
as having a completely unknown value [0, 65535].
According to the BPF instruction set[1], there are 3 kinds of BPF_END:
1. `bswap(16|32|64)`: opcode=0xd7 (BPF_END | BPF_ALU64 | BPF_TO_LE)
- do unconditional swap
2. `le(16|32|64)`: opcode=0xd4 (BPF_END | BPF_ALU | BPF_TO_LE)
- on big-endian: do swap
- on little-endian: truncation (16/32-bit) or no-op (64-bit)
3. `be(16|32|64)`: opcode=0xdc (BPF_END | BPF_ALU | BPF_TO_BE)
- on little-endian: do swap
- on big-endian: truncation (16/32-bit) or no-op (64-bit)
Since BPF_END operations are inherently bit-wise permutations, tnum
(bitwise tracking) offers the most efficient and precise mechanism
for value analysis. By implementing `tnum_bswap16`, `tnum_bswap32`,
and `tnum_bswap64`, we can derive exact `var_off` values concisely,
directly reflecting the bit-level changes.
Here is the overview of changes:
1. In `tnum_bswap(16|32|64)` (kernel/bpf/tnum.c):
Call `swab(16|32|64)` function on the value and mask of `var_off`, and
do truncation for 16/32-bit cases.
2. In `adjust_scalar_min_max_vals` (kernel/bpf/verifier.c):
Call helper function `scalar_byte_swap`.
- Only do byte swap when
* alu64 (unconditional swap) OR
* switching between big-endian and little-endian machines.
- If need do byte swap:
* Firstly call `tnum_bswap(16|32|64)` to update `var_off`.
* Then reset the bound since byte swap scrambles the range.
- For 16/32-bit cases, truncate dst register to match the swapped size.
This enables better verification of networking code that frequently uses
byte swaps for protocol processing, reducing false positive rejections.
[1] https://www.kernel.org/doc/Documentation/bpf/standardization/instruction-set.rst
Co-developed-by: Shenghao Yuan <shenghaoyuan0928@163.com>
Signed-off-by: Shenghao Yuan <shenghaoyuan0928@163.com>
Co-developed-by: Yazhou Tang <tangyazhou518@outlook.com>
Signed-off-by: Yazhou Tang <tangyazhou518@outlook.com>
Signed-off-by: Tianci Cao <ziye@zju.edu.cn>
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260204111503.77871-2-ziye@zju.edu.cn
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
135 lines
4.6 KiB
C
135 lines
4.6 KiB
C
/* tnum: tracked (or tristate) numbers
|
|
*
|
|
* A tnum tracks knowledge about the bits of a value. Each bit can be either
|
|
* known (0 or 1), or unknown (x). Arithmetic operations on tnums will
|
|
* propagate the unknown bits such that the tnum result represents all the
|
|
* possible results for possible values of the operands.
|
|
*/
|
|
|
|
#ifndef _LINUX_TNUM_H
|
|
#define _LINUX_TNUM_H
|
|
|
|
#include <linux/types.h>
|
|
|
|
struct tnum {
|
|
u64 value;
|
|
u64 mask;
|
|
};
|
|
|
|
/* Constructors */
|
|
/* Represent a known constant as a tnum. */
|
|
struct tnum tnum_const(u64 value);
|
|
/* A completely unknown value */
|
|
extern const struct tnum tnum_unknown;
|
|
/* An unknown value that is a superset of @min <= value <= @max.
|
|
*
|
|
* Could include values outside the range of [@min, @max].
|
|
* For example tnum_range(0, 2) is represented by {0, 1, 2, *3*},
|
|
* rather than the intended set of {0, 1, 2}.
|
|
*/
|
|
struct tnum tnum_range(u64 min, u64 max);
|
|
|
|
/* Arithmetic and logical ops */
|
|
/* Shift a tnum left (by a fixed shift) */
|
|
struct tnum tnum_lshift(struct tnum a, u8 shift);
|
|
/* Shift (rsh) a tnum right (by a fixed shift) */
|
|
struct tnum tnum_rshift(struct tnum a, u8 shift);
|
|
/* Shift (arsh) a tnum right (by a fixed min_shift) */
|
|
struct tnum tnum_arshift(struct tnum a, u8 min_shift, u8 insn_bitness);
|
|
/* Add two tnums, return @a + @b */
|
|
struct tnum tnum_add(struct tnum a, struct tnum b);
|
|
/* Subtract two tnums, return @a - @b */
|
|
struct tnum tnum_sub(struct tnum a, struct tnum b);
|
|
/* Neg of a tnum, return 0 - @a */
|
|
struct tnum tnum_neg(struct tnum a);
|
|
/* Bitwise-AND, return @a & @b */
|
|
struct tnum tnum_and(struct tnum a, struct tnum b);
|
|
/* Bitwise-OR, return @a | @b */
|
|
struct tnum tnum_or(struct tnum a, struct tnum b);
|
|
/* Bitwise-XOR, return @a ^ @b */
|
|
struct tnum tnum_xor(struct tnum a, struct tnum b);
|
|
/* Multiply two tnums, return @a * @b */
|
|
struct tnum tnum_mul(struct tnum a, struct tnum b);
|
|
|
|
/* Return true if the known bits of both tnums have the same value */
|
|
bool tnum_overlap(struct tnum a, struct tnum b);
|
|
|
|
/* Return a tnum representing numbers satisfying both @a and @b */
|
|
struct tnum tnum_intersect(struct tnum a, struct tnum b);
|
|
|
|
/* Returns a tnum representing numbers satisfying either @a or @b */
|
|
struct tnum tnum_union(struct tnum t1, struct tnum t2);
|
|
|
|
/* Return @a with all but the lowest @size bytes cleared */
|
|
struct tnum tnum_cast(struct tnum a, u8 size);
|
|
|
|
/* Swap the bytes of a tnum */
|
|
struct tnum tnum_bswap16(struct tnum a);
|
|
struct tnum tnum_bswap32(struct tnum a);
|
|
struct tnum tnum_bswap64(struct tnum a);
|
|
|
|
/* Returns true if @a is a known constant */
|
|
static inline bool tnum_is_const(struct tnum a)
|
|
{
|
|
return !a.mask;
|
|
}
|
|
|
|
/* Returns true if @a == tnum_const(@b) */
|
|
static inline bool tnum_equals_const(struct tnum a, u64 b)
|
|
{
|
|
return tnum_is_const(a) && a.value == b;
|
|
}
|
|
|
|
/* Returns true if @a is completely unknown */
|
|
static inline bool tnum_is_unknown(struct tnum a)
|
|
{
|
|
return !~a.mask;
|
|
}
|
|
|
|
/* Returns true if @a is known to be a multiple of @size.
|
|
* @size must be a power of two.
|
|
*/
|
|
bool tnum_is_aligned(struct tnum a, u64 size);
|
|
|
|
/* Returns true if @b represents a subset of @a.
|
|
*
|
|
* Note that using tnum_range() as @a requires extra cautions as tnum_in() may
|
|
* return true unexpectedly due to tnum limited ability to represent tight
|
|
* range, e.g.
|
|
*
|
|
* tnum_in(tnum_range(0, 2), tnum_const(3)) == true
|
|
*
|
|
* As a rule of thumb, if @a is explicitly coded rather than coming from
|
|
* reg->var_off, it should be in form of tnum_const(), tnum_range(0, 2**n - 1),
|
|
* or tnum_range(2**n, 2**(n+1) - 1).
|
|
*/
|
|
bool tnum_in(struct tnum a, struct tnum b);
|
|
|
|
/* Formatting functions. These have snprintf-like semantics: they will write
|
|
* up to @size bytes (including the terminating NUL byte), and return the number
|
|
* of bytes (excluding the terminating NUL) which would have been written had
|
|
* sufficient space been available. (Thus tnum_sbin always returns 64.)
|
|
*/
|
|
/* Format a tnum as a pair of hex numbers (value; mask) */
|
|
int tnum_strn(char *str, size_t size, struct tnum a);
|
|
/* Format a tnum as tristate binary expansion */
|
|
int tnum_sbin(char *str, size_t size, struct tnum a);
|
|
|
|
/* Returns the 32-bit subreg */
|
|
struct tnum tnum_subreg(struct tnum a);
|
|
/* Returns the tnum with the lower 32-bit subreg cleared */
|
|
struct tnum tnum_clear_subreg(struct tnum a);
|
|
/* Returns the tnum with the lower 32-bit subreg in *reg* set to the lower
|
|
* 32-bit subreg in *subreg*
|
|
*/
|
|
struct tnum tnum_with_subreg(struct tnum reg, struct tnum subreg);
|
|
/* Returns the tnum with the lower 32-bit subreg set to value */
|
|
struct tnum tnum_const_subreg(struct tnum a, u32 value);
|
|
/* Returns true if 32-bit subreg @a is a known constant*/
|
|
static inline bool tnum_subreg_is_const(struct tnum a)
|
|
{
|
|
return !(tnum_subreg(a)).mask;
|
|
}
|
|
|
|
#endif /* _LINUX_TNUM_H */
|