mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2026-04-24 10:49:54 +02:00
b8a5308fee
commit af6d10345c upstream.
In ip6_dst_gc() replace:
if (entries > gc_thresh)
With:
if (entries > ops->gc_thresh)
Sending IPv6 packets in a loop via a raw socket triggers an issue where a
route is cloned by ip6_rt_cache_alloc() for each packet sent. This quickly
consumes the IPv6 max_size threshold, which defaults to 4096, resulting in
these warnings:
[1] 99.187805] dst_alloc: 7728 callbacks suppressed
[2] Route cache is full: consider increasing sysctl net.ipv6.route.max_size.
.
.
[300] Route cache is full: consider increasing sysctl net.ipv6.route.max_size.
When this happens, the packet is dropped and sendto() fails with a
"network is unreachable" error:
remaining pkt 200557 errno 101
remaining pkt 196462 errno 101
.
.
remaining pkt 126821 errno 101
Implement David Ahern's suggestion to remove the max_size check, seeing that
IPv6 has a GC to manage memory usage. IPv4 already does not check max_size.
Here are some memory comparisons for IPv4 vs IPv6 with the patch:
Test by running 5 instances of a program that sends UDP packets to a raw
socket 5000000 times. Compare IPv4 and IPv6 performance with a similar
program.
IPv4:
Before test:
MemFree: 29427108 kB
Slab: 237612 kB
ip6_dst_cache 1912 2528 256 32 2 : tunables 0 0 0
xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0
ip_dst_cache 2881 3990 192 42 2 : tunables 0 0 0
During test:
MemFree: 29417608 kB
Slab: 247712 kB
ip6_dst_cache 1912 2528 256 32 2 : tunables 0 0 0
xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0
ip_dst_cache 44394 44394 192 42 2 : tunables 0 0 0
After test:
MemFree: 29422308 kB
Slab: 238104 kB
ip6_dst_cache 1912 2528 256 32 2 : tunables 0 0 0
xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0
ip_dst_cache 3048 4116 192 42 2 : tunables 0 0 0
IPv6 with patch:
Errno 101 errors are no longer observed with the patch.
Before test:
MemFree: 29422308 kB
Slab: 238104 kB
ip6_dst_cache 1912 2528 256 32 2 : tunables 0 0 0
xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0
ip_dst_cache 3048 4116 192 42 2 : tunables 0 0 0
During Test:
MemFree: 29431516 kB
Slab: 240940 kB
ip6_dst_cache 11980 12064 256 32 2 : tunables 0 0 0
xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0
ip_dst_cache 3048 4116 192 42 2 : tunables 0 0 0
After Test:
MemFree: 29441816 kB
Slab: 238132 kB
ip6_dst_cache 1902 2432 256 32 2 : tunables 0 0 0
xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0
ip_dst_cache 3048 4116 192 42 2 : tunables 0 0 0
Tested-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Signed-off-by: Jon Maxwell <jmaxwell37@gmail.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/20230112012532.311021-1-jmaxwell37@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Cc: "Jitindar Singh, Suraj" <surajjs@amazon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
74 lines
2.1 KiB
C
74 lines
2.1 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _NET_DST_OPS_H
|
|
#define _NET_DST_OPS_H
|
|
#include <linux/types.h>
|
|
#include <linux/percpu_counter.h>
|
|
#include <linux/cache.h>
|
|
|
|
/*
 * Forward declarations: only pointers to these types appear in this header,
 * so incomplete types are sufficient here.
 */
struct dst_entry;
struct kmem_cache;	/* pointee type of dst_ops::kmem_cachep */
struct net_device;
struct sk_buff;
struct sock;
struct net;
|
|
|
|
struct dst_ops {
|
|
unsigned short family;
|
|
unsigned int gc_thresh;
|
|
|
|
void (*gc)(struct dst_ops *ops);
|
|
struct dst_entry * (*check)(struct dst_entry *, __u32 cookie);
|
|
unsigned int (*default_advmss)(const struct dst_entry *);
|
|
unsigned int (*mtu)(const struct dst_entry *);
|
|
u32 * (*cow_metrics)(struct dst_entry *, unsigned long);
|
|
void (*destroy)(struct dst_entry *);
|
|
void (*ifdown)(struct dst_entry *,
|
|
struct net_device *dev, int how);
|
|
struct dst_entry * (*negative_advice)(struct dst_entry *);
|
|
void (*link_failure)(struct sk_buff *);
|
|
void (*update_pmtu)(struct dst_entry *dst, struct sock *sk,
|
|
struct sk_buff *skb, u32 mtu,
|
|
bool confirm_neigh);
|
|
void (*redirect)(struct dst_entry *dst, struct sock *sk,
|
|
struct sk_buff *skb);
|
|
int (*local_out)(struct net *net, struct sock *sk, struct sk_buff *skb);
|
|
struct neighbour * (*neigh_lookup)(const struct dst_entry *dst,
|
|
struct sk_buff *skb,
|
|
const void *daddr);
|
|
void (*confirm_neigh)(const struct dst_entry *dst,
|
|
const void *daddr);
|
|
|
|
struct kmem_cache *kmem_cachep;
|
|
|
|
struct percpu_counter pcpuc_entries ____cacheline_aligned_in_smp;
|
|
};
|
|
|
|
static inline int dst_entries_get_fast(struct dst_ops *dst)
|
|
{
|
|
return percpu_counter_read_positive(&dst->pcpuc_entries);
|
|
}
|
|
|
|
static inline int dst_entries_get_slow(struct dst_ops *dst)
|
|
{
|
|
return percpu_counter_sum_positive(&dst->pcpuc_entries);
|
|
}
|
|
|
|
#define DST_PERCPU_COUNTER_BATCH 32
|
|
static inline void dst_entries_add(struct dst_ops *dst, int val)
|
|
{
|
|
percpu_counter_add_batch(&dst->pcpuc_entries, val,
|
|
DST_PERCPU_COUNTER_BATCH);
|
|
}
|
|
|
|
static inline int dst_entries_init(struct dst_ops *dst)
|
|
{
|
|
return percpu_counter_init(&dst->pcpuc_entries, 0, GFP_KERNEL);
|
|
}
|
|
|
|
static inline void dst_entries_destroy(struct dst_ops *dst)
|
|
{
|
|
percpu_counter_destroy(&dst->pcpuc_entries);
|
|
}
|
|
|
|
#endif
|