Files
Chuck Lever afcae7d7b8 RDMA/core: add rdma_rw_max_sge() helper for SQ sizing
svc_rdma_accept() computes sc_sq_depth as the sum of rq_depth and the
number of rdma_rw contexts (ctxts). This value is used to allocate the
Send CQ and to initialize the sc_sq_avail credit pool.

However, when the device uses memory registration for RDMA operations,
rdma_rw_init_qp() inflates the QP's max_send_wr by a factor of three
per context to account for REG and INV work requests. The Send CQ and
credit pool remain sized for only one work request per context,
causing Send Queue exhaustion under heavy NFS WRITE workloads.

Introduce rdma_rw_max_sge() to compute the actual number of Send Queue
entries required for a given number of rdma_rw contexts. Upper layer
protocols call this helper before creating a Queue Pair so that their
Send CQs and credit accounting match the QP's true capacity.

Update svc_rdma_accept() to use rdma_rw_max_sge() when computing
sc_sq_depth, ensuring the credit pool reflects the work requests
that rdma_rw_init_qp() will reserve.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Fixes: 00bd1439f4 ("RDMA/rw: Support threshold for registration vs scattering to local pages")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Link: https://patch.msgid.link/20260128005400.25147-5-cel@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2026-01-28 05:54:53 -05:00

96 lines
3.0 KiB
C

/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2016 HGST, a Western Digital Company.
*/
#ifndef _RDMA_RW_H
#define _RDMA_RW_H
#include <linux/bvec.h>
#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <rdma/mr_pool.h>
struct rdma_rw_ctx {
/* number of RDMA READ/WRITE WRs (not counting MR WRs) */
u32 nr_ops;
/* tag for the union below: */
u8 type;
union {
/* for mapping a single SGE: */
struct {
struct ib_sge sge;
struct ib_rdma_wr wr;
} single;
/* for mapping of multiple SGEs: */
struct {
struct ib_sge *sges;
struct ib_rdma_wr *wrs;
} map;
/* for IOVA-based mapping of bvecs into contiguous DMA range: */
struct {
struct dma_iova_state state;
struct ib_sge sge;
struct ib_rdma_wr wr;
size_t mapped_len;
} iova;
/* for registering multiple WRs: */
struct rdma_rw_reg_ctx {
struct ib_sge sge;
struct ib_rdma_wr wr;
struct ib_reg_wr reg_wr;
struct ib_send_wr inv_wr;
struct ib_mr *mr;
struct sg_table sgt;
} *reg;
};
};
int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num,
struct scatterlist *sg, u32 sg_cnt, u32 sg_offset,
u64 remote_addr, u32 rkey, enum dma_data_direction dir);
void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
u32 port_num, struct scatterlist *sg, u32 sg_cnt,
enum dma_data_direction dir);
struct bio_vec;
int rdma_rw_ctx_init_bvec(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
u32 port_num, const struct bio_vec *bvecs, u32 nr_bvec,
struct bvec_iter iter, u64 remote_addr, u32 rkey,
enum dma_data_direction dir);
void rdma_rw_ctx_destroy_bvec(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
u32 port_num, const struct bio_vec *bvecs, u32 nr_bvec,
enum dma_data_direction dir);
int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
u32 port_num, struct scatterlist *sg, u32 sg_cnt,
struct scatterlist *prot_sg, u32 prot_sg_cnt,
struct ib_sig_attrs *sig_attrs, u64 remote_addr, u32 rkey,
enum dma_data_direction dir);
void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
u32 port_num, struct scatterlist *sg, u32 sg_cnt,
struct scatterlist *prot_sg, u32 prot_sg_cnt,
enum dma_data_direction dir);
struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
u32 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr);
int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num,
struct ib_cqe *cqe, struct ib_send_wr *chain_wr);
unsigned int rdma_rw_mr_factor(struct ib_device *device, u32 port_num,
unsigned int maxpages);
unsigned int rdma_rw_max_send_wr(struct ib_device *dev, u32 port_num,
unsigned int max_rdma_ctxs, u32 create_flags);
void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr);
int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr);
void rdma_rw_cleanup_mrs(struct ib_qp *qp);
#endif /* _RDMA_RW_H */