mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2026-06-21 15:43:21 +02:00
rds: convert to getsockopt_iter
Convert RDS socket's getsockopt implementation to use the new
getsockopt_iter callback with sockopt_t.

Key changes:
- Replace (char __user *optval, int __user *optlen) with sockopt_t *opt
- Use opt->optlen for buffer length (input) and returned size (output)
- Use copy_to_iter() instead of put_user()/copy_to_user()

The RDS_INFO_* snapshot path in rds_info_getsockopt() used to pin the
userspace buffer with pin_user_pages_fast() on the raw optval address;
the info producers then memcpy into those pages under a spinlock via
kmap_atomic() and so must not fault. Obtain the same page array and
starting offset from opt->iter_out with iov_iter_extract_pages(), which
pins for write because iter_out is ITER_DEST.

The page array is preallocated here (sized with iov_iter_npages()) and
passed in, so iov_iter_extract_pages() fills it in place rather than
allocating one for us; RDS therefore keeps ownership of the array on
every return path and frees it itself. The rds_info_iterator /
rds_info_copy machinery and all producer callbacks are unchanged.

Kernel buffers (ITER_KVEC) are not page-backed in a way the info
producers can use, so the RDS_INFO path returns -EOPNOTSUPP for them;
this matches the previous behaviour, where a kernel-buffer getsockopt
hit the WARN_ONCE() path in do_sock_getsockopt() and returned
-EOPNOTSUPP. The simple RDS_RECVERR and SO_RDS_TRANSPORT options keep
working for kernel buffers via copy_to_iter().

Signed-off-by: Breno Leitao <leitao@debian.org>
Reviewed-by: Allison Henderson <achender@kernel.org>
Link: https://patch.msgid.link/20260608-getsock_more-v3-2-706ecf2ea332@debian.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
committed by
Paolo Abeni
parent
b74360369e
commit
6e94eeb2a2
+20
-16
@@ -37,6 +37,7 @@
|
||||
#include <linux/in.h>
|
||||
#include <linux/ipv6.h>
|
||||
#include <linux/poll.h>
|
||||
#include <linux/uio.h>
|
||||
#include <net/sock.h>
|
||||
|
||||
#include "rds.h"
|
||||
@@ -485,35 +486,36 @@ out:
|
||||
}
|
||||
|
||||
static int rds_getsockopt(struct socket *sock, int level, int optname,
|
||||
char __user *optval, int __user *optlen)
|
||||
sockopt_t *opt)
|
||||
{
|
||||
struct rds_sock *rs = rds_sk_to_rs(sock->sk);
|
||||
int ret = -ENOPROTOOPT, len;
|
||||
int trans;
|
||||
int val;
|
||||
|
||||
if (level != SOL_RDS)
|
||||
goto out;
|
||||
|
||||
if (get_user(len, optlen)) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
len = opt->optlen;
|
||||
|
||||
switch (optname) {
|
||||
case RDS_INFO_FIRST ... RDS_INFO_LAST:
|
||||
ret = rds_info_getsockopt(sock, optname, optval,
|
||||
optlen);
|
||||
ret = rds_info_getsockopt(sock, optname, opt);
|
||||
break;
|
||||
|
||||
case RDS_RECVERR:
|
||||
if (len < sizeof(int))
|
||||
if (len < sizeof(int)) {
|
||||
ret = -EINVAL;
|
||||
else
|
||||
if (put_user(rs->rs_recverr, (int __user *) optval) ||
|
||||
put_user(sizeof(int), optlen))
|
||||
break;
|
||||
}
|
||||
val = rs->rs_recverr;
|
||||
if (copy_to_iter(&val, sizeof(int), &opt->iter_out) !=
|
||||
sizeof(int)) {
|
||||
ret = -EFAULT;
|
||||
else
|
||||
} else {
|
||||
opt->optlen = sizeof(int);
|
||||
ret = 0;
|
||||
}
|
||||
break;
|
||||
case SO_RDS_TRANSPORT:
|
||||
if (len < sizeof(int)) {
|
||||
@@ -522,11 +524,13 @@ static int rds_getsockopt(struct socket *sock, int level, int optname,
|
||||
}
|
||||
trans = (rs->rs_transport ? rs->rs_transport->t_type :
|
||||
RDS_TRANS_NONE); /* unbound */
|
||||
if (put_user(trans, (int __user *)optval) ||
|
||||
put_user(sizeof(int), optlen))
|
||||
if (copy_to_iter(&trans, sizeof(int), &opt->iter_out) !=
|
||||
sizeof(int)) {
|
||||
ret = -EFAULT;
|
||||
else
|
||||
} else {
|
||||
opt->optlen = sizeof(int);
|
||||
ret = 0;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
@@ -653,7 +657,7 @@ static const struct proto_ops rds_proto_ops = {
|
||||
.listen = sock_no_listen,
|
||||
.shutdown = sock_no_shutdown,
|
||||
.setsockopt = rds_setsockopt,
|
||||
.getsockopt = rds_getsockopt,
|
||||
.getsockopt_iter = rds_getsockopt,
|
||||
.sendmsg = rds_sendmsg,
|
||||
.recvmsg = rds_recvmsg,
|
||||
.mmap = sock_no_mmap,
|
||||
|
||||
+44
-32
@@ -35,6 +35,7 @@
|
||||
#include <linux/slab.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/uio.h>
|
||||
|
||||
#include "rds.h"
|
||||
|
||||
@@ -144,39 +145,41 @@ void rds_info_copy(struct rds_info_iterator *iter, void *data,
|
||||
EXPORT_SYMBOL_GPL(rds_info_copy);
|
||||
|
||||
/*
|
||||
* @optval points to the userspace buffer that the information snapshot
|
||||
* will be copied into.
|
||||
*
|
||||
* @optlen on input is the size of the buffer in userspace. @optlen
|
||||
* on output is the size of the requested snapshot in bytes.
|
||||
* @opt->iter_out describes the buffer that the information snapshot will be
|
||||
* copied into, and @opt->optlen is the size of that buffer on input. On
|
||||
* output @opt->optlen is set to the size of the requested snapshot in bytes.
|
||||
*
|
||||
* This function returns -errno if there is a failure, particularly -ENOSPC
|
||||
* if the given userspace buffer was not large enough to fit the snapshot.
|
||||
* On success it returns the positive number of bytes of each array element
|
||||
* in the snapshot.
|
||||
* if the given buffer was not large enough to fit the snapshot. On success
|
||||
* it returns the positive number of bytes of each array element in the
|
||||
* snapshot.
|
||||
*/
|
||||
int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval,
|
||||
int __user *optlen)
|
||||
int rds_info_getsockopt(struct socket *sock, int optname, sockopt_t *opt)
|
||||
{
|
||||
struct rds_info_iterator iter;
|
||||
struct rds_info_lengths lens;
|
||||
unsigned long nr_pages = 0;
|
||||
unsigned long start;
|
||||
rds_info_func func;
|
||||
struct page **pages = NULL;
|
||||
size_t offset0 = 0;
|
||||
int npages = 0;
|
||||
int ret;
|
||||
int len;
|
||||
int total;
|
||||
|
||||
if (get_user(len, optlen)) {
|
||||
ret = -EFAULT;
|
||||
len = opt->optlen;
|
||||
|
||||
/* check for all kinds of wrapping and the like */
|
||||
if (len < 0 || len > INT_MAX - PAGE_SIZE + 1) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* check for all kinds of wrapping and the like */
|
||||
start = (unsigned long)optval;
|
||||
if (len < 0 || len > INT_MAX - PAGE_SIZE + 1 || start + len < start) {
|
||||
ret = -EINVAL;
|
||||
/* The info producers write into the pages with kmap_atomic() while
|
||||
* holding a spinlock, so they need a genuine page-backed user buffer.
|
||||
*/
|
||||
if (!user_backed_iter(&opt->iter_out)) {
|
||||
ret = -EOPNOTSUPP;
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -184,20 +187,26 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval,
|
||||
if (len == 0)
|
||||
goto call_func;
|
||||
|
||||
nr_pages = (PAGE_ALIGN(start + len) - (start & PAGE_MASK))
|
||||
>> PAGE_SHIFT;
|
||||
|
||||
pages = kmalloc_objs(struct page *, nr_pages);
|
||||
/*
|
||||
* Preallocate the page array and pass it in so that
|
||||
* iov_iter_extract_pages() fills it in place rather than allocating
|
||||
* one for us. Handing it a non-NULL array keeps ownership of the
|
||||
* array with us on every return path, instead of depending on the
|
||||
* iterator code to allocate and hand it back.
|
||||
*/
|
||||
npages = iov_iter_npages(&opt->iter_out, INT_MAX);
|
||||
pages = kvmalloc_array(npages, sizeof(*pages), GFP_KERNEL);
|
||||
if (!pages) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
ret = pin_user_pages_fast(start, nr_pages, FOLL_WRITE, pages);
|
||||
if (ret != nr_pages) {
|
||||
if (ret > 0)
|
||||
nr_pages = ret;
|
||||
else
|
||||
nr_pages = 0;
|
||||
|
||||
ret = iov_iter_extract_pages(&opt->iter_out, &pages, len, npages,
|
||||
0, &offset0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
nr_pages = DIV_ROUND_UP(offset0 + ret, PAGE_SIZE);
|
||||
if (ret != len) {
|
||||
ret = -EAGAIN; /* XXX ? */
|
||||
goto out;
|
||||
}
|
||||
@@ -213,7 +222,7 @@ call_func:
|
||||
|
||||
iter.pages = pages;
|
||||
iter.addr = NULL;
|
||||
iter.offset = start & (PAGE_SIZE - 1);
|
||||
iter.offset = offset0;
|
||||
|
||||
func(sock, len, &iter, &lens);
|
||||
BUG_ON(lens.each == 0);
|
||||
@@ -230,13 +239,16 @@ call_func:
|
||||
ret = lens.each;
|
||||
}
|
||||
|
||||
if (put_user(len, optlen))
|
||||
ret = -EFAULT;
|
||||
opt->optlen = len;
|
||||
|
||||
out:
|
||||
if (pages)
|
||||
/*
|
||||
* iov_iter_extract_pages() pins only user-backed (ubuf) iters;
|
||||
* iov_iter_extract_will_pin() reports whether an unpin is owed here.
|
||||
*/
|
||||
if (pages && iov_iter_extract_will_pin(&opt->iter_out))
|
||||
unpin_user_pages(pages, nr_pages);
|
||||
kfree(pages);
|
||||
kvfree(pages);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
+1
-2
@@ -21,8 +21,7 @@ typedef void (*rds_info_func)(struct socket *sock, unsigned int len,
|
||||
|
||||
void rds_info_register_func(int optname, rds_info_func func);
|
||||
void rds_info_deregister_func(int optname, rds_info_func func);
|
||||
int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval,
|
||||
int __user *optlen);
|
||||
int rds_info_getsockopt(struct socket *sock, int optname, sockopt_t *opt);
|
||||
void rds_info_copy(struct rds_info_iterator *iter, void *data,
|
||||
unsigned long bytes);
|
||||
void rds_info_iter_unmap(struct rds_info_iterator *iter);
|
||||
|
||||
Reference in New Issue
Block a user