mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2026-04-03 12:05:13 +02:00
[ Upstream commitae431059e7] When unbinding a memslot from a guest_memfd instance, remove the bindings even if the guest_memfd file is dying, i.e. even if its file refcount has gone to zero. If the memslot is freed before the file is fully released, nullifying the memslot side of the binding in kvm_gmem_release() will write to freed memory, as detected by syzbot+KASAN: ================================================================== BUG: KASAN: slab-use-after-free in kvm_gmem_release+0x176/0x440 virt/kvm/guest_memfd.c:353 Write of size 8 at addr ffff88807befa508 by task syz.0.17/6022 CPU: 0 UID: 0 PID: 6022 Comm: syz.0.17 Not tainted syzkaller #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/02/2025 Call Trace: <TASK> dump_stack_lvl+0x189/0x250 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:378 [inline] print_report+0xca/0x240 mm/kasan/report.c:482 kasan_report+0x118/0x150 mm/kasan/report.c:595 kvm_gmem_release+0x176/0x440 virt/kvm/guest_memfd.c:353 __fput+0x44c/0xa70 fs/file_table.c:468 task_work_run+0x1d4/0x260 kernel/task_work.c:227 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] exit_to_user_mode_loop+0xe9/0x130 kernel/entry/common.c:43 exit_to_user_mode_prepare include/linux/irq-entry-common.h:225 [inline] syscall_exit_to_user_mode_work include/linux/entry-common.h:175 [inline] syscall_exit_to_user_mode include/linux/entry-common.h:210 [inline] do_syscall_64+0x2bd/0xfa0 arch/x86/entry/syscall_64.c:100 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7fbeeff8efc9 </TASK> Allocated by task 6023: kasan_save_stack mm/kasan/common.c:56 [inline] kasan_save_track+0x3e/0x80 mm/kasan/common.c:77 poison_kmalloc_redzone mm/kasan/common.c:397 [inline] __kasan_kmalloc+0x93/0xb0 mm/kasan/common.c:414 kasan_kmalloc include/linux/kasan.h:262 [inline] __kmalloc_cache_noprof+0x3e2/0x700 mm/slub.c:5758 kmalloc_noprof include/linux/slab.h:957 [inline] kzalloc_noprof include/linux/slab.h:1094 [inline] kvm_set_memory_region+0x747/0xb90 virt/kvm/kvm_main.c:2104 kvm_vm_ioctl_set_memory_region+0x6f/0xd0 virt/kvm/kvm_main.c:2154 kvm_vm_ioctl+0x957/0xc60 virt/kvm/kvm_main.c:5201 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:597 [inline] __se_sys_ioctl+0xfc/0x170 fs/ioctl.c:583 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f Freed by task 6023: kasan_save_stack mm/kasan/common.c:56 [inline] kasan_save_track+0x3e/0x80 mm/kasan/common.c:77 kasan_save_free_info+0x46/0x50 mm/kasan/generic.c:584 poison_slab_object mm/kasan/common.c:252 [inline] __kasan_slab_free+0x5c/0x80 mm/kasan/common.c:284 kasan_slab_free include/linux/kasan.h:234 [inline] slab_free_hook mm/slub.c:2533 [inline] slab_free mm/slub.c:6622 [inline] kfree+0x19a/0x6d0 mm/slub.c:6829 kvm_set_memory_region+0x9c4/0xb90 virt/kvm/kvm_main.c:2130 kvm_vm_ioctl_set_memory_region+0x6f/0xd0 virt/kvm/kvm_main.c:2154 kvm_vm_ioctl+0x957/0xc60 virt/kvm/kvm_main.c:5201 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:597 [inline] __se_sys_ioctl+0xfc/0x170 fs/ioctl.c:583 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f Deliberately don't acquire filemap invalid lock when the file is dying as the lifecycle of f_mapping is outside the purview of KVM. Dereferencing the mapping is *probably* fine, but there's no need to invalidate anything as memslot deletion is responsible for zapping SPTEs, and the only code that can access the dying file is kvm_gmem_release(), whose core code is mutually exclusive with unbinding. Note, the mutual exclusivity is also what makes it safe to access the bindings on a dying gmem instance. Unbinding either runs with slots_lock held, or after the last reference to the owning "struct kvm" is put, and kvm_gmem_release() nullifies the slot pointer under slots_lock, and puts its reference to the VM after that is done. Reported-by: syzbot+2479e53d0db9b32ae2aa@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/68fa7a22.a70a0220.3bf6c6.008b.GAE@google.com Tested-by: syzbot+2479e53d0db9b32ae2aa@syzkaller.appspotmail.com Fixes:a7800aa80e("KVM: Add KVM_CREATE_GUEST_MEMFD ioctl() for guest-specific backing memory") Cc: stable@vger.kernel.org Cc: Hillf Danton <hdanton@sina.com> Reviewed-By: Vishal Annapurve <vannapurve@google.com> Link: https://patch.msgid.link/20251104011205.3853541-1-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc@google.com> Signed-off-by: Sasha Levin <sashal@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
738 lines
18 KiB
C
738 lines
18 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#include <linux/backing-dev.h>
|
|
#include <linux/falloc.h>
|
|
#include <linux/kvm_host.h>
|
|
#include <linux/pagemap.h>
|
|
#include <linux/anon_inodes.h>
|
|
|
|
#include "kvm_mm.h"
|
|
|
|
struct kvm_gmem {
|
|
struct kvm *kvm;
|
|
struct xarray bindings;
|
|
struct list_head entry;
|
|
};
|
|
|
|
/**
|
|
* folio_file_pfn - like folio_file_page, but return a pfn.
|
|
* @folio: The folio which contains this index.
|
|
* @index: The index we want to look up.
|
|
*
|
|
* Return: The pfn for this index.
|
|
*/
|
|
static inline kvm_pfn_t folio_file_pfn(struct folio *folio, pgoff_t index)
|
|
{
|
|
return folio_pfn(folio) + (index & (folio_nr_pages(folio) - 1));
|
|
}
|
|
|
|
static int __kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
|
|
pgoff_t index, struct folio *folio)
|
|
{
|
|
#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE
|
|
kvm_pfn_t pfn = folio_file_pfn(folio, index);
|
|
gfn_t gfn = slot->base_gfn + index - slot->gmem.pgoff;
|
|
int rc = kvm_arch_gmem_prepare(kvm, gfn, pfn, folio_order(folio));
|
|
if (rc) {
|
|
pr_warn_ratelimited("gmem: Failed to prepare folio for index %lx GFN %llx PFN %llx error %d.\n",
|
|
index, gfn, pfn, rc);
|
|
return rc;
|
|
}
|
|
#endif
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline void kvm_gmem_mark_prepared(struct folio *folio)
|
|
{
|
|
folio_mark_uptodate(folio);
|
|
}
|
|
|
|
/*
|
|
* Process @folio, which contains @gfn, so that the guest can use it.
|
|
* The folio must be locked and the gfn must be contained in @slot.
|
|
* On successful return the guest sees a zero page so as to avoid
|
|
* leaking host data and the up-to-date flag is set.
|
|
*/
|
|
static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
|
|
gfn_t gfn, struct folio *folio)
|
|
{
|
|
unsigned long nr_pages, i;
|
|
pgoff_t index;
|
|
int r;
|
|
|
|
nr_pages = folio_nr_pages(folio);
|
|
for (i = 0; i < nr_pages; i++)
|
|
clear_highpage(folio_page(folio, i));
|
|
|
|
/*
|
|
* Preparing huge folios should always be safe, since it should
|
|
* be possible to split them later if needed.
|
|
*
|
|
* Right now the folio order is always going to be zero, but the
|
|
* code is ready for huge folios. The only assumption is that
|
|
* the base pgoff of memslots is naturally aligned with the
|
|
* requested page order, ensuring that huge folios can also use
|
|
* huge page table entries for GPA->HPA mapping.
|
|
*
|
|
* The order will be passed when creating the guest_memfd, and
|
|
* checked when creating memslots.
|
|
*/
|
|
WARN_ON(!IS_ALIGNED(slot->gmem.pgoff, 1 << folio_order(folio)));
|
|
index = gfn - slot->base_gfn + slot->gmem.pgoff;
|
|
index = ALIGN_DOWN(index, 1 << folio_order(folio));
|
|
r = __kvm_gmem_prepare_folio(kvm, slot, index, folio);
|
|
if (!r)
|
|
kvm_gmem_mark_prepared(folio);
|
|
|
|
return r;
|
|
}
|
|
|
|
/*
|
|
* Returns a locked folio on success. The caller is responsible for
|
|
* setting the up-to-date flag before the memory is mapped into the guest.
|
|
* There is no backing storage for the memory, so the folio will remain
|
|
* up-to-date until it's removed.
|
|
*
|
|
* Ignore accessed, referenced, and dirty flags. The memory is
|
|
* unevictable and there is no storage to write back to.
|
|
*/
|
|
static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
|
|
{
|
|
/* TODO: Support huge pages. */
|
|
return filemap_grab_folio(inode->i_mapping, index);
|
|
}
|
|
|
|
static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
|
|
pgoff_t end)
|
|
{
|
|
bool flush = false, found_memslot = false;
|
|
struct kvm_memory_slot *slot;
|
|
struct kvm *kvm = gmem->kvm;
|
|
unsigned long index;
|
|
|
|
xa_for_each_range(&gmem->bindings, index, slot, start, end - 1) {
|
|
pgoff_t pgoff = slot->gmem.pgoff;
|
|
|
|
struct kvm_gfn_range gfn_range = {
|
|
.start = slot->base_gfn + max(pgoff, start) - pgoff,
|
|
.end = slot->base_gfn + min(pgoff + slot->npages, end) - pgoff,
|
|
.slot = slot,
|
|
.may_block = true,
|
|
/* guest memfd is relevant to only private mappings. */
|
|
.attr_filter = KVM_FILTER_PRIVATE,
|
|
};
|
|
|
|
if (!found_memslot) {
|
|
found_memslot = true;
|
|
|
|
KVM_MMU_LOCK(kvm);
|
|
kvm_mmu_invalidate_begin(kvm);
|
|
}
|
|
|
|
flush |= kvm_mmu_unmap_gfn_range(kvm, &gfn_range);
|
|
}
|
|
|
|
if (flush)
|
|
kvm_flush_remote_tlbs(kvm);
|
|
|
|
if (found_memslot)
|
|
KVM_MMU_UNLOCK(kvm);
|
|
}
|
|
|
|
static void kvm_gmem_invalidate_end(struct kvm_gmem *gmem, pgoff_t start,
|
|
pgoff_t end)
|
|
{
|
|
struct kvm *kvm = gmem->kvm;
|
|
|
|
if (xa_find(&gmem->bindings, &start, end - 1, XA_PRESENT)) {
|
|
KVM_MMU_LOCK(kvm);
|
|
kvm_mmu_invalidate_end(kvm);
|
|
KVM_MMU_UNLOCK(kvm);
|
|
}
|
|
}
|
|
|
|
static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len)
|
|
{
|
|
struct list_head *gmem_list = &inode->i_mapping->i_private_list;
|
|
pgoff_t start = offset >> PAGE_SHIFT;
|
|
pgoff_t end = (offset + len) >> PAGE_SHIFT;
|
|
struct kvm_gmem *gmem;
|
|
|
|
/*
|
|
* Bindings must be stable across invalidation to ensure the start+end
|
|
* are balanced.
|
|
*/
|
|
filemap_invalidate_lock(inode->i_mapping);
|
|
|
|
list_for_each_entry(gmem, gmem_list, entry)
|
|
kvm_gmem_invalidate_begin(gmem, start, end);
|
|
|
|
truncate_inode_pages_range(inode->i_mapping, offset, offset + len - 1);
|
|
|
|
list_for_each_entry(gmem, gmem_list, entry)
|
|
kvm_gmem_invalidate_end(gmem, start, end);
|
|
|
|
filemap_invalidate_unlock(inode->i_mapping);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len)
|
|
{
|
|
struct address_space *mapping = inode->i_mapping;
|
|
pgoff_t start, index, end;
|
|
int r;
|
|
|
|
/* Dedicated guest is immutable by default. */
|
|
if (offset + len > i_size_read(inode))
|
|
return -EINVAL;
|
|
|
|
filemap_invalidate_lock_shared(mapping);
|
|
|
|
start = offset >> PAGE_SHIFT;
|
|
end = (offset + len) >> PAGE_SHIFT;
|
|
|
|
r = 0;
|
|
for (index = start; index < end; ) {
|
|
struct folio *folio;
|
|
|
|
if (signal_pending(current)) {
|
|
r = -EINTR;
|
|
break;
|
|
}
|
|
|
|
folio = kvm_gmem_get_folio(inode, index);
|
|
if (IS_ERR(folio)) {
|
|
r = PTR_ERR(folio);
|
|
break;
|
|
}
|
|
|
|
index = folio_next_index(folio);
|
|
|
|
folio_unlock(folio);
|
|
folio_put(folio);
|
|
|
|
/* 64-bit only, wrapping the index should be impossible. */
|
|
if (WARN_ON_ONCE(!index))
|
|
break;
|
|
|
|
cond_resched();
|
|
}
|
|
|
|
filemap_invalidate_unlock_shared(mapping);
|
|
|
|
return r;
|
|
}
|
|
|
|
static long kvm_gmem_fallocate(struct file *file, int mode, loff_t offset,
|
|
loff_t len)
|
|
{
|
|
int ret;
|
|
|
|
if (!(mode & FALLOC_FL_KEEP_SIZE))
|
|
return -EOPNOTSUPP;
|
|
|
|
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
|
|
return -EOPNOTSUPP;
|
|
|
|
if (!PAGE_ALIGNED(offset) || !PAGE_ALIGNED(len))
|
|
return -EINVAL;
|
|
|
|
if (mode & FALLOC_FL_PUNCH_HOLE)
|
|
ret = kvm_gmem_punch_hole(file_inode(file), offset, len);
|
|
else
|
|
ret = kvm_gmem_allocate(file_inode(file), offset, len);
|
|
|
|
if (!ret)
|
|
file_modified(file);
|
|
return ret;
|
|
}
|
|
|
|
static int kvm_gmem_release(struct inode *inode, struct file *file)
|
|
{
|
|
struct kvm_gmem *gmem = file->private_data;
|
|
struct kvm_memory_slot *slot;
|
|
struct kvm *kvm = gmem->kvm;
|
|
unsigned long index;
|
|
|
|
/*
|
|
* Prevent concurrent attempts to *unbind* a memslot. This is the last
|
|
* reference to the file and thus no new bindings can be created, but
|
|
* dereferencing the slot for existing bindings needs to be protected
|
|
* against memslot updates, specifically so that unbind doesn't race
|
|
* and free the memslot (kvm_gmem_get_file() will return NULL).
|
|
*
|
|
* Since .release is called only when the reference count is zero,
|
|
* after which file_ref_get() and get_file_active() fail,
|
|
* kvm_gmem_get_pfn() cannot be using the file concurrently.
|
|
* file_ref_put() provides a full barrier, and get_file_active() the
|
|
* matching acquire barrier.
|
|
*/
|
|
mutex_lock(&kvm->slots_lock);
|
|
|
|
filemap_invalidate_lock(inode->i_mapping);
|
|
|
|
xa_for_each(&gmem->bindings, index, slot)
|
|
WRITE_ONCE(slot->gmem.file, NULL);
|
|
|
|
/*
|
|
* All in-flight operations are gone and new bindings can be created.
|
|
* Zap all SPTEs pointed at by this file. Do not free the backing
|
|
* memory, as its lifetime is associated with the inode, not the file.
|
|
*/
|
|
kvm_gmem_invalidate_begin(gmem, 0, -1ul);
|
|
kvm_gmem_invalidate_end(gmem, 0, -1ul);
|
|
|
|
list_del(&gmem->entry);
|
|
|
|
filemap_invalidate_unlock(inode->i_mapping);
|
|
|
|
mutex_unlock(&kvm->slots_lock);
|
|
|
|
xa_destroy(&gmem->bindings);
|
|
kfree(gmem);
|
|
|
|
kvm_put_kvm(kvm);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline struct file *kvm_gmem_get_file(struct kvm_memory_slot *slot)
|
|
{
|
|
/*
|
|
* Do not return slot->gmem.file if it has already been closed;
|
|
* there might be some time between the last fput() and when
|
|
* kvm_gmem_release() clears slot->gmem.file.
|
|
*/
|
|
return get_file_active(&slot->gmem.file);
|
|
}
|
|
|
|
static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
|
|
{
|
|
return gfn - slot->base_gfn + slot->gmem.pgoff;
|
|
}
|
|
|
|
static struct file_operations kvm_gmem_fops = {
|
|
.open = generic_file_open,
|
|
.release = kvm_gmem_release,
|
|
.fallocate = kvm_gmem_fallocate,
|
|
};
|
|
|
|
void kvm_gmem_init(struct module *module)
|
|
{
|
|
kvm_gmem_fops.owner = module;
|
|
}
|
|
|
|
static int kvm_gmem_migrate_folio(struct address_space *mapping,
|
|
struct folio *dst, struct folio *src,
|
|
enum migrate_mode mode)
|
|
{
|
|
WARN_ON_ONCE(1);
|
|
return -EINVAL;
|
|
}
|
|
|
|
static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *folio)
|
|
{
|
|
struct list_head *gmem_list = &mapping->i_private_list;
|
|
struct kvm_gmem *gmem;
|
|
pgoff_t start, end;
|
|
|
|
filemap_invalidate_lock_shared(mapping);
|
|
|
|
start = folio->index;
|
|
end = start + folio_nr_pages(folio);
|
|
|
|
list_for_each_entry(gmem, gmem_list, entry)
|
|
kvm_gmem_invalidate_begin(gmem, start, end);
|
|
|
|
/*
|
|
* Do not truncate the range, what action is taken in response to the
|
|
* error is userspace's decision (assuming the architecture supports
|
|
* gracefully handling memory errors). If/when the guest attempts to
|
|
* access a poisoned page, kvm_gmem_get_pfn() will return -EHWPOISON,
|
|
* at which point KVM can either terminate the VM or propagate the
|
|
* error to userspace.
|
|
*/
|
|
|
|
list_for_each_entry(gmem, gmem_list, entry)
|
|
kvm_gmem_invalidate_end(gmem, start, end);
|
|
|
|
filemap_invalidate_unlock_shared(mapping);
|
|
|
|
return MF_DELAYED;
|
|
}
|
|
|
|
#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
|
|
static void kvm_gmem_free_folio(struct folio *folio)
|
|
{
|
|
struct page *page = folio_page(folio, 0);
|
|
kvm_pfn_t pfn = page_to_pfn(page);
|
|
int order = folio_order(folio);
|
|
|
|
kvm_arch_gmem_invalidate(pfn, pfn + (1ul << order));
|
|
}
|
|
#endif
|
|
|
|
static const struct address_space_operations kvm_gmem_aops = {
|
|
.dirty_folio = noop_dirty_folio,
|
|
.migrate_folio = kvm_gmem_migrate_folio,
|
|
.error_remove_folio = kvm_gmem_error_folio,
|
|
#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
|
|
.free_folio = kvm_gmem_free_folio,
|
|
#endif
|
|
};
|
|
|
|
static int kvm_gmem_getattr(struct mnt_idmap *idmap, const struct path *path,
|
|
struct kstat *stat, u32 request_mask,
|
|
unsigned int query_flags)
|
|
{
|
|
struct inode *inode = path->dentry->d_inode;
|
|
|
|
generic_fillattr(idmap, request_mask, inode, stat);
|
|
return 0;
|
|
}
|
|
|
|
static int kvm_gmem_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
|
|
struct iattr *attr)
|
|
{
|
|
return -EINVAL;
|
|
}
|
|
static const struct inode_operations kvm_gmem_iops = {
|
|
.getattr = kvm_gmem_getattr,
|
|
.setattr = kvm_gmem_setattr,
|
|
};
|
|
|
|
static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
|
|
{
|
|
const char *anon_name = "[kvm-gmem]";
|
|
struct kvm_gmem *gmem;
|
|
struct inode *inode;
|
|
struct file *file;
|
|
int fd, err;
|
|
|
|
fd = get_unused_fd_flags(0);
|
|
if (fd < 0)
|
|
return fd;
|
|
|
|
gmem = kzalloc(sizeof(*gmem), GFP_KERNEL);
|
|
if (!gmem) {
|
|
err = -ENOMEM;
|
|
goto err_fd;
|
|
}
|
|
|
|
file = anon_inode_create_getfile(anon_name, &kvm_gmem_fops, gmem,
|
|
O_RDWR, NULL);
|
|
if (IS_ERR(file)) {
|
|
err = PTR_ERR(file);
|
|
goto err_gmem;
|
|
}
|
|
|
|
file->f_flags |= O_LARGEFILE;
|
|
|
|
inode = file->f_inode;
|
|
WARN_ON(file->f_mapping != inode->i_mapping);
|
|
|
|
inode->i_private = (void *)(unsigned long)flags;
|
|
inode->i_op = &kvm_gmem_iops;
|
|
inode->i_mapping->a_ops = &kvm_gmem_aops;
|
|
inode->i_mode |= S_IFREG;
|
|
inode->i_size = size;
|
|
mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
|
|
mapping_set_inaccessible(inode->i_mapping);
|
|
/* Unmovable mappings are supposed to be marked unevictable as well. */
|
|
WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
|
|
|
|
kvm_get_kvm(kvm);
|
|
gmem->kvm = kvm;
|
|
xa_init(&gmem->bindings);
|
|
list_add(&gmem->entry, &inode->i_mapping->i_private_list);
|
|
|
|
fd_install(fd, file);
|
|
return fd;
|
|
|
|
err_gmem:
|
|
kfree(gmem);
|
|
err_fd:
|
|
put_unused_fd(fd);
|
|
return err;
|
|
}
|
|
|
|
int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args)
|
|
{
|
|
loff_t size = args->size;
|
|
u64 flags = args->flags;
|
|
u64 valid_flags = 0;
|
|
|
|
if (flags & ~valid_flags)
|
|
return -EINVAL;
|
|
|
|
if (size <= 0 || !PAGE_ALIGNED(size))
|
|
return -EINVAL;
|
|
|
|
return __kvm_gmem_create(kvm, size, flags);
|
|
}
|
|
|
|
int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot,
|
|
unsigned int fd, loff_t offset)
|
|
{
|
|
loff_t size = slot->npages << PAGE_SHIFT;
|
|
unsigned long start, end;
|
|
struct kvm_gmem *gmem;
|
|
struct inode *inode;
|
|
struct file *file;
|
|
int r = -EINVAL;
|
|
|
|
BUILD_BUG_ON(sizeof(gfn_t) != sizeof(slot->gmem.pgoff));
|
|
|
|
file = fget(fd);
|
|
if (!file)
|
|
return -EBADF;
|
|
|
|
if (file->f_op != &kvm_gmem_fops)
|
|
goto err;
|
|
|
|
gmem = file->private_data;
|
|
if (gmem->kvm != kvm)
|
|
goto err;
|
|
|
|
inode = file_inode(file);
|
|
|
|
if (offset < 0 || !PAGE_ALIGNED(offset) ||
|
|
offset + size > i_size_read(inode))
|
|
goto err;
|
|
|
|
filemap_invalidate_lock(inode->i_mapping);
|
|
|
|
start = offset >> PAGE_SHIFT;
|
|
end = start + slot->npages;
|
|
|
|
if (!xa_empty(&gmem->bindings) &&
|
|
xa_find(&gmem->bindings, &start, end - 1, XA_PRESENT)) {
|
|
filemap_invalidate_unlock(inode->i_mapping);
|
|
goto err;
|
|
}
|
|
|
|
/*
|
|
* memslots of flag KVM_MEM_GUEST_MEMFD are immutable to change, so
|
|
* kvm_gmem_bind() must occur on a new memslot. Because the memslot
|
|
* is not visible yet, kvm_gmem_get_pfn() is guaranteed to see the file.
|
|
*/
|
|
WRITE_ONCE(slot->gmem.file, file);
|
|
slot->gmem.pgoff = start;
|
|
|
|
xa_store_range(&gmem->bindings, start, end - 1, slot, GFP_KERNEL);
|
|
filemap_invalidate_unlock(inode->i_mapping);
|
|
|
|
/*
|
|
* Drop the reference to the file, even on success. The file pins KVM,
|
|
* not the other way 'round. Active bindings are invalidated if the
|
|
* file is closed before memslots are destroyed.
|
|
*/
|
|
r = 0;
|
|
err:
|
|
fput(file);
|
|
return r;
|
|
}
|
|
|
|
static void __kvm_gmem_unbind(struct kvm_memory_slot *slot, struct kvm_gmem *gmem)
|
|
{
|
|
unsigned long start = slot->gmem.pgoff;
|
|
unsigned long end = start + slot->npages;
|
|
|
|
xa_store_range(&gmem->bindings, start, end - 1, NULL, GFP_KERNEL);
|
|
|
|
/*
|
|
* synchronize_srcu(&kvm->srcu) ensured that kvm_gmem_get_pfn()
|
|
* cannot see this memslot.
|
|
*/
|
|
WRITE_ONCE(slot->gmem.file, NULL);
|
|
}
|
|
|
|
void kvm_gmem_unbind(struct kvm_memory_slot *slot)
|
|
{
|
|
struct file *file;
|
|
|
|
/*
|
|
* Nothing to do if the underlying file was _already_ closed, as
|
|
* kvm_gmem_release() invalidates and nullifies all bindings.
|
|
*/
|
|
if (!slot->gmem.file)
|
|
return;
|
|
|
|
file = kvm_gmem_get_file(slot);
|
|
|
|
/*
|
|
* However, if the file is _being_ closed, then the bindings need to be
|
|
* removed as kvm_gmem_release() might not run until after the memslot
|
|
* is freed. Note, modifying the bindings is safe even though the file
|
|
* is dying as kvm_gmem_release() nullifies slot->gmem.file under
|
|
* slots_lock, and only puts its reference to KVM after destroying all
|
|
* bindings. I.e. reaching this point means kvm_gmem_release() hasn't
|
|
* yet destroyed the bindings or freed the gmem_file, and can't do so
|
|
* until the caller drops slots_lock.
|
|
*/
|
|
if (!file) {
|
|
__kvm_gmem_unbind(slot, slot->gmem.file->private_data);
|
|
return;
|
|
}
|
|
|
|
filemap_invalidate_lock(file->f_mapping);
|
|
__kvm_gmem_unbind(slot, file->private_data);
|
|
filemap_invalidate_unlock(file->f_mapping);
|
|
|
|
fput(file);
|
|
}
|
|
|
|
/* Returns a locked folio on success. */
|
|
static struct folio *__kvm_gmem_get_pfn(struct file *file,
|
|
struct kvm_memory_slot *slot,
|
|
pgoff_t index, kvm_pfn_t *pfn,
|
|
bool *is_prepared, int *max_order)
|
|
{
|
|
struct file *gmem_file = READ_ONCE(slot->gmem.file);
|
|
struct kvm_gmem *gmem = file->private_data;
|
|
struct folio *folio;
|
|
|
|
if (file != gmem_file) {
|
|
WARN_ON_ONCE(gmem_file);
|
|
return ERR_PTR(-EFAULT);
|
|
}
|
|
|
|
gmem = file->private_data;
|
|
if (xa_load(&gmem->bindings, index) != slot) {
|
|
WARN_ON_ONCE(xa_load(&gmem->bindings, index));
|
|
return ERR_PTR(-EIO);
|
|
}
|
|
|
|
folio = kvm_gmem_get_folio(file_inode(file), index);
|
|
if (IS_ERR(folio))
|
|
return folio;
|
|
|
|
if (folio_test_hwpoison(folio)) {
|
|
folio_unlock(folio);
|
|
folio_put(folio);
|
|
return ERR_PTR(-EHWPOISON);
|
|
}
|
|
|
|
*pfn = folio_file_pfn(folio, index);
|
|
if (max_order)
|
|
*max_order = 0;
|
|
|
|
*is_prepared = folio_test_uptodate(folio);
|
|
return folio;
|
|
}
|
|
|
|
int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
|
|
gfn_t gfn, kvm_pfn_t *pfn, int *max_order)
|
|
{
|
|
pgoff_t index = kvm_gmem_get_index(slot, gfn);
|
|
struct file *file = kvm_gmem_get_file(slot);
|
|
struct folio *folio;
|
|
bool is_prepared = false;
|
|
int r = 0;
|
|
|
|
if (!file)
|
|
return -EFAULT;
|
|
|
|
folio = __kvm_gmem_get_pfn(file, slot, index, pfn, &is_prepared, max_order);
|
|
if (IS_ERR(folio)) {
|
|
r = PTR_ERR(folio);
|
|
goto out;
|
|
}
|
|
|
|
if (!is_prepared)
|
|
r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio);
|
|
|
|
folio_unlock(folio);
|
|
if (r < 0)
|
|
folio_put(folio);
|
|
|
|
out:
|
|
fput(file);
|
|
return r;
|
|
}
|
|
EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn);
|
|
|
|
#ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM
|
|
long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages,
|
|
kvm_gmem_populate_cb post_populate, void *opaque)
|
|
{
|
|
struct file *file;
|
|
struct kvm_memory_slot *slot;
|
|
void __user *p;
|
|
|
|
int ret = 0, max_order;
|
|
long i;
|
|
|
|
lockdep_assert_held(&kvm->slots_lock);
|
|
if (npages < 0)
|
|
return -EINVAL;
|
|
|
|
slot = gfn_to_memslot(kvm, start_gfn);
|
|
if (!kvm_slot_can_be_private(slot))
|
|
return -EINVAL;
|
|
|
|
file = kvm_gmem_get_file(slot);
|
|
if (!file)
|
|
return -EFAULT;
|
|
|
|
filemap_invalidate_lock(file->f_mapping);
|
|
|
|
npages = min_t(ulong, slot->npages - (start_gfn - slot->base_gfn), npages);
|
|
for (i = 0; i < npages; i += (1 << max_order)) {
|
|
struct folio *folio;
|
|
gfn_t gfn = start_gfn + i;
|
|
pgoff_t index = kvm_gmem_get_index(slot, gfn);
|
|
bool is_prepared = false;
|
|
kvm_pfn_t pfn;
|
|
|
|
if (signal_pending(current)) {
|
|
ret = -EINTR;
|
|
break;
|
|
}
|
|
|
|
folio = __kvm_gmem_get_pfn(file, slot, index, &pfn, &is_prepared, &max_order);
|
|
if (IS_ERR(folio)) {
|
|
ret = PTR_ERR(folio);
|
|
break;
|
|
}
|
|
|
|
if (is_prepared) {
|
|
folio_unlock(folio);
|
|
folio_put(folio);
|
|
ret = -EEXIST;
|
|
break;
|
|
}
|
|
|
|
folio_unlock(folio);
|
|
WARN_ON(!IS_ALIGNED(gfn, 1 << max_order) ||
|
|
(npages - i) < (1 << max_order));
|
|
|
|
ret = -EINVAL;
|
|
while (!kvm_range_has_memory_attributes(kvm, gfn, gfn + (1 << max_order),
|
|
KVM_MEMORY_ATTRIBUTE_PRIVATE,
|
|
KVM_MEMORY_ATTRIBUTE_PRIVATE)) {
|
|
if (!max_order)
|
|
goto put_folio_and_exit;
|
|
max_order--;
|
|
}
|
|
|
|
p = src ? src + i * PAGE_SIZE : NULL;
|
|
ret = post_populate(kvm, gfn, pfn, p, max_order, opaque);
|
|
if (!ret)
|
|
kvm_gmem_mark_prepared(folio);
|
|
|
|
put_folio_and_exit:
|
|
folio_put(folio);
|
|
if (ret)
|
|
break;
|
|
}
|
|
|
|
filemap_invalidate_unlock(file->f_mapping);
|
|
|
|
fput(file);
|
|
return ret && !i ? ret : i;
|
|
}
|
|
EXPORT_SYMBOL_GPL(kvm_gmem_populate);
|
|
#endif
|