lib/test_hmm: add zone device private THP test infrastructure

Enhance the hmm test driver (lib/test_hmm) with support for THP pages.

A new pool of free folios (the free_folios list) has been added to the
dmirror device; a folio is taken from it when a request for a THP zone
device private page is made.

Add compound page awareness to the allocation function used during normal
migration and fault based migration.  These routines also copy
folio_nr_pages() pages when moving data between system memory and device
memory.

args.src and args.dst, used to hold migration entries, are now dynamically
allocated (as they need to hold HPAGE_PMD_NR entries or more).

Split and migrate support will be added in future patches in this series.

Link: https://lkml.kernel.org/r/20251001065707.920170-10-balbirs@nvidia.com
Signed-off-by: Balbir Singh <balbirs@nvidia.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
Cc: Rakie Kim <rakie.kim@sk.com>
Cc: Byungchul Park <byungchul@sk.com>
Cc: Gregory Price <gourry@gourry.net>
Cc: Ying Huang <ying.huang@linux.alibaba.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Nico Pache <npache@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Lyude Paul <lyude@redhat.com>
Cc: Danilo Krummrich <dakr@kernel.org>
Cc: David Airlie <airlied@gmail.com>
Cc: Simona Vetter <simona@ffwll.ch>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Mika Penttilä <mpenttil@redhat.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Balbir Singh
2025-10-01 16:57:00 +10:00
committed by Andrew Morton
parent 4964099163
commit 775465fd26
2 changed files with 304 additions and 76 deletions
+12
View File
@@ -177,6 +177,18 @@ static inline bool folio_is_pci_p2pdma(const struct folio *folio)
folio->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA;
}
/*
 * folio_zone_device_data - read the zone_device_data pointer of a folio.
 * @folio: folio to query.
 *
 * Returns the zone_device_data value stored in the folio's embedded
 * struct page (folio->page).  Triggers VM_WARN_ON_FOLIO() when
 * folio_is_device_private() is false for @folio; the value is still
 * returned in that case.
 */
static inline void *folio_zone_device_data(const struct folio *folio)
{
VM_WARN_ON_FOLIO(!folio_is_device_private(folio), folio);
return folio->page.zone_device_data;
}
/*
 * folio_set_zone_device_data - store a zone_device_data pointer in a folio.
 * @folio: folio to update.
 * @data: value to store.
 *
 * Writes @data to the zone_device_data field of the folio's embedded
 * struct page (folio->page).  Triggers VM_WARN_ON_FOLIO() when
 * folio_is_device_private() is false for @folio; the store is still
 * performed in that case.
 */
static inline void folio_set_zone_device_data(struct folio *folio, void *data)
{
VM_WARN_ON_FOLIO(!folio_is_device_private(folio), folio);
folio->page.zone_device_data = data;
}
static inline bool is_pci_p2pdma_page(const struct page *page)
{
return IS_ENABLED(CONFIG_PCI_P2PDMA) &&
+292 -76
View File
@@ -119,6 +119,7 @@ struct dmirror_device {
unsigned long calloc;
unsigned long cfree;
struct page *free_pages;
struct folio *free_folios;
spinlock_t lock; /* protects the above */
};
@@ -492,7 +493,7 @@ fini:
}
static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
struct page **ppage)
struct page **ppage, bool is_large)
{
struct dmirror_chunk *devmem;
struct resource *res = NULL;
@@ -572,20 +573,45 @@ static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
pfn_first, pfn_last);
spin_lock(&mdevice->lock);
for (pfn = pfn_first; pfn < pfn_last; pfn++) {
for (pfn = pfn_first; pfn < pfn_last; ) {
struct page *page = pfn_to_page(pfn);
if (is_large && IS_ALIGNED(pfn, HPAGE_PMD_NR)
&& (pfn + HPAGE_PMD_NR <= pfn_last)) {
page->zone_device_data = mdevice->free_folios;
mdevice->free_folios = page_folio(page);
pfn += HPAGE_PMD_NR;
continue;
}
page->zone_device_data = mdevice->free_pages;
mdevice->free_pages = page;
pfn++;
}
ret = 0;
if (ppage) {
*ppage = mdevice->free_pages;
mdevice->free_pages = (*ppage)->zone_device_data;
mdevice->calloc++;
if (is_large) {
if (!mdevice->free_folios) {
ret = -ENOMEM;
goto err_unlock;
}
*ppage = folio_page(mdevice->free_folios, 0);
mdevice->free_folios = (*ppage)->zone_device_data;
mdevice->calloc += HPAGE_PMD_NR;
} else if (mdevice->free_pages) {
*ppage = mdevice->free_pages;
mdevice->free_pages = (*ppage)->zone_device_data;
mdevice->calloc++;
} else {
ret = -ENOMEM;
goto err_unlock;
}
}
err_unlock:
spin_unlock(&mdevice->lock);
return 0;
return ret;
err_release:
mutex_unlock(&mdevice->devmem_lock);
@@ -598,10 +624,13 @@ err_devmem:
return ret;
}
static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
static struct page *dmirror_devmem_alloc_page(struct dmirror *dmirror,
bool is_large)
{
struct page *dpage = NULL;
struct page *rpage = NULL;
unsigned int order = is_large ? HPAGE_PMD_ORDER : 0;
struct dmirror_device *mdevice = dmirror->mdevice;
/*
* For ZONE_DEVICE private type, this is a fake device so we allocate
@@ -610,49 +639,55 @@ static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
* data and ignore rpage.
*/
if (dmirror_is_private_zone(mdevice)) {
rpage = alloc_page(GFP_HIGHUSER);
rpage = folio_page(folio_alloc(GFP_HIGHUSER, order), 0);
if (!rpage)
return NULL;
}
spin_lock(&mdevice->lock);
if (mdevice->free_pages) {
if (is_large && mdevice->free_folios) {
dpage = folio_page(mdevice->free_folios, 0);
mdevice->free_folios = dpage->zone_device_data;
mdevice->calloc += 1 << order;
spin_unlock(&mdevice->lock);
} else if (!is_large && mdevice->free_pages) {
dpage = mdevice->free_pages;
mdevice->free_pages = dpage->zone_device_data;
mdevice->calloc++;
spin_unlock(&mdevice->lock);
} else {
spin_unlock(&mdevice->lock);
if (dmirror_allocate_chunk(mdevice, &dpage))
if (dmirror_allocate_chunk(mdevice, &dpage, is_large))
goto error;
}
zone_device_page_init(dpage, 0);
zone_device_folio_init(page_folio(dpage), order);
dpage->zone_device_data = rpage;
return dpage;
error:
if (rpage)
__free_page(rpage);
__free_pages(rpage, order);
return NULL;
}
static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
struct dmirror *dmirror)
{
struct dmirror_device *mdevice = dmirror->mdevice;
const unsigned long *src = args->src;
unsigned long *dst = args->dst;
unsigned long addr;
for (addr = args->start; addr < args->end; addr += PAGE_SIZE,
src++, dst++) {
for (addr = args->start; addr < args->end; ) {
struct page *spage;
struct page *dpage;
struct page *rpage;
bool is_large = *src & MIGRATE_PFN_COMPOUND;
int write = (*src & MIGRATE_PFN_WRITE) ? MIGRATE_PFN_WRITE : 0;
unsigned long nr = 1;
if (!(*src & MIGRATE_PFN_MIGRATE))
continue;
goto next;
/*
* Note that spage might be NULL which is OK since it is an
@@ -662,17 +697,45 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
if (WARN(spage && is_zone_device_page(spage),
"page already in device spage pfn: 0x%lx\n",
page_to_pfn(spage)))
continue;
goto next;
dpage = dmirror_devmem_alloc_page(mdevice);
if (!dpage)
dpage = dmirror_devmem_alloc_page(dmirror, is_large);
if (!dpage) {
struct folio *folio;
unsigned long i;
unsigned long spfn = *src >> MIGRATE_PFN_SHIFT;
struct page *src_page;
if (!is_large)
goto next;
if (!spage && is_large) {
nr = HPAGE_PMD_NR;
} else {
folio = page_folio(spage);
nr = folio_nr_pages(folio);
}
for (i = 0; i < nr && addr < args->end; i++) {
dpage = dmirror_devmem_alloc_page(dmirror, false);
rpage = BACKING_PAGE(dpage);
rpage->zone_device_data = dmirror;
*dst = migrate_pfn(page_to_pfn(dpage)) | write;
src_page = pfn_to_page(spfn + i);
if (spage)
copy_highpage(rpage, src_page);
else
clear_highpage(rpage);
src++;
dst++;
addr += PAGE_SIZE;
}
continue;
}
rpage = BACKING_PAGE(dpage);
if (spage)
copy_highpage(rpage, spage);
else
clear_highpage(rpage);
/*
* Normally, a device would use the page->zone_device_data to
@@ -684,10 +747,42 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
pr_debug("migrating from sys to dev pfn src: 0x%lx pfn dst: 0x%lx\n",
page_to_pfn(spage), page_to_pfn(dpage));
*dst = migrate_pfn(page_to_pfn(dpage));
if ((*src & MIGRATE_PFN_WRITE) ||
(!spage && args->vma->vm_flags & VM_WRITE))
*dst |= MIGRATE_PFN_WRITE;
*dst = migrate_pfn(page_to_pfn(dpage)) | write;
if (is_large) {
int i;
struct folio *folio = page_folio(dpage);
*dst |= MIGRATE_PFN_COMPOUND;
if (folio_test_large(folio)) {
for (i = 0; i < folio_nr_pages(folio); i++) {
struct page *dst_page =
pfn_to_page(page_to_pfn(rpage) + i);
struct page *src_page =
pfn_to_page(page_to_pfn(spage) + i);
if (spage)
copy_highpage(dst_page, src_page);
else
clear_highpage(dst_page);
src++;
dst++;
addr += PAGE_SIZE;
}
continue;
}
}
if (spage)
copy_highpage(rpage, spage);
else
clear_highpage(rpage);
next:
src++;
dst++;
addr += PAGE_SIZE;
}
}
@@ -734,14 +829,17 @@ static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,
const unsigned long *src = args->src;
const unsigned long *dst = args->dst;
unsigned long pfn;
const unsigned long start_pfn = start >> PAGE_SHIFT;
const unsigned long end_pfn = end >> PAGE_SHIFT;
/* Map the migrated pages into the device's page tables. */
mutex_lock(&dmirror->mutex);
for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++,
src++, dst++) {
for (pfn = start_pfn; pfn < end_pfn; pfn++, src++, dst++) {
struct page *dpage;
void *entry;
int nr, i;
struct page *rpage;
if (!(*src & MIGRATE_PFN_MIGRATE))
continue;
@@ -750,13 +848,25 @@ static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,
if (!dpage)
continue;
entry = BACKING_PAGE(dpage);
if (*dst & MIGRATE_PFN_WRITE)
entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
if (xa_is_err(entry)) {
mutex_unlock(&dmirror->mutex);
return xa_err(entry);
if (*dst & MIGRATE_PFN_COMPOUND)
nr = folio_nr_pages(page_folio(dpage));
else
nr = 1;
WARN_ON_ONCE(end_pfn < start_pfn + nr);
rpage = BACKING_PAGE(dpage);
VM_WARN_ON(folio_nr_pages(page_folio(rpage)) != nr);
for (i = 0; i < nr; i++) {
entry = folio_page(page_folio(rpage), i);
if (*dst & MIGRATE_PFN_WRITE)
entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
entry = xa_store(&dmirror->pt, pfn + i, entry, GFP_ATOMIC);
if (xa_is_err(entry)) {
mutex_unlock(&dmirror->mutex);
return xa_err(entry);
}
}
}
@@ -829,31 +939,66 @@ static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
unsigned long start = args->start;
unsigned long end = args->end;
unsigned long addr;
unsigned int order = 0;
int i;
for (addr = start; addr < end; addr += PAGE_SIZE,
src++, dst++) {
for (addr = start; addr < end; ) {
struct page *dpage, *spage;
spage = migrate_pfn_to_page(*src);
if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
continue;
if (!spage || !(*src & MIGRATE_PFN_MIGRATE)) {
addr += PAGE_SIZE;
goto next;
}
if (WARN_ON(!is_device_private_page(spage) &&
!is_device_coherent_page(spage)))
continue;
spage = BACKING_PAGE(spage);
dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
if (!dpage)
continue;
pr_debug("migrating from dev to sys pfn src: 0x%lx pfn dst: 0x%lx\n",
page_to_pfn(spage), page_to_pfn(dpage));
!is_device_coherent_page(spage))) {
addr += PAGE_SIZE;
goto next;
}
spage = BACKING_PAGE(spage);
order = folio_order(page_folio(spage));
if (order)
dpage = folio_page(vma_alloc_folio(GFP_HIGHUSER_MOVABLE,
order, args->vma, addr), 0);
else
dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
/* Try with smaller pages if large allocation fails */
if (!dpage && order) {
dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
if (!dpage)
return VM_FAULT_OOM;
order = 0;
}
pr_debug("migrating from sys to dev pfn src: 0x%lx pfn dst: 0x%lx\n",
page_to_pfn(spage), page_to_pfn(dpage));
lock_page(dpage);
xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
copy_highpage(dpage, spage);
*dst = migrate_pfn(page_to_pfn(dpage));
if (*src & MIGRATE_PFN_WRITE)
*dst |= MIGRATE_PFN_WRITE;
if (order)
*dst |= MIGRATE_PFN_COMPOUND;
for (i = 0; i < (1 << order); i++) {
struct page *src_page;
struct page *dst_page;
src_page = pfn_to_page(page_to_pfn(spage) + i);
dst_page = pfn_to_page(page_to_pfn(dpage) + i);
xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
copy_highpage(dst_page, src_page);
}
next:
addr += PAGE_SIZE << order;
src += 1 << order;
dst += 1 << order;
}
return 0;
}
@@ -879,11 +1024,14 @@ static int dmirror_migrate_to_system(struct dmirror *dmirror,
unsigned long size = cmd->npages << PAGE_SHIFT;
struct mm_struct *mm = dmirror->notifier.mm;
struct vm_area_struct *vma;
unsigned long src_pfns[32] = { 0 };
unsigned long dst_pfns[32] = { 0 };
struct migrate_vma args = { 0 };
unsigned long next;
int ret;
unsigned long *src_pfns;
unsigned long *dst_pfns;
src_pfns = kvcalloc(PTRS_PER_PTE, sizeof(*src_pfns), GFP_KERNEL | __GFP_NOFAIL);
dst_pfns = kvcalloc(PTRS_PER_PTE, sizeof(*dst_pfns), GFP_KERNEL | __GFP_NOFAIL);
start = cmd->addr;
end = start + size;
@@ -902,7 +1050,7 @@ static int dmirror_migrate_to_system(struct dmirror *dmirror,
ret = -EINVAL;
goto out;
}
next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
next = min(end, addr + (PTRS_PER_PTE << PAGE_SHIFT));
if (next > vma->vm_end)
next = vma->vm_end;
@@ -912,7 +1060,7 @@ static int dmirror_migrate_to_system(struct dmirror *dmirror,
args.start = addr;
args.end = next;
args.pgmap_owner = dmirror->mdevice;
args.flags = dmirror_select_device(dmirror);
args.flags = dmirror_select_device(dmirror) | MIGRATE_VMA_SELECT_COMPOUND;
ret = migrate_vma_setup(&args);
if (ret)
@@ -928,6 +1076,8 @@ static int dmirror_migrate_to_system(struct dmirror *dmirror,
out:
mmap_read_unlock(mm);
mmput(mm);
kvfree(src_pfns);
kvfree(dst_pfns);
return ret;
}
@@ -939,12 +1089,12 @@ static int dmirror_migrate_to_device(struct dmirror *dmirror,
unsigned long size = cmd->npages << PAGE_SHIFT;
struct mm_struct *mm = dmirror->notifier.mm;
struct vm_area_struct *vma;
unsigned long src_pfns[32] = { 0 };
unsigned long dst_pfns[32] = { 0 };
struct dmirror_bounce bounce;
struct migrate_vma args = { 0 };
unsigned long next;
int ret;
unsigned long *src_pfns = NULL;
unsigned long *dst_pfns = NULL;
start = cmd->addr;
end = start + size;
@@ -955,6 +1105,18 @@ static int dmirror_migrate_to_device(struct dmirror *dmirror,
if (!mmget_not_zero(mm))
return -EINVAL;
ret = -ENOMEM;
src_pfns = kvcalloc(PTRS_PER_PTE, sizeof(*src_pfns),
GFP_KERNEL | __GFP_NOFAIL);
if (!src_pfns)
goto free_mem;
dst_pfns = kvcalloc(PTRS_PER_PTE, sizeof(*dst_pfns),
GFP_KERNEL | __GFP_NOFAIL);
if (!dst_pfns)
goto free_mem;
ret = 0;
mmap_read_lock(mm);
for (addr = start; addr < end; addr = next) {
vma = vma_lookup(mm, addr);
@@ -962,7 +1124,7 @@ static int dmirror_migrate_to_device(struct dmirror *dmirror,
ret = -EINVAL;
goto out;
}
next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
next = min(end, addr + (PTRS_PER_PTE << PAGE_SHIFT));
if (next > vma->vm_end)
next = vma->vm_end;
@@ -972,7 +1134,8 @@ static int dmirror_migrate_to_device(struct dmirror *dmirror,
args.start = addr;
args.end = next;
args.pgmap_owner = dmirror->mdevice;
args.flags = MIGRATE_VMA_SELECT_SYSTEM;
args.flags = MIGRATE_VMA_SELECT_SYSTEM |
MIGRATE_VMA_SELECT_COMPOUND;
ret = migrate_vma_setup(&args);
if (ret)
goto out;
@@ -992,7 +1155,7 @@ static int dmirror_migrate_to_device(struct dmirror *dmirror,
*/
ret = dmirror_bounce_init(&bounce, start, size);
if (ret)
return ret;
goto free_mem;
mutex_lock(&dmirror->mutex);
ret = dmirror_do_read(dmirror, start, end, &bounce);
mutex_unlock(&dmirror->mutex);
@@ -1003,11 +1166,14 @@ static int dmirror_migrate_to_device(struct dmirror *dmirror,
}
cmd->cpages = bounce.cpages;
dmirror_bounce_fini(&bounce);
return ret;
goto free_mem;
out:
mmap_read_unlock(mm);
mmput(mm);
free_mem:
kfree(src_pfns);
kfree(dst_pfns);
return ret;
}
@@ -1200,6 +1366,7 @@ static void dmirror_device_evict_chunk(struct dmirror_chunk *chunk)
unsigned long i;
unsigned long *src_pfns;
unsigned long *dst_pfns;
unsigned int order = 0;
src_pfns = kvcalloc(npages, sizeof(*src_pfns), GFP_KERNEL | __GFP_NOFAIL);
dst_pfns = kvcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL | __GFP_NOFAIL);
@@ -1215,13 +1382,25 @@ static void dmirror_device_evict_chunk(struct dmirror_chunk *chunk)
if (WARN_ON(!is_device_private_page(spage) &&
!is_device_coherent_page(spage)))
continue;
order = folio_order(page_folio(spage));
spage = BACKING_PAGE(spage);
dpage = alloc_page(GFP_HIGHUSER_MOVABLE | __GFP_NOFAIL);
if (src_pfns[i] & MIGRATE_PFN_COMPOUND) {
dpage = folio_page(folio_alloc(GFP_HIGHUSER_MOVABLE,
order), 0);
} else {
dpage = alloc_page(GFP_HIGHUSER_MOVABLE | __GFP_NOFAIL);
order = 0;
}
/* TODO Support splitting here */
lock_page(dpage);
copy_highpage(dpage, spage);
dst_pfns[i] = migrate_pfn(page_to_pfn(dpage));
if (src_pfns[i] & MIGRATE_PFN_WRITE)
dst_pfns[i] |= MIGRATE_PFN_WRITE;
if (order)
dst_pfns[i] |= MIGRATE_PFN_COMPOUND;
folio_copy(page_folio(dpage), page_folio(spage));
}
migrate_device_pages(src_pfns, dst_pfns, npages);
migrate_device_finalize(src_pfns, dst_pfns, npages);
@@ -1234,7 +1413,12 @@ static void dmirror_remove_free_pages(struct dmirror_chunk *devmem)
{
struct dmirror_device *mdevice = devmem->mdevice;
struct page *page;
struct folio *folio;
for (folio = mdevice->free_folios; folio; folio = folio_zone_device_data(folio))
if (dmirror_page_to_chunk(folio_page(folio, 0)) == devmem)
mdevice->free_folios = folio_zone_device_data(folio);
for (page = mdevice->free_pages; page; page = page->zone_device_data)
if (dmirror_page_to_chunk(page) == devmem)
mdevice->free_pages = page->zone_device_data;
@@ -1265,6 +1449,7 @@ static void dmirror_device_remove_chunks(struct dmirror_device *mdevice)
mdevice->devmem_count = 0;
mdevice->devmem_capacity = 0;
mdevice->free_pages = NULL;
mdevice->free_folios = NULL;
kfree(mdevice->devmem_chunks);
mdevice->devmem_chunks = NULL;
}
@@ -1379,18 +1564,30 @@ static void dmirror_devmem_free(struct folio *folio)
struct page *page = &folio->page;
struct page *rpage = BACKING_PAGE(page);
struct dmirror_device *mdevice;
struct folio *rfolio = page_folio(rpage);
unsigned int order = folio_order(rfolio);
if (rpage != page)
__free_page(rpage);
if (rpage != page) {
if (order)
__free_pages(rpage, order);
else
__free_page(rpage);
rpage = NULL;
}
mdevice = dmirror_page_to_device(page);
spin_lock(&mdevice->lock);
/* Return page to our allocator if not freeing the chunk */
if (!dmirror_page_to_chunk(page)->remove) {
mdevice->cfree++;
page->zone_device_data = mdevice->free_pages;
mdevice->free_pages = page;
mdevice->cfree += 1 << order;
if (order) {
page->zone_device_data = mdevice->free_folios;
mdevice->free_folios = page_folio(page);
} else {
page->zone_device_data = mdevice->free_pages;
mdevice->free_pages = page;
}
}
spin_unlock(&mdevice->lock);
}
@@ -1398,36 +1595,52 @@ static void dmirror_devmem_free(struct folio *folio)
static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
{
struct migrate_vma args = { 0 };
unsigned long src_pfns = 0;
unsigned long dst_pfns = 0;
struct page *rpage;
struct dmirror *dmirror;
vm_fault_t ret;
vm_fault_t ret = 0;
unsigned int order, nr;
/*
* Normally, a device would use the page->zone_device_data to point to
* the mirror but here we use it to hold the page for the simulated
* device memory and that page holds the pointer to the mirror.
*/
rpage = vmf->page->zone_device_data;
rpage = folio_zone_device_data(page_folio(vmf->page));
dmirror = rpage->zone_device_data;
/* FIXME demonstrate how we can adjust migrate range */
order = folio_order(page_folio(vmf->page));
nr = 1 << order;
/*
* Consider a per-cpu cache of src and dst pfns, but with
* large number of cpus that might not scale well.
*/
args.start = ALIGN_DOWN(vmf->address, (PAGE_SIZE << order));
args.vma = vmf->vma;
args.start = vmf->address;
args.end = args.start + PAGE_SIZE;
args.src = &src_pfns;
args.dst = &dst_pfns;
args.end = args.start + (PAGE_SIZE << order);
nr = (args.end - args.start) >> PAGE_SHIFT;
args.src = kcalloc(nr, sizeof(unsigned long), GFP_KERNEL);
args.dst = kcalloc(nr, sizeof(unsigned long), GFP_KERNEL);
args.pgmap_owner = dmirror->mdevice;
args.flags = dmirror_select_device(dmirror);
args.fault_page = vmf->page;
if (!args.src || !args.dst) {
ret = VM_FAULT_OOM;
goto err;
}
if (order)
args.flags |= MIGRATE_VMA_SELECT_COMPOUND;
if (migrate_vma_setup(&args))
return VM_FAULT_SIGBUS;
ret = dmirror_devmem_fault_alloc_and_copy(&args, dmirror);
if (ret)
return ret;
goto err;
migrate_vma_pages(&args);
/*
* No device finalize step is needed since
@@ -1435,7 +1648,10 @@ static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
* invalidated the device page table.
*/
migrate_vma_finalize(&args);
return 0;
err:
kfree(args.src);
kfree(args.dst);
return ret;
}
static const struct dev_pagemap_ops dmirror_devmem_ops = {
@@ -1466,7 +1682,7 @@ static int dmirror_device_init(struct dmirror_device *mdevice, int id)
return ret;
/* Build a list of free ZONE_DEVICE struct pages */
return dmirror_allocate_chunk(mdevice, NULL);
return dmirror_allocate_chunk(mdevice, NULL, false);
}
static void dmirror_device_remove(struct dmirror_device *mdevice)