mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2026-06-21 15:43:21 +02:00
lib/test_hmm: add zone device private THP test infrastructure
Enhance the hmm test driver (lib/test_hmm) with support for THP pages. A new pool of free folios (free_folios) has been added to the dmirror device; folios are allocated from it when a THP zone device private page is requested. Add compound page awareness to the allocation function used during normal migration and fault-based migration. These routines also copy folio_nr_pages() pages when moving data between system memory and device memory. The args.src and args.dst arrays used to hold migration entries are now dynamically allocated (as they need to hold HPAGE_PMD_NR entries or more). Split and migrate support will be added in future patches in this series. Link: https://lkml.kernel.org/r/20251001065707.920170-10-balbirs@nvidia.com Signed-off-by: Balbir Singh <balbirs@nvidia.com> Cc: David Hildenbrand <david@redhat.com> Cc: Zi Yan <ziy@nvidia.com> Cc: Joshua Hahn <joshua.hahnjy@gmail.com> Cc: Rakie Kim <rakie.kim@sk.com> Cc: Byungchul Park <byungchul@sk.com> Cc: Gregory Price <gourry@gourry.net> Cc: Ying Huang <ying.huang@linux.alibaba.com> Cc: Alistair Popple <apopple@nvidia.com> Cc: Oscar Salvador <osalvador@suse.de> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Cc: Baolin Wang <baolin.wang@linux.alibaba.com> Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com> Cc: Nico Pache <npache@redhat.com> Cc: Ryan Roberts <ryan.roberts@arm.com> Cc: Dev Jain <dev.jain@arm.com> Cc: Barry Song <baohua@kernel.org> Cc: Lyude Paul <lyude@redhat.com> Cc: Danilo Krummrich <dakr@kernel.org> Cc: David Airlie <airlied@gmail.com> Cc: Simona Vetter <simona@ffwll.ch> Cc: Ralph Campbell <rcampbell@nvidia.com> Cc: Mika Penttilä <mpenttil@redhat.com> Cc: Matthew Brost <matthew.brost@intel.com> Cc: Francois Dugast <francois.dugast@intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
committed by
Andrew Morton
parent
4964099163
commit
775465fd26
@@ -177,6 +177,18 @@ static inline bool folio_is_pci_p2pdma(const struct folio *folio)
|
||||
folio->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA;
|
||||
}
|
||||
|
||||
static inline void *folio_zone_device_data(const struct folio *folio)
|
||||
{
|
||||
VM_WARN_ON_FOLIO(!folio_is_device_private(folio), folio);
|
||||
return folio->page.zone_device_data;
|
||||
}
|
||||
|
||||
static inline void folio_set_zone_device_data(struct folio *folio, void *data)
|
||||
{
|
||||
VM_WARN_ON_FOLIO(!folio_is_device_private(folio), folio);
|
||||
folio->page.zone_device_data = data;
|
||||
}
|
||||
|
||||
static inline bool is_pci_p2pdma_page(const struct page *page)
|
||||
{
|
||||
return IS_ENABLED(CONFIG_PCI_P2PDMA) &&
|
||||
|
||||
+292
-76
@@ -119,6 +119,7 @@ struct dmirror_device {
|
||||
unsigned long calloc;
|
||||
unsigned long cfree;
|
||||
struct page *free_pages;
|
||||
struct folio *free_folios;
|
||||
spinlock_t lock; /* protects the above */
|
||||
};
|
||||
|
||||
@@ -492,7 +493,7 @@ fini:
|
||||
}
|
||||
|
||||
static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
|
||||
struct page **ppage)
|
||||
struct page **ppage, bool is_large)
|
||||
{
|
||||
struct dmirror_chunk *devmem;
|
||||
struct resource *res = NULL;
|
||||
@@ -572,20 +573,45 @@ static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
|
||||
pfn_first, pfn_last);
|
||||
|
||||
spin_lock(&mdevice->lock);
|
||||
for (pfn = pfn_first; pfn < pfn_last; pfn++) {
|
||||
for (pfn = pfn_first; pfn < pfn_last; ) {
|
||||
struct page *page = pfn_to_page(pfn);
|
||||
|
||||
if (is_large && IS_ALIGNED(pfn, HPAGE_PMD_NR)
|
||||
&& (pfn + HPAGE_PMD_NR <= pfn_last)) {
|
||||
page->zone_device_data = mdevice->free_folios;
|
||||
mdevice->free_folios = page_folio(page);
|
||||
pfn += HPAGE_PMD_NR;
|
||||
continue;
|
||||
}
|
||||
|
||||
page->zone_device_data = mdevice->free_pages;
|
||||
mdevice->free_pages = page;
|
||||
pfn++;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
if (ppage) {
|
||||
*ppage = mdevice->free_pages;
|
||||
mdevice->free_pages = (*ppage)->zone_device_data;
|
||||
mdevice->calloc++;
|
||||
if (is_large) {
|
||||
if (!mdevice->free_folios) {
|
||||
ret = -ENOMEM;
|
||||
goto err_unlock;
|
||||
}
|
||||
*ppage = folio_page(mdevice->free_folios, 0);
|
||||
mdevice->free_folios = (*ppage)->zone_device_data;
|
||||
mdevice->calloc += HPAGE_PMD_NR;
|
||||
} else if (mdevice->free_pages) {
|
||||
*ppage = mdevice->free_pages;
|
||||
mdevice->free_pages = (*ppage)->zone_device_data;
|
||||
mdevice->calloc++;
|
||||
} else {
|
||||
ret = -ENOMEM;
|
||||
goto err_unlock;
|
||||
}
|
||||
}
|
||||
err_unlock:
|
||||
spin_unlock(&mdevice->lock);
|
||||
|
||||
return 0;
|
||||
return ret;
|
||||
|
||||
err_release:
|
||||
mutex_unlock(&mdevice->devmem_lock);
|
||||
@@ -598,10 +624,13 @@ err_devmem:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
|
||||
static struct page *dmirror_devmem_alloc_page(struct dmirror *dmirror,
|
||||
bool is_large)
|
||||
{
|
||||
struct page *dpage = NULL;
|
||||
struct page *rpage = NULL;
|
||||
unsigned int order = is_large ? HPAGE_PMD_ORDER : 0;
|
||||
struct dmirror_device *mdevice = dmirror->mdevice;
|
||||
|
||||
/*
|
||||
* For ZONE_DEVICE private type, this is a fake device so we allocate
|
||||
@@ -610,49 +639,55 @@ static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
|
||||
* data and ignore rpage.
|
||||
*/
|
||||
if (dmirror_is_private_zone(mdevice)) {
|
||||
rpage = alloc_page(GFP_HIGHUSER);
|
||||
rpage = folio_page(folio_alloc(GFP_HIGHUSER, order), 0);
|
||||
if (!rpage)
|
||||
return NULL;
|
||||
}
|
||||
spin_lock(&mdevice->lock);
|
||||
|
||||
if (mdevice->free_pages) {
|
||||
if (is_large && mdevice->free_folios) {
|
||||
dpage = folio_page(mdevice->free_folios, 0);
|
||||
mdevice->free_folios = dpage->zone_device_data;
|
||||
mdevice->calloc += 1 << order;
|
||||
spin_unlock(&mdevice->lock);
|
||||
} else if (!is_large && mdevice->free_pages) {
|
||||
dpage = mdevice->free_pages;
|
||||
mdevice->free_pages = dpage->zone_device_data;
|
||||
mdevice->calloc++;
|
||||
spin_unlock(&mdevice->lock);
|
||||
} else {
|
||||
spin_unlock(&mdevice->lock);
|
||||
if (dmirror_allocate_chunk(mdevice, &dpage))
|
||||
if (dmirror_allocate_chunk(mdevice, &dpage, is_large))
|
||||
goto error;
|
||||
}
|
||||
|
||||
zone_device_page_init(dpage, 0);
|
||||
zone_device_folio_init(page_folio(dpage), order);
|
||||
dpage->zone_device_data = rpage;
|
||||
return dpage;
|
||||
|
||||
error:
|
||||
if (rpage)
|
||||
__free_page(rpage);
|
||||
__free_pages(rpage, order);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
|
||||
struct dmirror *dmirror)
|
||||
{
|
||||
struct dmirror_device *mdevice = dmirror->mdevice;
|
||||
const unsigned long *src = args->src;
|
||||
unsigned long *dst = args->dst;
|
||||
unsigned long addr;
|
||||
|
||||
for (addr = args->start; addr < args->end; addr += PAGE_SIZE,
|
||||
src++, dst++) {
|
||||
for (addr = args->start; addr < args->end; ) {
|
||||
struct page *spage;
|
||||
struct page *dpage;
|
||||
struct page *rpage;
|
||||
bool is_large = *src & MIGRATE_PFN_COMPOUND;
|
||||
int write = (*src & MIGRATE_PFN_WRITE) ? MIGRATE_PFN_WRITE : 0;
|
||||
unsigned long nr = 1;
|
||||
|
||||
if (!(*src & MIGRATE_PFN_MIGRATE))
|
||||
continue;
|
||||
goto next;
|
||||
|
||||
/*
|
||||
* Note that spage might be NULL which is OK since it is an
|
||||
@@ -662,17 +697,45 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
|
||||
if (WARN(spage && is_zone_device_page(spage),
|
||||
"page already in device spage pfn: 0x%lx\n",
|
||||
page_to_pfn(spage)))
|
||||
continue;
|
||||
goto next;
|
||||
|
||||
dpage = dmirror_devmem_alloc_page(mdevice);
|
||||
if (!dpage)
|
||||
dpage = dmirror_devmem_alloc_page(dmirror, is_large);
|
||||
if (!dpage) {
|
||||
struct folio *folio;
|
||||
unsigned long i;
|
||||
unsigned long spfn = *src >> MIGRATE_PFN_SHIFT;
|
||||
struct page *src_page;
|
||||
|
||||
if (!is_large)
|
||||
goto next;
|
||||
|
||||
if (!spage && is_large) {
|
||||
nr = HPAGE_PMD_NR;
|
||||
} else {
|
||||
folio = page_folio(spage);
|
||||
nr = folio_nr_pages(folio);
|
||||
}
|
||||
|
||||
for (i = 0; i < nr && addr < args->end; i++) {
|
||||
dpage = dmirror_devmem_alloc_page(dmirror, false);
|
||||
rpage = BACKING_PAGE(dpage);
|
||||
rpage->zone_device_data = dmirror;
|
||||
|
||||
*dst = migrate_pfn(page_to_pfn(dpage)) | write;
|
||||
src_page = pfn_to_page(spfn + i);
|
||||
|
||||
if (spage)
|
||||
copy_highpage(rpage, src_page);
|
||||
else
|
||||
clear_highpage(rpage);
|
||||
src++;
|
||||
dst++;
|
||||
addr += PAGE_SIZE;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
rpage = BACKING_PAGE(dpage);
|
||||
if (spage)
|
||||
copy_highpage(rpage, spage);
|
||||
else
|
||||
clear_highpage(rpage);
|
||||
|
||||
/*
|
||||
* Normally, a device would use the page->zone_device_data to
|
||||
@@ -684,10 +747,42 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
|
||||
|
||||
pr_debug("migrating from sys to dev pfn src: 0x%lx pfn dst: 0x%lx\n",
|
||||
page_to_pfn(spage), page_to_pfn(dpage));
|
||||
*dst = migrate_pfn(page_to_pfn(dpage));
|
||||
if ((*src & MIGRATE_PFN_WRITE) ||
|
||||
(!spage && args->vma->vm_flags & VM_WRITE))
|
||||
*dst |= MIGRATE_PFN_WRITE;
|
||||
|
||||
*dst = migrate_pfn(page_to_pfn(dpage)) | write;
|
||||
|
||||
if (is_large) {
|
||||
int i;
|
||||
struct folio *folio = page_folio(dpage);
|
||||
*dst |= MIGRATE_PFN_COMPOUND;
|
||||
|
||||
if (folio_test_large(folio)) {
|
||||
for (i = 0; i < folio_nr_pages(folio); i++) {
|
||||
struct page *dst_page =
|
||||
pfn_to_page(page_to_pfn(rpage) + i);
|
||||
struct page *src_page =
|
||||
pfn_to_page(page_to_pfn(spage) + i);
|
||||
|
||||
if (spage)
|
||||
copy_highpage(dst_page, src_page);
|
||||
else
|
||||
clear_highpage(dst_page);
|
||||
src++;
|
||||
dst++;
|
||||
addr += PAGE_SIZE;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (spage)
|
||||
copy_highpage(rpage, spage);
|
||||
else
|
||||
clear_highpage(rpage);
|
||||
|
||||
next:
|
||||
src++;
|
||||
dst++;
|
||||
addr += PAGE_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -734,14 +829,17 @@ static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,
|
||||
const unsigned long *src = args->src;
|
||||
const unsigned long *dst = args->dst;
|
||||
unsigned long pfn;
|
||||
const unsigned long start_pfn = start >> PAGE_SHIFT;
|
||||
const unsigned long end_pfn = end >> PAGE_SHIFT;
|
||||
|
||||
/* Map the migrated pages into the device's page tables. */
|
||||
mutex_lock(&dmirror->mutex);
|
||||
|
||||
for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++,
|
||||
src++, dst++) {
|
||||
for (pfn = start_pfn; pfn < end_pfn; pfn++, src++, dst++) {
|
||||
struct page *dpage;
|
||||
void *entry;
|
||||
int nr, i;
|
||||
struct page *rpage;
|
||||
|
||||
if (!(*src & MIGRATE_PFN_MIGRATE))
|
||||
continue;
|
||||
@@ -750,13 +848,25 @@ static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,
|
||||
if (!dpage)
|
||||
continue;
|
||||
|
||||
entry = BACKING_PAGE(dpage);
|
||||
if (*dst & MIGRATE_PFN_WRITE)
|
||||
entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
|
||||
entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
|
||||
if (xa_is_err(entry)) {
|
||||
mutex_unlock(&dmirror->mutex);
|
||||
return xa_err(entry);
|
||||
if (*dst & MIGRATE_PFN_COMPOUND)
|
||||
nr = folio_nr_pages(page_folio(dpage));
|
||||
else
|
||||
nr = 1;
|
||||
|
||||
WARN_ON_ONCE(end_pfn < start_pfn + nr);
|
||||
|
||||
rpage = BACKING_PAGE(dpage);
|
||||
VM_WARN_ON(folio_nr_pages(page_folio(rpage)) != nr);
|
||||
|
||||
for (i = 0; i < nr; i++) {
|
||||
entry = folio_page(page_folio(rpage), i);
|
||||
if (*dst & MIGRATE_PFN_WRITE)
|
||||
entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
|
||||
entry = xa_store(&dmirror->pt, pfn + i, entry, GFP_ATOMIC);
|
||||
if (xa_is_err(entry)) {
|
||||
mutex_unlock(&dmirror->mutex);
|
||||
return xa_err(entry);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -829,31 +939,66 @@ static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
|
||||
unsigned long start = args->start;
|
||||
unsigned long end = args->end;
|
||||
unsigned long addr;
|
||||
unsigned int order = 0;
|
||||
int i;
|
||||
|
||||
for (addr = start; addr < end; addr += PAGE_SIZE,
|
||||
src++, dst++) {
|
||||
for (addr = start; addr < end; ) {
|
||||
struct page *dpage, *spage;
|
||||
|
||||
spage = migrate_pfn_to_page(*src);
|
||||
if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
|
||||
continue;
|
||||
if (!spage || !(*src & MIGRATE_PFN_MIGRATE)) {
|
||||
addr += PAGE_SIZE;
|
||||
goto next;
|
||||
}
|
||||
|
||||
if (WARN_ON(!is_device_private_page(spage) &&
|
||||
!is_device_coherent_page(spage)))
|
||||
continue;
|
||||
spage = BACKING_PAGE(spage);
|
||||
dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
|
||||
if (!dpage)
|
||||
continue;
|
||||
pr_debug("migrating from dev to sys pfn src: 0x%lx pfn dst: 0x%lx\n",
|
||||
page_to_pfn(spage), page_to_pfn(dpage));
|
||||
!is_device_coherent_page(spage))) {
|
||||
addr += PAGE_SIZE;
|
||||
goto next;
|
||||
}
|
||||
|
||||
spage = BACKING_PAGE(spage);
|
||||
order = folio_order(page_folio(spage));
|
||||
|
||||
if (order)
|
||||
dpage = folio_page(vma_alloc_folio(GFP_HIGHUSER_MOVABLE,
|
||||
order, args->vma, addr), 0);
|
||||
else
|
||||
dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
|
||||
|
||||
/* Try with smaller pages if large allocation fails */
|
||||
if (!dpage && order) {
|
||||
dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
|
||||
if (!dpage)
|
||||
return VM_FAULT_OOM;
|
||||
order = 0;
|
||||
}
|
||||
|
||||
pr_debug("migrating from sys to dev pfn src: 0x%lx pfn dst: 0x%lx\n",
|
||||
page_to_pfn(spage), page_to_pfn(dpage));
|
||||
lock_page(dpage);
|
||||
xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
|
||||
copy_highpage(dpage, spage);
|
||||
*dst = migrate_pfn(page_to_pfn(dpage));
|
||||
if (*src & MIGRATE_PFN_WRITE)
|
||||
*dst |= MIGRATE_PFN_WRITE;
|
||||
if (order)
|
||||
*dst |= MIGRATE_PFN_COMPOUND;
|
||||
|
||||
for (i = 0; i < (1 << order); i++) {
|
||||
struct page *src_page;
|
||||
struct page *dst_page;
|
||||
|
||||
src_page = pfn_to_page(page_to_pfn(spage) + i);
|
||||
dst_page = pfn_to_page(page_to_pfn(dpage) + i);
|
||||
|
||||
xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
|
||||
copy_highpage(dst_page, src_page);
|
||||
}
|
||||
next:
|
||||
addr += PAGE_SIZE << order;
|
||||
src += 1 << order;
|
||||
dst += 1 << order;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@@ -879,11 +1024,14 @@ static int dmirror_migrate_to_system(struct dmirror *dmirror,
|
||||
unsigned long size = cmd->npages << PAGE_SHIFT;
|
||||
struct mm_struct *mm = dmirror->notifier.mm;
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long src_pfns[32] = { 0 };
|
||||
unsigned long dst_pfns[32] = { 0 };
|
||||
struct migrate_vma args = { 0 };
|
||||
unsigned long next;
|
||||
int ret;
|
||||
unsigned long *src_pfns;
|
||||
unsigned long *dst_pfns;
|
||||
|
||||
src_pfns = kvcalloc(PTRS_PER_PTE, sizeof(*src_pfns), GFP_KERNEL | __GFP_NOFAIL);
|
||||
dst_pfns = kvcalloc(PTRS_PER_PTE, sizeof(*dst_pfns), GFP_KERNEL | __GFP_NOFAIL);
|
||||
|
||||
start = cmd->addr;
|
||||
end = start + size;
|
||||
@@ -902,7 +1050,7 @@ static int dmirror_migrate_to_system(struct dmirror *dmirror,
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
|
||||
next = min(end, addr + (PTRS_PER_PTE << PAGE_SHIFT));
|
||||
if (next > vma->vm_end)
|
||||
next = vma->vm_end;
|
||||
|
||||
@@ -912,7 +1060,7 @@ static int dmirror_migrate_to_system(struct dmirror *dmirror,
|
||||
args.start = addr;
|
||||
args.end = next;
|
||||
args.pgmap_owner = dmirror->mdevice;
|
||||
args.flags = dmirror_select_device(dmirror);
|
||||
args.flags = dmirror_select_device(dmirror) | MIGRATE_VMA_SELECT_COMPOUND;
|
||||
|
||||
ret = migrate_vma_setup(&args);
|
||||
if (ret)
|
||||
@@ -928,6 +1076,8 @@ static int dmirror_migrate_to_system(struct dmirror *dmirror,
|
||||
out:
|
||||
mmap_read_unlock(mm);
|
||||
mmput(mm);
|
||||
kvfree(src_pfns);
|
||||
kvfree(dst_pfns);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -939,12 +1089,12 @@ static int dmirror_migrate_to_device(struct dmirror *dmirror,
|
||||
unsigned long size = cmd->npages << PAGE_SHIFT;
|
||||
struct mm_struct *mm = dmirror->notifier.mm;
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long src_pfns[32] = { 0 };
|
||||
unsigned long dst_pfns[32] = { 0 };
|
||||
struct dmirror_bounce bounce;
|
||||
struct migrate_vma args = { 0 };
|
||||
unsigned long next;
|
||||
int ret;
|
||||
unsigned long *src_pfns = NULL;
|
||||
unsigned long *dst_pfns = NULL;
|
||||
|
||||
start = cmd->addr;
|
||||
end = start + size;
|
||||
@@ -955,6 +1105,18 @@ static int dmirror_migrate_to_device(struct dmirror *dmirror,
|
||||
if (!mmget_not_zero(mm))
|
||||
return -EINVAL;
|
||||
|
||||
ret = -ENOMEM;
|
||||
src_pfns = kvcalloc(PTRS_PER_PTE, sizeof(*src_pfns),
|
||||
GFP_KERNEL | __GFP_NOFAIL);
|
||||
if (!src_pfns)
|
||||
goto free_mem;
|
||||
|
||||
dst_pfns = kvcalloc(PTRS_PER_PTE, sizeof(*dst_pfns),
|
||||
GFP_KERNEL | __GFP_NOFAIL);
|
||||
if (!dst_pfns)
|
||||
goto free_mem;
|
||||
|
||||
ret = 0;
|
||||
mmap_read_lock(mm);
|
||||
for (addr = start; addr < end; addr = next) {
|
||||
vma = vma_lookup(mm, addr);
|
||||
@@ -962,7 +1124,7 @@ static int dmirror_migrate_to_device(struct dmirror *dmirror,
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
|
||||
next = min(end, addr + (PTRS_PER_PTE << PAGE_SHIFT));
|
||||
if (next > vma->vm_end)
|
||||
next = vma->vm_end;
|
||||
|
||||
@@ -972,7 +1134,8 @@ static int dmirror_migrate_to_device(struct dmirror *dmirror,
|
||||
args.start = addr;
|
||||
args.end = next;
|
||||
args.pgmap_owner = dmirror->mdevice;
|
||||
args.flags = MIGRATE_VMA_SELECT_SYSTEM;
|
||||
args.flags = MIGRATE_VMA_SELECT_SYSTEM |
|
||||
MIGRATE_VMA_SELECT_COMPOUND;
|
||||
ret = migrate_vma_setup(&args);
|
||||
if (ret)
|
||||
goto out;
|
||||
@@ -992,7 +1155,7 @@ static int dmirror_migrate_to_device(struct dmirror *dmirror,
|
||||
*/
|
||||
ret = dmirror_bounce_init(&bounce, start, size);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto free_mem;
|
||||
mutex_lock(&dmirror->mutex);
|
||||
ret = dmirror_do_read(dmirror, start, end, &bounce);
|
||||
mutex_unlock(&dmirror->mutex);
|
||||
@@ -1003,11 +1166,14 @@ static int dmirror_migrate_to_device(struct dmirror *dmirror,
|
||||
}
|
||||
cmd->cpages = bounce.cpages;
|
||||
dmirror_bounce_fini(&bounce);
|
||||
return ret;
|
||||
goto free_mem;
|
||||
|
||||
out:
|
||||
mmap_read_unlock(mm);
|
||||
mmput(mm);
|
||||
free_mem:
|
||||
kfree(src_pfns);
|
||||
kfree(dst_pfns);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1200,6 +1366,7 @@ static void dmirror_device_evict_chunk(struct dmirror_chunk *chunk)
|
||||
unsigned long i;
|
||||
unsigned long *src_pfns;
|
||||
unsigned long *dst_pfns;
|
||||
unsigned int order = 0;
|
||||
|
||||
src_pfns = kvcalloc(npages, sizeof(*src_pfns), GFP_KERNEL | __GFP_NOFAIL);
|
||||
dst_pfns = kvcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL | __GFP_NOFAIL);
|
||||
@@ -1215,13 +1382,25 @@ static void dmirror_device_evict_chunk(struct dmirror_chunk *chunk)
|
||||
if (WARN_ON(!is_device_private_page(spage) &&
|
||||
!is_device_coherent_page(spage)))
|
||||
continue;
|
||||
|
||||
order = folio_order(page_folio(spage));
|
||||
spage = BACKING_PAGE(spage);
|
||||
dpage = alloc_page(GFP_HIGHUSER_MOVABLE | __GFP_NOFAIL);
|
||||
if (src_pfns[i] & MIGRATE_PFN_COMPOUND) {
|
||||
dpage = folio_page(folio_alloc(GFP_HIGHUSER_MOVABLE,
|
||||
order), 0);
|
||||
} else {
|
||||
dpage = alloc_page(GFP_HIGHUSER_MOVABLE | __GFP_NOFAIL);
|
||||
order = 0;
|
||||
}
|
||||
|
||||
/* TODO Support splitting here */
|
||||
lock_page(dpage);
|
||||
copy_highpage(dpage, spage);
|
||||
dst_pfns[i] = migrate_pfn(page_to_pfn(dpage));
|
||||
if (src_pfns[i] & MIGRATE_PFN_WRITE)
|
||||
dst_pfns[i] |= MIGRATE_PFN_WRITE;
|
||||
if (order)
|
||||
dst_pfns[i] |= MIGRATE_PFN_COMPOUND;
|
||||
folio_copy(page_folio(dpage), page_folio(spage));
|
||||
}
|
||||
migrate_device_pages(src_pfns, dst_pfns, npages);
|
||||
migrate_device_finalize(src_pfns, dst_pfns, npages);
|
||||
@@ -1234,7 +1413,12 @@ static void dmirror_remove_free_pages(struct dmirror_chunk *devmem)
|
||||
{
|
||||
struct dmirror_device *mdevice = devmem->mdevice;
|
||||
struct page *page;
|
||||
struct folio *folio;
|
||||
|
||||
|
||||
for (folio = mdevice->free_folios; folio; folio = folio_zone_device_data(folio))
|
||||
if (dmirror_page_to_chunk(folio_page(folio, 0)) == devmem)
|
||||
mdevice->free_folios = folio_zone_device_data(folio);
|
||||
for (page = mdevice->free_pages; page; page = page->zone_device_data)
|
||||
if (dmirror_page_to_chunk(page) == devmem)
|
||||
mdevice->free_pages = page->zone_device_data;
|
||||
@@ -1265,6 +1449,7 @@ static void dmirror_device_remove_chunks(struct dmirror_device *mdevice)
|
||||
mdevice->devmem_count = 0;
|
||||
mdevice->devmem_capacity = 0;
|
||||
mdevice->free_pages = NULL;
|
||||
mdevice->free_folios = NULL;
|
||||
kfree(mdevice->devmem_chunks);
|
||||
mdevice->devmem_chunks = NULL;
|
||||
}
|
||||
@@ -1379,18 +1564,30 @@ static void dmirror_devmem_free(struct folio *folio)
|
||||
struct page *page = &folio->page;
|
||||
struct page *rpage = BACKING_PAGE(page);
|
||||
struct dmirror_device *mdevice;
|
||||
struct folio *rfolio = page_folio(rpage);
|
||||
unsigned int order = folio_order(rfolio);
|
||||
|
||||
if (rpage != page)
|
||||
__free_page(rpage);
|
||||
if (rpage != page) {
|
||||
if (order)
|
||||
__free_pages(rpage, order);
|
||||
else
|
||||
__free_page(rpage);
|
||||
rpage = NULL;
|
||||
}
|
||||
|
||||
mdevice = dmirror_page_to_device(page);
|
||||
spin_lock(&mdevice->lock);
|
||||
|
||||
/* Return page to our allocator if not freeing the chunk */
|
||||
if (!dmirror_page_to_chunk(page)->remove) {
|
||||
mdevice->cfree++;
|
||||
page->zone_device_data = mdevice->free_pages;
|
||||
mdevice->free_pages = page;
|
||||
mdevice->cfree += 1 << order;
|
||||
if (order) {
|
||||
page->zone_device_data = mdevice->free_folios;
|
||||
mdevice->free_folios = page_folio(page);
|
||||
} else {
|
||||
page->zone_device_data = mdevice->free_pages;
|
||||
mdevice->free_pages = page;
|
||||
}
|
||||
}
|
||||
spin_unlock(&mdevice->lock);
|
||||
}
|
||||
@@ -1398,36 +1595,52 @@ static void dmirror_devmem_free(struct folio *folio)
|
||||
static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
|
||||
{
|
||||
struct migrate_vma args = { 0 };
|
||||
unsigned long src_pfns = 0;
|
||||
unsigned long dst_pfns = 0;
|
||||
struct page *rpage;
|
||||
struct dmirror *dmirror;
|
||||
vm_fault_t ret;
|
||||
vm_fault_t ret = 0;
|
||||
unsigned int order, nr;
|
||||
|
||||
/*
|
||||
* Normally, a device would use the page->zone_device_data to point to
|
||||
* the mirror but here we use it to hold the page for the simulated
|
||||
* device memory and that page holds the pointer to the mirror.
|
||||
*/
|
||||
rpage = vmf->page->zone_device_data;
|
||||
rpage = folio_zone_device_data(page_folio(vmf->page));
|
||||
dmirror = rpage->zone_device_data;
|
||||
|
||||
/* FIXME demonstrate how we can adjust migrate range */
|
||||
order = folio_order(page_folio(vmf->page));
|
||||
nr = 1 << order;
|
||||
|
||||
/*
|
||||
* Consider a per-cpu cache of src and dst pfns, but with
|
||||
* large number of cpus that might not scale well.
|
||||
*/
|
||||
args.start = ALIGN_DOWN(vmf->address, (PAGE_SIZE << order));
|
||||
args.vma = vmf->vma;
|
||||
args.start = vmf->address;
|
||||
args.end = args.start + PAGE_SIZE;
|
||||
args.src = &src_pfns;
|
||||
args.dst = &dst_pfns;
|
||||
args.end = args.start + (PAGE_SIZE << order);
|
||||
|
||||
nr = (args.end - args.start) >> PAGE_SHIFT;
|
||||
args.src = kcalloc(nr, sizeof(unsigned long), GFP_KERNEL);
|
||||
args.dst = kcalloc(nr, sizeof(unsigned long), GFP_KERNEL);
|
||||
args.pgmap_owner = dmirror->mdevice;
|
||||
args.flags = dmirror_select_device(dmirror);
|
||||
args.fault_page = vmf->page;
|
||||
|
||||
if (!args.src || !args.dst) {
|
||||
ret = VM_FAULT_OOM;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (order)
|
||||
args.flags |= MIGRATE_VMA_SELECT_COMPOUND;
|
||||
|
||||
if (migrate_vma_setup(&args))
|
||||
return VM_FAULT_SIGBUS;
|
||||
|
||||
ret = dmirror_devmem_fault_alloc_and_copy(&args, dmirror);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto err;
|
||||
migrate_vma_pages(&args);
|
||||
/*
|
||||
* No device finalize step is needed since
|
||||
@@ -1435,7 +1648,10 @@ static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
|
||||
* invalidated the device page table.
|
||||
*/
|
||||
migrate_vma_finalize(&args);
|
||||
return 0;
|
||||
err:
|
||||
kfree(args.src);
|
||||
kfree(args.dst);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct dev_pagemap_ops dmirror_devmem_ops = {
|
||||
@@ -1466,7 +1682,7 @@ static int dmirror_device_init(struct dmirror_device *mdevice, int id)
|
||||
return ret;
|
||||
|
||||
/* Build a list of free ZONE_DEVICE struct pages */
|
||||
return dmirror_allocate_chunk(mdevice, NULL);
|
||||
return dmirror_allocate_chunk(mdevice, NULL, false);
|
||||
}
|
||||
|
||||
static void dmirror_device_remove(struct dmirror_device *mdevice)
|
||||
|
||||
Reference in New Issue
Block a user