libvmi\read.c
/* Read a 64-bit (8-byte) value from guest physical address `paddr` into
 * `*value`.  Thin convenience wrapper over vmi_read_pa(); bytes_read is
 * passed as NULL because an 8-byte read either fully succeeds or fails. */
status_t
vmi_read_64_pa(
vmi_instance_t vmi,
addr_t paddr,
uint64_t * value)
{
return vmi_read_pa(vmi, paddr, 8, value, NULL);
}libvmi\read.c
/* Read `count` bytes starting at guest physical address `paddr` into `buf`.
 * Builds an access context holding only the address and delegates to
 * vmi_read().  If non-NULL, `bytes_read` receives the number of bytes
 * actually copied.
 * NOTE(review): ACCESS_CONTEXT presumably defaults the translation
 * mechanism to VMI_TM_NONE (pure physical access) — confirm in the macro
 * definition. */
status_t
vmi_read_pa(
vmi_instance_t vmi,
addr_t paddr,
size_t count,
void *buf,
size_t *bytes_read)
{
ACCESS_CONTEXT(ctx, .addr = paddr);
return vmi_read(vmi, &ctx, count, buf, bytes_read);
}libvmi\read.c
/* Core read primitive: copy `count` bytes of guest memory described by the
 * access context `ctx` into `buf`, translating addresses page by page
 * (consecutive virtual pages may map to non-contiguous physical frames).
 *
 * Returns VMI_SUCCESS only when all `count` bytes were read.  If
 * `bytes_read` is non-NULL it receives the number of bytes actually
 * copied, even on partial failure. */
status_t
vmi_read(
vmi_instance_t vmi,
const access_context_t *ctx,
size_t count,
void *buf,
size_t *bytes_read)
{
status_t ret = VMI_FAILURE;
size_t buf_offset = 0;
unsigned char *memory;
addr_t start_addr;
addr_t paddr;
addr_t naddr;
addr_t pfn;
addr_t offset;
addr_t pt;
page_mode_t pm;
#ifdef ENABLE_SAFETY_CHECKS
if (NULL == vmi) {
dbprint(VMI_DEBUG_READ, "--%s: vmi passed as NULL, returning without read\n", __FUNCTION__);
goto done;
}
if (NULL == ctx) {
dbprint(VMI_DEBUG_READ, "--%s: ctx passed as NULL, returning without read\n", __FUNCTION__);
goto done;
}
if (NULL == buf) {
dbprint(VMI_DEBUG_READ, "--%s: buf passed as NULL, returning without read\n", __FUNCTION__);
goto done;
}
/* ABI version check is warn-once only, for backward compatibility. */
if (ctx->version != ACCESS_CONTEXT_VERSION) {
if (!vmi->actx_version_warn_once)
errprint("--%s: access context version mismatch, please update your code\n", __FUNCTION__);
vmi->actx_version_warn_once = 1;
// TODO: for compatibility reasons we still accept code compiled
// without the ABI version field initialized.
// Turn this check into enforcement after appropriate amount of
// time passed (in ~2023 or after).
}
#endif
// Set defaults
pt = ctx->pt;
pm = ctx->pm;
start_addr = ctx->addr;
/* Resolve the translation mechanism into a (pagetable, page mode,
 * start address) triple used by the copy loop below. */
switch (ctx->tm) {
case VMI_TM_NONE:
/* Physical address: no pagetable walk at all. */
pm = VMI_PM_NONE;
pt = 0;
break;
case VMI_TM_KERNEL_SYMBOL:
#ifdef ENABLE_SAFETY_CHECKS
if (!vmi->os_interface || !vmi->kpgd)
goto done;
#endif
/* Resolve the kernel symbol to a virtual address first. */
if ( VMI_FAILURE == vmi_translate_ksym2v(vmi, ctx->ksym, &start_addr) )
goto done;
pt = vmi->kpgd;
if (!pm)
pm = vmi->page_mode;
break;
case VMI_TM_PROCESS_PID:
#ifdef ENABLE_SAFETY_CHECKS
if (!vmi->os_interface)
goto done;
#endif
/* PID 0 means the kernel's own address space (kpgd). */
if ( !ctx->pid )
pt = vmi->kpgd;
else if (ctx->pid > 0) {
if ( VMI_FAILURE == vmi_pid_to_dtb(vmi, ctx->pid, &pt) )
goto done;
}
if (!pm)
pm = vmi->page_mode;
if (!pt)
goto done;
break;
case VMI_TM_PROCESS_DTB:
/* Caller supplied the pagetable base directly in ctx->pt. */
if (!pm)
pm = vmi->page_mode;
break;
default:
errprint("%s error: translation mechanism is not defined.\n", __FUNCTION__);
goto done;
}
#ifdef ENABLE_SAFETY_CHECKS
if (pt && !valid_pm(pm)) {
dbprint(VMI_DEBUG_READ, "--%s: pagetable specified with no page mode\n", __FUNCTION__);
goto done;
}
if (ctx->npt && !valid_npm(ctx->npm)) {
dbprint(VMI_DEBUG_READ, "--%s: nested pagetable specified with no nested page mode\n", __FUNCTION__);
goto done;
}
#endif
/* Copy loop: each iteration translates one address and copies at most
 * up to the next page boundary. */
while (count > 0) {
size_t read_len = 0;
if (valid_pm(pm)) {
/* Virtual address: walk the (possibly nested) pagetables. */
if (VMI_SUCCESS != vmi_nested_pagetable_lookup(vmi, ctx->npt, ctx->npm, pt, pm, start_addr + buf_offset, &paddr, &naddr))
goto done;
if (valid_npm(ctx->npm)) {
dbprint(VMI_DEBUG_READ, "--Setting paddr to nested address 0x%lx\n", naddr);
paddr = naddr;
}
} else {
/* Already a (guest-)physical address; apply only the nested
 * translation, if one was requested. */
paddr = start_addr + buf_offset;
if (valid_npm(ctx->npm) && VMI_SUCCESS != vmi_nested_pagetable_lookup(vmi, 0, 0, ctx->npt, ctx->npm, paddr, &paddr, NULL) )
goto done;
}
/* access the memory */
pfn = paddr >> vmi->page_shift;
dbprint(VMI_DEBUG_READ, "--Reading pfn 0x%lx\n", pfn);
offset = (vmi->page_size - 1) & paddr;
memory = vmi_read_page(vmi, pfn);
if (NULL == memory)
goto done;
/* determine how much we can read (clamp to end of this page) */
if ((offset + count) > vmi->page_size)
read_len = vmi->page_size - offset;
else
read_len = count;
/* do the read */
memcpy(((char *) buf) + (addr_t) buf_offset, memory + (addr_t) offset, read_len);
/* set variables for next loop */
count -= read_len;
buf_offset += read_len;
}
ret = VMI_SUCCESS;
done:
/* Report partial progress even when the read failed mid-way. */
if ( bytes_read )
*bytes_read = buf_offset;
return ret;
}libvmi\accessors.c
/* Public accessor: map one guest page frame and return a pointer to it.
 * `frame_num` is a page frame number (paddr >> page_shift), not an
 * address.  Returns NULL on failure; delegates to the active driver. */
void* vmi_read_page (vmi_instance_t vmi, addr_t frame_num)
{
#ifdef ENABLE_SAFETY_CHECKS
if (!vmi)
return NULL;
#endif
return driver_read_page(vmi, frame_num);
}libvmi\driver\driver_wrapper.h
/* Dispatch a page read through the driver's read_page_ptr function
 * pointer (xen_read_page for the Xen driver).  Returns NULL when the
 * driver is uninitialized or does not implement page reads. */
static inline void *
driver_read_page(
vmi_instance_t vmi,
addr_t page)
{
#ifdef ENABLE_SAFETY_CHECKS
if (!vmi->driver.initialized || !vmi->driver.read_page_ptr) {
dbprint(VMI_DEBUG_DRIVER, "WARNING: driver_read_page function not implemented.\n");
return NULL;
}
#endif
return vmi->driver.read_page_ptr(vmi, page);
}The driver.read_page_ptr is a function pointer declared at driver/xen/xen.h and allocated by:
libvmi\driver\xen\xen.h
/* Excerpt (elided with "(...)"): driver_xen_setup wires the Xen driver's
 * function-pointer table; here only the read_page_ptr assignment is shown. */
static inline status_t driver_xen_setup(vmi_instance_t vmi)
{
(...)
driver.read_page_ptr = &xen_read_page;
(...)
return VMI_SUCCESS;
}libvmi\driver\xen\xen.c
/* Xen driver page read: convert the frame number back to a physical
 * address and fetch the page mapping through the one-entry memory cache. */
void *
xen_read_page(
vmi_instance_t vmi,
addr_t page)
{
addr_t paddr = page << vmi->page_shift;
return memory_cache_insert(vmi, paddr);
}libvmi\driver\memory_cache.c
/* One-entry memory cache: return a mapping of the page containing `paddr`.
 * On a hit (same key as last call) the cached mapping is reused; on a
 * miss the previous mapping is released and a fresh one fetched via
 * get_data_callback.
 * NOTE(review): `paddr` is used directly as the cache key, so hits
 * require callers to pass page-aligned addresses (xen_read_page passes
 * pfn << page_shift, which is aligned) — confirm for other callers. */
void *
memory_cache_insert(
vmi_instance_t vmi,
addr_t paddr)
{
if (paddr == vmi->last_used_page_key && vmi->last_used_page) {
/* Cache hit: same page as the previous access. */
return vmi->last_used_page;
} else {
/* Cache miss: release the old mapping before fetching the new page. */
if (vmi->last_used_page) {
vmi->release_data_callback(vmi, vmi->last_used_page, vmi->page_size);
}
vmi->last_used_page = get_memory_data(vmi, paddr, vmi->page_size);
vmi->last_used_page_key = paddr;
/* May be NULL if the driver failed to map the page. */
return vmi->last_used_page;
}
}get_memory_data() invokes the callback get_data_callback() which is a function pointer allocated by memory_cache_init() :
libvmi\driver\memory_cache.c
/* Fetch `length` bytes of guest memory at `paddr` through the
 * driver-provided callback installed by memory_cache_init()
 * (xen_get_memory for the Xen live-mode driver). */
static inline
void *get_memory_data(
vmi_instance_t vmi,
addr_t paddr,
uint32_t length)
{
return vmi->get_data_callback(vmi, paddr, length);
}libvmi\driver\memory_cache.c
/* Install the driver's fetch/release callbacks used by the memory cache.
 * `age_limit` is accepted for API compatibility but unused here. */
void memory_cache_init(
vmi_instance_t vmi,
void *(*get_data) (vmi_instance_t,
addr_t,
uint32_t),
void (*release_data) (vmi_instance_t,
void *,
size_t),
unsigned long UNUSED(age_limit))
{
vmi->get_data_callback = get_data;
vmi->release_data_callback = release_data;
}libvmi\driver\xen\xen.c
xen_setup_live_mode() is called from xen_init_vmi() (defined at libvmi\driver\xen\xen.c):
/* Re-initialize the memory cache with the live-mode Xen callbacks
 * (xen_get_memory / xen_release_memory). */
status_t
xen_setup_live_mode(
vmi_instance_t vmi)
{
dbprint(VMI_DEBUG_XEN, "--xen: setup live mode\n");
/* Drop any cached mapping left over from a previous mode. */
memory_cache_destroy(vmi);
memory_cache_init(vmi, xen_get_memory, xen_release_memory, 0);
return VMI_SUCCESS;
}but xen_init_vmi() is actually a function, that is passed as a pointer on driver_xen_setup() :
libvmi\driver\xen\xen.h
/* Excerpt (elided with "(...)"): driver_xen_setup also wires init_vmi_ptr
 * to xen_init_vmi, which in turn calls xen_setup_live_mode. */
static inline status_t driver_xen_setup(vmi_instance_t vmi)
{
(...)
driver.init_vmi_ptr = &xen_init_vmi;
(...)
return VMI_SUCCESS;
}void *
/* Memory-cache fetch callback: map (read-only) the page containing
 * `paddr`.  The `length` argument is ignored — see the TODO below. */
xen_get_memory(
vmi_instance_t vmi,
addr_t paddr,
uint32_t UNUSED(length))
{
//TODO assuming length == page size is safe for now, but isn't the most clean approach
addr_t pfn = paddr >> vmi->page_shift;
return xen_get_memory_pfn(vmi, pfn, PROT_READ);
}void *
xen_get_memory_pfn(
vmi_instance_t vmi,
addr_t pfn,
int prot)
{
/* Map one foreign frame of the target domain into our address space
 * via libxc's xc_map_foreign_range; returns NULL on failure. */
xen_instance_t *xen = xen_get_instance(vmi);
void *memory = xen->libxcw.xc_map_foreign_range(xen->xchandle,
xen->domainid,
XC_PAGE_SIZE,
prot,
(unsigned long) pfn);
/* xc_map_foreign_range reports failure as NULL; MAP_FAILED is checked
 * defensively as well. */
if (MAP_FAILED == memory || NULL == memory) {
dbprint(VMI_DEBUG_XEN, "--xen_get_memory_pfn failed on pfn=0x%"PRIx64"\n", pfn);
return NULL;
} else {
dbprint(VMI_DEBUG_XEN, "--xen_get_memory_pfn success on pfn=0x%"PRIx64"\n", pfn);
}
#ifdef VMI_DEBUG
// copy memory to local address space - handy for examination
uint8_t buf[XC_PAGE_SIZE];
memcpy(buf, memory, XC_PAGE_SIZE);
#endif // VMI_DEBUG
return memory;
}void *xc_map_foreign_pages(xc_interface *xch, uint32_t dom, int prot,
const xen_pfn_t *arr, int num)
{
/* Batch interface: map `num` foreign frames listed in `arr`.
 * Per-page errors are discarded (err passed as NULL), so any single
 * failed frame fails the whole mapping in xenforeignmemory_map2. */
if (num < 0) {
errno = EINVAL;
return NULL;
}
return xenforeignmemory_map(xch->fmem, dom, prot, num, arr, NULL);
}
/* Map `size` bytes of contiguous foreign frames starting at `mfn`:
 * builds the pfn array [mfn, mfn+1, ...] and delegates to
 * xc_map_foreign_pages(). */
void *xc_map_foreign_range(xc_interface *xch,
uint32_t dom, int size, int prot,
unsigned long mfn)
{
xen_pfn_t *arr;
int num;
int i;
void *ret;
/* Round size up to a whole number of pages. */
num = (size + XC_PAGE_SIZE - 1) >> XC_PAGE_SHIFT;
arr = calloc(num, sizeof(xen_pfn_t));
if ( arr == NULL )
return NULL;
for ( i = 0; i < num; i++ )
arr[i] = mfn + i;
ret = xc_map_foreign_pages(xch, dom, prot, arr, num);
free(arr);
return ret;
}void *xenforeignmemory_map(xenforeignmemory_handle *fmem,
uint32_t dom, int prot,
size_t num,
const xen_pfn_t arr[/*num*/], int err[/*num*/])
{
/* Compatibility wrapper: no address hint, no mmap flags. */
return xenforeignmemory_map2(fmem, dom, NULL, prot, 0, num, arr, err);
}void *xenforeignmemory_map2(xenforeignmemory_handle *fmem,
uint32_t dom, void *addr,
int prot, int flags, size_t num,
const xen_pfn_t arr[/*num*/], int err[/*num*/])
{
void *ret;
int *err_to_free = NULL;
/* If the caller supplied no per-page error array, allocate a
 * temporary one so failures can still be detected below. */
if ( err == NULL )
err = err_to_free = malloc(num * sizeof(int));
if ( err == NULL )
return NULL;
ret = osdep_xenforeignmemory_map(fmem, dom, addr, prot, flags, num, arr, err);
/* With the temporary error array the caller cannot see per-page
 * errors, so any single failed frame fails the whole mapping. */
if ( ret && err_to_free )
{
int i;
for ( i = 0 ; i < num ; i++ )
{
if ( err[i] )
{
errno = -err[i];
(void)osdep_xenforeignmemory_unmap(fmem, ret, num);
ret = NULL;
break;
}
}
}
free(err_to_free);
return ret;
}The function osdep_xenforeignmemory_map() depends on OS of the dom0, so it have different implementations across XEN’s code. This code is for linux:
/* Linux implementation: reserve a virtual range with mmap on the privcmd
 * device, then ask the kernel (via ioctl) to back it with the foreign
 * frames listed in `arr`.  Handles paged-out frames (ENOENT retry loop)
 * and old kernels lacking MMAPBATCH_V2 (EINVAL fallback path). */
void *osdep_xenforeignmemory_map(xenforeignmemory_handle *fmem,
uint32_t dom, void *addr,
int prot, int flags, size_t num,
const xen_pfn_t arr[/*num*/], int err[/*num*/])
{
int fd = fmem->fd;
privcmd_mmapbatch_v2_t ioctlx;
size_t i;
int rc;
/*
As far as privcmd is concerned mmap is effectively used as a way to
reserve some virtual address in the current process, while the ioctl is
used to fill it with memory mapped from a foreign domain.
MMAP -> mapping memory for us?
IOCTL -> filling the memory?
*/
addr = mmap(addr, num << XC_PAGE_SHIFT, prot, flags | MAP_SHARED,
fd, 0);
if ( addr == MAP_FAILED )
return NULL;
ioctlx.num = num;
ioctlx.dom = dom;
ioctlx.addr = (unsigned long)addr;
ioctlx.arr = arr;
ioctlx.err = err;
rc = ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &ioctlx);
/* Command was recognized, some gfn in arr are in paging state */
if ( rc < 0 && errno == ENOENT )
{
/* Poll until every previously paged-out frame has been paged in. */
do {
usleep(100);
rc = retry_paged(fd, dom, addr, arr, err, num);
} while ( rc > 0 );
}
/* Command was not recognized, use fall back */
else if ( rc < 0 && errno == EINVAL && (int)num > 0 )
{
/*
* IOCTL_PRIVCMD_MMAPBATCH_V2 is not supported - fall back to
* IOCTL_PRIVCMD_MMAPBATCH.
*/
privcmd_mmapbatch_t ioctlx;
xen_pfn_t *pfn;
unsigned int pfn_arr_size = ROUNDUP((num * sizeof(*pfn)), XC_PAGE_SHIFT);
int os_page_size = sysconf(_SC_PAGESIZE);
/* Small arrays live on the stack; large ones get their own mapping. */
if ( pfn_arr_size <= os_page_size )
pfn = alloca(num * sizeof(*pfn));
else
{
pfn = mmap(NULL, pfn_arr_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANON | MAP_POPULATE, -1, 0);
if ( pfn == MAP_FAILED )
{
PERROR("mmap of pfn array failed");
(void)munmap(addr, num << XC_PAGE_SHIFT);
return NULL;
}
}
/* V1 reports per-frame errors by mutating the pfn array in place,
 * so work on a copy and diff it against `arr` afterwards. */
memcpy(pfn, arr, num * sizeof(*arr));
ioctlx.num = num;
ioctlx.dom = dom;
ioctlx.addr = (unsigned long)addr;
ioctlx.arr = pfn;
rc = ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &ioctlx);
rc = rc < 0 ? -errno : 0;
for ( i = 0; i < num; ++i )
{
/* XOR of input vs. output pfn encodes the per-frame status. */
switch ( pfn[i] ^ arr[i] )
{
case 0:
err[i] = rc != -ENOENT ? rc : 0;
continue;
default:
err[i] = -EINVAL;
continue;
case PRIVCMD_MMAPBATCH_PAGED_ERROR:
if ( rc != -ENOENT )
{
err[i] = rc ?: -EINVAL;
continue;
}
rc = map_foreign_batch_single(fd, dom, pfn + i,
(unsigned long)addr + (i<<XC_PAGE_SHIFT));
if ( rc < 0 )
{
rc = -errno;
break;
}
rc = -ENOENT;
continue;
}
break;
}
if ( pfn_arr_size > os_page_size )
munmap(pfn, pfn_arr_size);
if ( rc == -ENOENT && i == num )
rc = 0;
else if ( rc )
{
errno = -rc;
rc = -1;
}
}
if ( rc < 0 )
{
/* Preserve errno across the cleanup munmap. */
int saved_errno = errno;
(void)munmap(addr, num << XC_PAGE_SHIFT);
errno = saved_errno;
return NULL;
}
return addr;
}https://xen-devel.narkive.com/ccLJubJy/question-regarding-foreign-memory-mapping-in-xen
>> TRANSITION TO DOM0 LINUX KERNEL <<
https://elixir.bootlin.com/linux/v4.16.7/source/drivers/xen/privcmd.c#744
/* Top-level ioctl dispatcher for the privcmd device: routes each command
 * to its handler; unknown commands return -ENOTTY. */
static long privcmd_ioctl(struct file *file,
unsigned int cmd, unsigned long data)
{
int ret = -ENOTTY;
void __user *udata = (void __user *) data;
switch (cmd) {
case IOCTL_PRIVCMD_HYPERCALL:
ret = privcmd_ioctl_hypercall(file, udata);
break;
case IOCTL_PRIVCMD_MMAP:
ret = privcmd_ioctl_mmap(file, udata);
break;
case IOCTL_PRIVCMD_MMAPBATCH:
/* version 1: per-frame errors reported in-place in m.arr */
ret = privcmd_ioctl_mmap_batch(file, udata, 1);
break;
case IOCTL_PRIVCMD_MMAPBATCH_V2:
/* version 2: per-frame errors reported in a separate m.err array */
ret = privcmd_ioctl_mmap_batch(file, udata, 2);
break;
case IOCTL_PRIVCMD_DM_OP:
ret = privcmd_ioctl_dm_op(file, udata);
break;
case IOCTL_PRIVCMD_RESTRICT:
ret = privcmd_ioctl_restrict(file, udata);
break;
default:
break;
}
return ret;
}https://elixir.bootlin.com/linux/v4.16.7/source/drivers/xen/privcmd.c#L443
/* Kernel-side handler for IOCTL_PRIVCMD_MMAPBATCH(_V2): map a batch of
 * foreign gfns into the caller's pre-reserved privcmd VMA.  Copies the
 * request and gfn array from user space, validates the target VMA, then
 * walks the gfn list with traverse_pages_block()/mmap_batch_fn and
 * reports per-frame errors back to user space. */
static long privcmd_ioctl_mmap_batch(
struct file *file, void __user *udata, int version)
{
struct privcmd_data *data = file->private_data;
int ret;
struct privcmd_mmapbatch_v2 m;
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long nr_pages;
LIST_HEAD(pagelist);
struct mmap_batch_state state;
switch (version) {
case 1:
if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
return -EFAULT;
/* Returns per-frame error in m.arr. */
m.err = NULL;
if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
return -EFAULT;
break;
case 2:
if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
return -EFAULT;
/* Returns per-frame error code in m.err. */
if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
return -EFAULT;
break;
default:
return -EINVAL;
}
/* If restriction is in place, check the domid matches */
if (data->domid != DOMID_INVALID && data->domid != m.dom)
return -EPERM;
nr_pages = DIV_ROUND_UP(m.num, XEN_PFN_PER_PAGE);
if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
return -EINVAL;
/* Copy the user gfn array into a kernel-side page list. */
ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
if (ret)
goto out;
if (list_empty(&pagelist)) {
ret = -EINVAL;
goto out;
}
if (version == 2) {
/* Zero error array now to only copy back actual errors. */
if (clear_user(m.err, sizeof(int) * m.num)) {
ret = -EFAULT;
goto out;
}
}
down_write(&mm->mmap_sem);
vma = find_vma(mm, m.addr);
/* The target VMA must be one created by mmap on the privcmd device. */
if (!vma ||
vma->vm_ops != &privcmd_vm_ops) {
ret = -EINVAL;
goto out_unlock;
}
/*
* Caller must either:
*
* Map the whole VMA range, which will also allocate all the
* pages required for the auto_translated_physmap case.
*
* Or
*
* Map unmapped holes left from a previous map attempt (e.g.,
* because those foreign frames were previously paged out).
*/
if (vma->vm_private_data == NULL) { // vm_private_data: used by driver to store its own information
/*
This IF statement checks if the addresss is different from the VMA start and VMA end
*/
if (m.addr != vma->vm_start ||
m.addr + (nr_pages << PAGE_SHIFT) != vma->vm_end) {
ret = -EINVAL;
goto out_unlock;
}
if (xen_feature(XENFEAT_auto_translated_physmap)) {
ret = alloc_empty_pages(vma, nr_pages);
if (ret < 0)
goto out_unlock;
} else
vma->vm_private_data = PRIV_VMA_LOCKED;
} else {
/*
This IF statement checks if the addresss is less than the VMA start and greater than VMA end
*/
if (m.addr < vma->vm_start ||
m.addr + (nr_pages << PAGE_SHIFT) > vma->vm_end) {
ret = -EINVAL;
goto out_unlock;
}
/* Second attempt may only fill previously unmapped holes. */
if (privcmd_vma_range_is_mapped(vma, m.addr, nr_pages)) {
ret = -EINVAL;
goto out_unlock;
}
}
state.domain = m.dom;
state.vma = vma;
state.va = m.addr;
state.index = 0;
state.global_error = 0;
state.version = version;
BUILD_BUG_ON(((PAGE_SIZE / sizeof(xen_pfn_t)) % XEN_PFN_PER_PAGE) != 0);
/* mmap_batch_fn guarantees ret == 0 */
BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t),
&pagelist, mmap_batch_fn, &state));
up_write(&mm->mmap_sem);
if (state.global_error) {
/* Write back errors in second pass. */
state.user_gfn = (xen_pfn_t *)m.arr;
state.user_err = m.err;
ret = traverse_pages_block(m.num, sizeof(xen_pfn_t),
&pagelist, mmap_return_errors, &state);
} else
ret = 0;
/* If we have not had any EFAULT-like global errors then set the global
* error to -ENOENT if necessary. */
if ((ret == 0) && (state.global_error == -ENOENT))
ret = -ENOENT;
out:
free_page_list(&pagelist);
return ret;
out_unlock:
up_write(&mm->mmap_sem);
goto out;
}- The blue highlighted functions,
copy_from_user() and gather_array(), are used to copy data from user-land to kernel-land. - The purple highlighted functions
down_write()/up_write() are synchronization primitives that lock memory access using a semaphore to prevent concurrent-access problems and race conditions. Note that in case of failure, execution branches to the out_unlock label, where the semaphore is unlocked. - The orange highlighted function
find_vma() searches the given address space for the first memory area whose vm_end field is greater than addr. In other words, this function finds the first memory area that contains addr or begins at an address greater than addr
There’s something called “ballooning” on Xen. This is a feature/technique used to dynamically adjust the physical memory in use by a guest. From the guest OS perspective, it still has all the memory that it started with; it just has a device driver that’s a real memory hog. But from Xen’s perspective, the memory which the device driver asked for is no longer real memory, it’s just empty space (hence “balloon”). When the administrator wants to give memory back to the VM, the balloon driver will ask Xen to fill the empty space with memory again (shrinking or “deflating” the balloon), and then “free” the resulting pages back to the guest OS (making the memory available for use again).
/* Allocate pfns (frames of machine real-physical memory are identified with Page Frame Numbers) that are then mapped with gfns (Frames of guest pseudo-physical memory are identified by Guest Frame Numbers) from foreign domid. Update
* the vma with the page info to use later.
* Returns: 0 if success, otherwise -errno
*/
static int alloc_empty_pages(structvm_area_struct *vma, int numpgs)
{
int rc;
structpage **pages;
pages = kcalloc(numpgs, sizeof(pages[0]),GFP_KERNEL);
if (pages == NULL)
return -ENOMEM;
rc = alloc_xenballooned_pages(numpgs,pages);
if (rc != 0) {
pr_warn("%s Could not alloc %d pfns rc:%d\n",__func__,
numpgs, rc);
kfree(pages);
return -ENOMEM;
}
BUG_ON(vma->vm_private_data != NULL);
vma->vm_private_data =pages;
return 0;
}
/**
* alloc_xenballooned_pages - get pages that have been ballooned out
* @nr_pages: Number of pages to get
* @pages: pages returned
* @return 0 on success, error otherwise
*/
int alloc_xenballooned_pages(int nr_pages, struct page **pages)
{
int pgno = 0;
struct page *page;
int ret;
mutex_lock(&balloon_mutex);
balloon_stats.target_unpopulated += nr_pages;
while (pgno < nr_pages) {
page = balloon_retrieve(true);
if (page) {
pages[pgno++] = page;
#ifdef CONFIG_XEN_HAVE_PVMMU
/*
* We don't support PV MMU when Linux and Xen is using
* different page granularity.
*/
BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
ret = xen_alloc_p2m_entry(page_to_pfn(page));
if (ret < 0)
goto out_undo;
}
#endif
} else {
/* Balloon is empty: grow it, then retry the retrieve loop. */
ret = add_ballooned_pages(nr_pages - pgno);
if (ret < 0)
goto out_undo;
}
}
mutex_unlock(&balloon_mutex);
return 0;
out_undo:
/* Roll back: return any pages already retrieved to the balloon. */
mutex_unlock(&balloon_mutex);
free_xenballooned_pages(pgno, pages);
return ret;
}
EXPORT_SYMBOL(alloc_xenballooned_pages);>> PAGE TABLE WALK <<
Note: the Linux page-table-walk implementation is notoriously convoluted; see the references below for background on the terminology.
https://lwn.net/Articles/717293/
https://silentming.net/blog/2016/11/30/funny-page-table-terminology/
https://silentming.net/blog/2016/11/30/xen-log-7-guest-linear-page-table/
/* Returns non-zero if any pte in the nr_pages pages starting at `addr`
 * is already populated — used above to reject mapping over an existing
 * mapping on a retry. */
static int privcmd_vma_range_is_mapped(
struct vm_area_struct *vma,
unsigned long addr,
unsigned long nr_pages)
{
return apply_to_page_range(vma->vm_mm, addr, nr_pages << PAGE_SHIFT,
is_mapped_fn, NULL) != 0;
}
/*
* For MMAPBATCH*. This allows asserting the singleshot mapping
* on a per pfn/pte basis. Mapping calls that fail with ENOENT
* can be then retried until success.
*/
static int is_mapped_fn(pte_t *pte, struct page *pmd_page,
unsigned long addr, void *data)
{
/* Leaf callback for apply_to_page_range(): -EBUSY if pte is present. */
return pte_none(*pte) ? 0 : -EBUSY;
}
/*
* Scan a region of virtual memory, filling in page tables as necessary
* and calling a provided function on each leaf page table.
*/
int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
unsigned long size, pte_fn_t fn, void *data)
{
pgd_t *pgd;
unsigned long next;
unsigned long end = addr + size;
int err;
if (WARN_ON(addr >= end))
return -EINVAL;
/* Walk top-level (pgd) entries covering [addr, end); each lower level
 * is handled by apply_to_p4d_range and friends. */
pgd = pgd_offset(mm, addr);
do {
next = pgd_addr_end(addr, end);
err = apply_to_p4d_range(mm, pgd, addr, next, fn, data);
if (err)
break;
} while (pgd++, addr = next, addr != end);
return err;
}
EXPORT_SYMBOL_GPL(apply_to_page_range);