/* Source: osfmk/vm/vm_pageout.c */
/*
* Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
* The contents of this file constitute Original Code as defined in and
* are subject to the Apple Public Source License Version 1.1 (the
* "License"). You may not use this file except in compliance with the
* License. Please obtain a copy of the License at
* http://www.apple.com/publicsource and read it before using this file.
*
* This Original Code and all software distributed under the License are
* distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
* License for the specific language governing rights and limitations
* under the License.
*
* @APPLE_LICENSE_HEADER_END@
*/
/*
* @OSF_COPYRIGHT@
*/
/*
* Mach Operating System
* Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
* All Rights Reserved.
*
* Permission to use, copy, modify and distribute this software and its
* documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
* ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie Mellon
* the rights to redistribute these changes.
*/
/*
*/
/*
* File: vm/vm_pageout.c
* Author: Avadis Tevanian, Jr., Michael Wayne Young
* Date: 1985
*
* The proverbial page-out daemon.
*/
#ifdef MACH_BSD
/* remove after component merge */
extern int vnode_pager_workaround;
#endif
#include <mach_pagemap.h>
#include <mach_cluster_stats.h>
#include <mach_kdb.h>
#include <dipc.h>
#include <advisory_pageout.h>
#include <mach/mach_types.h>
#include <mach/memory_object.h>
#include <mach/memory_object_default.h>
#include <mach/mach_host_server.h>
#include <mach/vm_param.h>
#include <mach/vm_statistics.h>
#include <kern/host_statistics.h>
#include <kern/counters.h>
#include <kern/thread.h>
#include <kern/thread_swap.h>
#include <kern/xpr.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <machine/vm_tuning.h>
#include <kern/misc_protos.h>
extern ipc_port_t memory_manager_default;
/*
* Default values for the pageout tunables. Every definition is wrapped
* in #ifndef, so a value defined earlier (presumably by
* <machine/vm_tuning.h>, included above -- confirm) takes precedence.
* NOTE(review): "DMM"/"EMM" presumably stand for default/external
* memory manager -- confirm.
*/
#ifndef VM_PAGE_LAUNDRY_MAX
#define VM_PAGE_LAUNDRY_MAX 10 /* outstanding DMM page cleans */
#endif /* VM_PAGE_LAUNDRY_MAX */
#ifndef VM_PAGEOUT_BURST_MAX
#define VM_PAGEOUT_BURST_MAX 10 /* simultaneous EMM page cleans */
#endif /* VM_PAGEOUT_BURST_MAX */
#ifndef VM_PAGEOUT_DISCARD_MAX
/*
* NOTE(review): the trailing comment below is identical to the one on
* VM_PAGEOUT_BURST_MAX and looks copy-pasted; presumably this is a limit
* on discard requests -- confirm against the users of this macro.
*/
#define VM_PAGEOUT_DISCARD_MAX 68 /* simultaneous EMM page cleans */
#endif /* VM_PAGEOUT_DISCARD_MAX */
#ifndef VM_PAGEOUT_BURST_WAIT
#define VM_PAGEOUT_BURST_WAIT 30 /* milliseconds per page */
#endif /* VM_PAGEOUT_BURST_WAIT */
#ifndef VM_PAGEOUT_EMPTY_WAIT
#define VM_PAGEOUT_EMPTY_WAIT 200 /* milliseconds */
#endif /* VM_PAGEOUT_EMPTY_WAIT */
/*
* To obtain a reasonable LRU approximation, the inactive queue
* needs to be large enough to give pages on it a chance to be
* referenced a second time. This macro defines the fraction
* of active+inactive pages that should be inactive.
* The pageout daemon uses it to update vm_page_inactive_target.
*
* If vm_page_free_count falls below vm_page_free_target and
* vm_page_inactive_count is below vm_page_inactive_target,
* then the pageout daemon starts running.
*
* As written the fraction is two thirds, computed with integer
* arithmetic (multiply first, then divide).
*/
#ifndef VM_PAGE_INACTIVE_TARGET
#define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 2 / 3)
#endif /* VM_PAGE_INACTIVE_TARGET */
/*
* Once the pageout daemon starts running, it keeps going
* until vm_page_free_count meets or exceeds vm_page_free_target.
* The default target is 15 pages plus 1/80th of the argument.
*/
#ifndef VM_PAGE_FREE_TARGET
#define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80)
#endif /* VM_PAGE_FREE_TARGET */
/*
* The pageout daemon always starts running once vm_page_free_count
* falls below vm_page_free_min.
* The default minimum is 10 pages plus 1/100th of the argument.
*/
#ifndef VM_PAGE_FREE_MIN
#define VM_PAGE_FREE_MIN(free) (10 + (free) / 100)
#endif /* VM_PAGE_FREE_MIN */
/*
* When vm_page_free_count falls below vm_page_free_reserved,
* only vm-privileged threads can allocate pages. vm-privilege
* allows the pageout daemon and default pager (and any other
* associated threads needed for default pageout) to continue
* operation by dipping into the reserved pool of pages.
*
* The default reserve is eight pages per allowed laundry operation
* plus one page per CPU (NCPUS is defined outside this file).
*/
#ifndef VM_PAGE_FREE_RESERVED
#define VM_PAGE_FREE_RESERVED \
((8 * VM_PAGE_LAUNDRY_MAX) + NCPUS)
#endif /* VM_PAGE_FREE_RESERVED */
/*
* Forward declarations for internal routines.
*/
extern void vm_pageout_continue(void);
extern void vm_pageout_scan(void);
/*
* Called in this file before a page is handed to its pager
* (vm_pageout_object_allocate, vm_pageout_initialize_page,
* vm_pageout_cluster).
*/
extern void vm_pageout_throttle(vm_page_t m);
/*
* Used by vm_pageout_cluster to look for a neighbouring page at the given
* object offset; a VM_PAGE_NULL result ends the neighbour search.
*/
extern vm_page_t vm_pageout_cluster_page(
vm_object_t object,
vm_offset_t offset,
boolean_t precious_clean);
/*
* Pageout tunables. Those initialized to 0 here are not assigned in the
* portion of the file reviewed; presumably they are set up from the
* VM_PAGE_xxx / VM_PAGEOUT_xxx defaults during initialization -- confirm.
*/
unsigned int vm_pageout_reserved_internal = 0;
unsigned int vm_pageout_reserved_really = 0;
unsigned int vm_page_laundry_max = 0; /* # of clusters outstanding */
/*
* vm_pageout_object_terminate and vm_pageout_return_write_pages wake
* sleepers on &vm_page_laundry_count once vm_page_laundry_count drops
* below this value, and then reset it to 0.
*/
unsigned int vm_page_laundry_min = 0;
unsigned int vm_pageout_burst_max = 0;
unsigned int vm_pageout_burst_wait = 0; /* milliseconds per page */
unsigned int vm_pageout_empty_wait = 0; /* milliseconds */
unsigned int vm_pageout_burst_min = 0;
unsigned int vm_pageout_pause_count = 0;
unsigned int vm_pageout_pause_max = 0;
unsigned int vm_free_page_pause = 100; /* milliseconds */
/*
* These variables record the pageout daemon's actions:
* how many pages it looks at and what happens to those pages.
* No locking needed because only one thread modifies the variables.
*/
unsigned int vm_pageout_active = 0; /* debugging */
unsigned int vm_pageout_inactive = 0; /* debugging */
unsigned int vm_pageout_inactive_throttled = 0; /* debugging */
unsigned int vm_pageout_inactive_forced = 0; /* debugging */
unsigned int vm_pageout_inactive_nolock = 0; /* debugging */
unsigned int vm_pageout_inactive_avoid = 0; /* debugging */
unsigned int vm_pageout_inactive_busy = 0; /* debugging */
unsigned int vm_pageout_inactive_absent = 0; /* debugging */
unsigned int vm_pageout_inactive_used = 0; /* debugging */
unsigned int vm_pageout_inactive_clean = 0; /* debugging */
unsigned int vm_pageout_inactive_dirty = 0; /* debugging */
unsigned int vm_pageout_dirty_no_pager = 0; /* debugging */
unsigned int vm_pageout_inactive_pinned = 0; /* debugging */
unsigned int vm_pageout_inactive_limbo = 0; /* debugging */
/* vm_pageout_setup: limbo page exchanged for a fresh fictitious page */
unsigned int vm_pageout_setup_limbo = 0; /* debugging */
/* vm_pageout_setup: limbo page whose prep_pin_count had dropped to 0 */
unsigned int vm_pageout_setup_unprepped = 0; /* debugging */
unsigned int vm_stat_discard = 0; /* debugging */
unsigned int vm_stat_discard_sent = 0; /* debugging */
unsigned int vm_stat_discard_failure = 0; /* debugging */
unsigned int vm_stat_discard_throttle = 0; /* debugging */
unsigned int vm_pageout_scan_active_emm_throttle = 0; /* debugging */
unsigned int vm_pageout_scan_active_emm_throttle_success = 0; /* debugging */
unsigned int vm_pageout_scan_active_emm_throttle_failure = 0; /* debugging */
unsigned int vm_pageout_scan_inactive_emm_throttle = 0; /* debugging */
unsigned int vm_pageout_scan_inactive_emm_throttle_success = 0; /* debugging */
unsigned int vm_pageout_scan_inactive_emm_throttle_failure = 0; /* debugging */
/*
* Incremented by vm_pageout_object_allocate: one or the other is bumped
* per call, depending on whether the page's object has a trusted pager
* (in place) or not (out of line).
*/
unsigned int vm_pageout_out_of_line = 0;
unsigned int vm_pageout_in_place = 0;
/*
 *	Routine:	vm_pageout_object_allocate
 *	Purpose:
 *		Create the object that will carry page "m" to its pager as
 *		out-of-line memory in a data_return/data_initialize message.
 *		The object holding the page must be unlocked on entry (it is
 *		locked and unlocked here on the trusted-pager path).
 *
 *		For a trusted pager the new object is a "pageout object":
 *		it shadows the page's object at "offset" so that cleaning can
 *		happen in place through private alias pages.  For an
 *		untrusted pager a plain object is returned and ordinary
 *		out-of-line memory is used.
 *
 *	Arguments:
 *		m	page about to be cleaned
 *		size	size of the object to allocate
 *		offset	offset within m's object that the new object shadows
 *
 *	Returns:
 *		The newly allocated object.
 */
vm_object_t
vm_pageout_object_allocate(
	vm_page_t	m,
	vm_size_t	size,
	vm_offset_t	offset)
{
	vm_object_t	backing = m->object;
	vm_object_t	carrier;

	assert(backing->pager_ready);

	/* Only trusted pagers and internal objects are throttled. */
	if (backing->pager_trusted || backing->internal)
		vm_pageout_throttle(m);

	carrier = vm_object_allocate(size);

	/* Untrusted pager: plain out-of-line object, nothing more to do. */
	if (!backing->pager_trusted) {
		vm_pageout_out_of_line++;
		return(carrier);
	}

	assert (offset < backing->size);

	/* Turn the new object into a pageout object shadowing "backing". */
	vm_object_lock(carrier);
	carrier->pageout = TRUE;
	carrier->shadow = backing;
	carrier->can_persist = FALSE;
	carrier->copy_strategy = MEMORY_OBJECT_COPY_NONE;
	carrier->shadow_offset = offset;
	vm_object_unlock(carrier);

	/*
	 * Hold a paging reference on the backing object for the lifetime
	 * of the pageout object; vm_pageout_object_terminate() drops it.
	 */
	vm_object_lock(backing);
	vm_object_paging_begin(backing);
	vm_object_unlock(backing);

	vm_pageout_in_place++;
	return(carrier);
}
/*
* Optional cluster statistics. When MACH_CLUSTER_STATS is non-zero the
* counters below exist and CLUSTER_STAT(clause) expands to "clause";
* otherwise CLUSTER_STAT() expands to nothing, so the wrapped statement or
* declaration disappears from the build.
*
* In the code visible here:
* - vm_pageout_cluster_dirtied/cleaned/collisions are updated for
* "adjacent" pages in vm_pageout_object_terminate;
* - vm_pageout_target_* are updated for "target" pages in
* vm_pageout_object_terminate;
* - vm_pageout_cluster_clusters is bumped once per vm_pageout_cluster call.
* vm_pageout_cluster_conversions is not referenced in the reviewed portion.
*/
#if MACH_CLUSTER_STATS
unsigned long vm_pageout_cluster_dirtied = 0;
unsigned long vm_pageout_cluster_cleaned = 0;
unsigned long vm_pageout_cluster_collisions = 0;
unsigned long vm_pageout_cluster_clusters = 0;
unsigned long vm_pageout_cluster_conversions = 0;
unsigned long vm_pageout_target_collisions = 0;
unsigned long vm_pageout_target_page_dirtied = 0;
unsigned long vm_pageout_target_page_freed = 0;
unsigned long vm_pageout_target_page_pinned = 0;
unsigned long vm_pageout_target_page_limbo = 0;
#define CLUSTER_STAT(clause) clause
#else /* MACH_CLUSTER_STATS */
#define CLUSTER_STAT(clause)
#endif /* MACH_CLUSTER_STATS */
/*
* Routine: vm_pageout_object_terminate
* Purpose:
* Destroy the pageout_object allocated by
* vm_pageout_object_allocate(), and perform all of the
* required cleanup actions.
*
* In/Out conditions:
* The object must be locked, and will be returned locked.
*/
void
vm_pageout_object_terminate(
vm_object_t object)
{
vm_object_t shadow_object;
/*
* Deal with the deallocation (last reference) of a pageout object
* (used for cleaning-in-place) by dropping the paging references/
* freeing pages in the original object.
*/
assert(object->pageout);
shadow_object = object->shadow;
/* The shadow (original) object stays locked for the whole walk. */
vm_object_lock(shadow_object);
/*
* Each iteration frees one private alias page "p" from the pageout
* object and then fixes up the real page "m" it stood for.
*/
while (!queue_empty(&object->memq)) {
vm_page_t p, m;
vm_offset_t offset;
p = (vm_page_t) queue_first(&object->memq);
assert(p->private);
assert(p->pageout);
p->pageout = FALSE;
assert(!p->cleaning);
/* Remember the offset before the alias page is freed. */
offset = p->offset;
VM_PAGE_FREE(p);
p = VM_PAGE_NULL;
/* Translate the alias offset into the shadow object's offset space. */
m = vm_page_lookup(shadow_object,
offset + object->shadow_offset);
/* No real page at that offset any more: nothing to fix up. */
if(m == VM_PAGE_NULL)
continue;
assert(m->cleaning);
/*
* Account for the paging reference taken when
* m->cleaning was set on this page.
*/
vm_object_paging_end(shadow_object);
assert((m->dirty) || (m->precious) ||
(m->busy && m->cleaning));
/*
* Handle the trusted pager throttle.
* The page queues lock taken here is released on both exits from
* this iteration (end of the target-page branch, and end of the loop body).
*/
vm_page_lock_queues();
if (m->laundry) {
vm_page_laundry_count--;
m->laundry = FALSE;
/* Wake throttled threads once the laundry count is low enough. */
if (vm_page_laundry_count < vm_page_laundry_min) {
vm_page_laundry_min = 0;
thread_wakeup((event_t) &vm_page_laundry_count);
}
}
/*
* Handle the "target" page(s). These pages are to be freed if
* successfully cleaned. Target pages are always busy, and are
* wired exactly once. The initial target pages are not mapped,
* (so cannot be referenced or modified) but converted target
* pages may have been modified between the selection as an
* adjacent page and conversion to a target.
*/
if (m->pageout) {
assert(m->busy);
assert(m->wire_count == 1);
m->cleaning = FALSE;
m->pageout = FALSE;
#if MACH_CLUSTER_STATS
if (m->wanted) vm_pageout_target_collisions++;
#endif
/*
* Revoke all access to the page. Since the object is
* locked, and the page is busy, this prevents the page
* from being dirtied after the pmap_is_modified() call
* returns.
*/
pmap_page_protect(m->phys_addr, VM_PROT_NONE);
/*
* Since the page is left "dirty" but "not modified", we
* can detect whether the page was redirtied during
* pageout by checking the modify state.
*/
m->dirty = pmap_is_modified(m->phys_addr);
if (m->dirty) {
/* Redirtied during pageout: keep the page. */
CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
vm_page_unwire(m);/* reactivates */
VM_STAT(reactivations++);
PAGE_WAKEUP_DONE(m);
} else if (m->prep_pin_count != 0) {
/* Clean, but someone has prepped (and maybe pinned) the page. */
vm_page_pin_lock();
if (m->pin_count != 0) {
/* page is pinned; reactivate */
CLUSTER_STAT(
vm_pageout_target_page_pinned++;)
vm_page_unwire(m);/* reactivates */
VM_STAT(reactivations++);
PAGE_WAKEUP_DONE(m);
} else {
/*
* page is prepped but not pinned; send
* it into limbo. Note that
* vm_page_free (which will be called
* after releasing the pin lock) knows
* how to handle a page with limbo set.
*/
m->limbo = TRUE;
CLUSTER_STAT(
vm_pageout_target_page_limbo++;)
}
vm_page_pin_unlock();
if (m->limbo)
vm_page_free(m);
} else {
/* Clean and unreferenced by prep/pin: the page can go. */
CLUSTER_STAT(vm_pageout_target_page_freed++;)
vm_page_free(m);/* clears busy, etc. */
}
vm_page_unlock_queues();
continue;
}
/*
* Handle the "adjacent" pages. These pages were cleaned in
* place, and should be left alone.
* If prep_pin_count is nonzero, then someone is using the
* page, so make it active.
*/
if (!m->active && !m->inactive) {
if (m->reference || m->prep_pin_count != 0)
vm_page_activate(m);
else
vm_page_deactivate(m);
}
if((m->busy) && (m->cleaning)) {
/* the request_page_list case, (COPY_OUT_FROM FALSE) */
m->busy = FALSE;
/* We do not re-set m->dirty ! */
/* The page was busy so no extraneous activity */
/* could have occurred. COPY_INTO is a read into the */
/* new pages. CLEAN_IN_PLACE does actually write */
/* out the pages but handling outside of this code */
/* will take care of resetting dirty. We clear the */
/* modify however for the Programmed I/O case. */
pmap_clear_modify(m->phys_addr);
if(m->absent) {
m->absent = FALSE;
/* The last absent page is released through the helper; */
/* otherwise only the count is decremented. */
if(shadow_object->absent_count == 1)
vm_object_absent_release(shadow_object);
else
shadow_object->absent_count--;
}
m->overwriting = FALSE;
} else if (m->overwriting) {
/* alternate request page list, write to page_list */
/* case. Occurs when the original page was wired */
/* at the time of the list request */
assert(m->wire_count != 0);
vm_page_unwire(m);/* reactivates */
m->overwriting = FALSE;
} else {
/*
* Set the dirty state according to whether or not the page was
* modified during the pageout. Note that we purposefully do
* NOT call pmap_clear_modify since the page is still mapped.
* If the page were to be dirtied between the 2 calls,
* this fact would be lost. This code is only necessary to
* maintain statistics, since the pmap module is always
* consulted if m->dirty is false.
*/
#if MACH_CLUSTER_STATS
m->dirty = pmap_is_modified(m->phys_addr);
if (m->dirty) vm_pageout_cluster_dirtied++;
else vm_pageout_cluster_cleaned++;
if (m->wanted) vm_pageout_cluster_collisions++;
#else
m->dirty = 0;
#endif
}
m->cleaning = FALSE;
/*
* Wakeup any thread waiting for the page to be un-cleaning.
*/
PAGE_WAKEUP(m);
vm_page_unlock_queues();
}
/*
* Account for the paging reference taken in vm_pageout_object_allocate.
*/
vm_object_paging_end(shadow_object);
vm_object_unlock(shadow_object);
/* By now the pageout object must be completely idle and empty. */
assert(object->ref_count == 0);
assert(object->paging_in_progress == 0);
assert(object->resident_page_count == 0);
return;
}
/*
* Routine: vm_pageout_setup
* Purpose:
* Set up a page for pageout (clean & flush).
*
* Move the page to a new object, as part of which it will be
* sent to its memory manager in a memory_object_data_write or
* memory_object_initialize message.
*
* The "new_object" and "new_offset" arguments
* indicate where the page should be moved.
*
* In/Out conditions:
* The page in question must not be on any pageout queues,
* and must be busy. The object to which it belongs
* must be unlocked, and the caller must hold a paging
* reference to it. The new_object must not be locked.
*
* This routine returns a pointer to a place-holder page,
* inserted at the same offset, to block out-of-order
* requests for the page. The place-holder page must
* be freed after the data_write or initialize message
* has been sent.
*
* The original page is put on a paging queue and marked
* not busy on exit.
*/
vm_page_t
vm_pageout_setup(
register vm_page_t m,
register vm_object_t new_object,
vm_offset_t new_offset)
{
register vm_object_t old_object = m->object;
/* NOTE(review): paging_offset is computed below but never read in this routine. */
vm_offset_t paging_offset;
vm_offset_t offset;
register vm_page_t holding_page;
/* new_m: the page that ends up inserted into new_object. */
register vm_page_t new_m;
/*
* new_page: spare fictitious page used for the limbo exchange.
* NOTE(review): it is only assigned when m->limbo is observed before
* old_object is locked, yet m->limbo is tested again after locking;
* presumably limbo cannot become set in between -- confirm.
*/
register vm_page_t new_page;
boolean_t need_to_wire = FALSE;
XPR(XPR_VM_PAGEOUT,
"vm_pageout_setup, obj 0x%X off 0x%X page 0x%X new obj 0x%X offset 0x%X\n",
(integer_t)m->object, (integer_t)m->offset,
(integer_t)m, (integer_t)new_object,
(integer_t)new_offset);
assert(m && m->busy && !m->absent && !m->fictitious && !m->error &&
!m->restart);
assert(m->dirty || m->precious);
/*
* Create a place-holder page where the old one was, to prevent
* attempted pageins of this page while we're unlocked.
* If the pageout daemon put this page in limbo and we're not
* going to clean in place, get another fictitious page to
* exchange for it now.
*/
VM_PAGE_GRAB_FICTITIOUS(holding_page);
if (m->limbo)
VM_PAGE_GRAB_FICTITIOUS(new_page);
vm_object_lock(old_object);
offset = m->offset;
paging_offset = offset + old_object->paging_offset;
if (old_object->pager_trusted) {
/*
* This pager is trusted, so we can clean this page
* in place. Leave it in the old object, and mark it
* cleaning & pageout.
*
* The fictitious page grabbed above becomes the private alias
* (new_m); holding_page is cleared, so this path returns
* VM_PAGE_NULL to the caller.
*/
new_m = holding_page;
holding_page = VM_PAGE_NULL;
/*
* If the pageout daemon put this page in limbo, exchange the
* identities of the limbo page and the new fictitious page,
* and continue with the new page, unless the prep count has
* gone to zero in the meantime (which means no one is
* interested in the page any more). In that case, just clear
* the limbo bit and free the extra fictitious page.
*/
if (m->limbo) {
if (m->prep_pin_count == 0) {
/* page doesn't have to be in limbo any more */
m->limbo = FALSE;
vm_page_free(new_page);
vm_pageout_setup_unprepped++;
} else {
/* Swap m out of old_object and put new_page in at the same offset. */
vm_page_lock_queues();
VM_PAGE_QUEUES_REMOVE(m);
vm_page_remove(m);
vm_page_limbo_exchange(m, new_page);
vm_pageout_setup_limbo++;
vm_page_release_limbo(m);
m = new_page;
vm_page_insert(m, old_object, offset);
vm_page_unlock_queues();
}
}
/*
* Set up new page to be private shadow of real page.
*/
new_m->phys_addr = m->phys_addr;
new_m->fictitious = FALSE;
new_m->private = TRUE;
new_m->pageout = TRUE;
/*
* Mark real page as cleaning (indicating that we hold a
* paging reference to be released via m_o_d_r_c) and
* pageout (indicating that the page should be freed
* when the pageout completes).
*/
pmap_clear_modify(m->phys_addr);
vm_page_lock_queues();
vm_page_wire(new_m);
m->cleaning = TRUE;
m->pageout = TRUE;
vm_page_wire(m);
assert(m->wire_count == 1);
vm_page_unlock_queues();
/* The page is left "dirty" with its hardware modify bit cleared above. */
m->dirty = TRUE;
m->precious = FALSE;
m->page_lock = VM_PROT_NONE;
m->unusual = FALSE;
m->unlock_request = VM_PROT_NONE;
} else {
/*
* Cannot clean in place, so rip the old page out of the
* object, and stick the holding page in. Set new_m to the
* page in the new object.
*/
vm_page_lock_queues();
VM_PAGE_QUEUES_REMOVE(m);
vm_page_remove(m);
/*
* If the pageout daemon put this page in limbo, exchange the
* identities of the limbo page and the new fictitious page,
* and continue with the new page, unless the prep count has
* gone to zero in the meantime (which means no one is
* interested in the page any more). In that case, just clear
* the limbo bit and free the extra fictitious page.
*/
if (m->limbo) {
if (m->prep_pin_count == 0) {
/* page doesn't have to be in limbo any more */
m->limbo = FALSE;
vm_page_free(new_page);
vm_pageout_setup_unprepped++;
} else {
vm_page_limbo_exchange(m, new_page);
vm_pageout_setup_limbo++;
vm_page_release_limbo(m);
m = new_page;
}
}
/* The place-holder takes the real page's slot in the old object. */
vm_page_insert(holding_page, old_object, offset);
vm_page_unlock_queues();
m->dirty = TRUE;
m->precious = FALSE;
new_m = m;
new_m->page_lock = VM_PROT_NONE;
new_m->unlock_request = VM_PROT_NONE;
/* Pages of internal objects are wired (not activated) in the new object. */
if (old_object->internal)
need_to_wire = TRUE;
}
/*
* Record that this page has been written out
*/
#if MACH_PAGEMAP
vm_external_state_set(old_object->existence_map, offset);
#endif /* MACH_PAGEMAP */
vm_object_unlock(old_object);
vm_object_lock(new_object);
/*
* Put the page into the new object. If it is a not wired
* (if it's the real page) it will be activated.
*/
vm_page_lock_queues();
vm_page_insert(new_m, new_object, new_offset);
if (need_to_wire)
vm_page_wire(new_m);
else
vm_page_activate(new_m);
PAGE_WAKEUP_DONE(new_m);
vm_page_unlock_queues();
vm_object_unlock(new_object);
/*
* Return the placeholder page to simplify cleanup.
* (VM_PAGE_NULL on the trusted-pager path, see above.)
*/
return (holding_page);
}
/*
 *	Routine:	vm_pageclean_setup
 *
 *	Purpose:	Prepare page "m" to be cleaned (made non-dirty) in
 *			place, without necessarily flushing it from the VM
 *			page cache.  The fictitious page "new_m" is turned
 *			into a wired, private alias of m's physical page and
 *			inserted into new_object at new_offset.
 *
 *			A paging reference is taken on m's object and m is
 *			marked cleaning; neither is undone here.
 *
 *			The page must not be busy, and the object and page
 *			queues must be locked.
 */
void
vm_pageclean_setup(
	vm_page_t	m,
	vm_page_t	new_m,
	vm_object_t	new_object,
	vm_offset_t	new_offset)
{
	vm_object_t	src_object = m->object;

	assert(!m->busy);
	assert(!m->cleaning);

	XPR(XPR_VM_PAGEOUT,
    "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
		(integer_t)src_object, m->offset, (integer_t)m,
		(integer_t)new_m, new_offset);

	/* Clear the hardware modify state before the clean starts. */
	pmap_clear_modify(m->phys_addr);
	vm_object_paging_begin(src_object);

#if	MACH_PAGEMAP
	/* Note in the existence map that this page has been written out. */
	vm_external_state_set(src_object->existence_map, m->offset);
#endif	/* MACH_PAGEMAP */

	/* The original page is now being cleaned in place. */
	m->precious = FALSE;
	m->dirty = TRUE;
	m->cleaning = TRUE;

	/* Make the fictitious page a private alias of the real frame. */
	assert(new_m->fictitious);
	new_m->phys_addr = m->phys_addr;
	new_m->pageout = TRUE;
	new_m->private = TRUE;
	new_m->fictitious = FALSE;

	vm_page_wire(new_m);
	vm_page_insert(new_m, new_object, new_offset);

	/* Nobody can be waiting on the alias, so just drop busy. */
	assert(!new_m->wanted);
	new_m->busy = FALSE;
}
/*
 *	Routine:	vm_pageclean_copy
 *
 *	Purpose:	Clean page "m" by copying its contents into the real
 *			page "new_m", which is then inserted into new_object
 *			at new_offset and activated.
 *
 *			On entry m must be neither busy nor cleaning, and the
 *			page queues and m's object are expected to be locked:
 *			both locks are dropped around the copy and re-taken
 *			afterwards, so both are held again on return.
 */
void
vm_pageclean_copy(
	vm_page_t	m,
	vm_page_t	new_m,
	vm_object_t	new_object,
	vm_offset_t	new_offset)
{
	XPR(XPR_VM_PAGEOUT,
	"vm_pageclean_copy, page 0x%X new_m 0x%X new_obj 0x%X offset 0x%X\n",
		m, new_m, new_object, new_offset, 0);

	assert((!m->busy) && (!m->cleaning));
	assert(!new_m->private && !new_m->fictitious);

	pmap_clear_modify(m->phys_addr);

	/*
	 * Pin the source down (busy page + paging reference) so the
	 * locks can be released for the duration of the copy.
	 */
	m->busy = TRUE;
	vm_object_paging_begin(m->object);
	vm_page_unlock_queues();
	vm_object_unlock(m->object);

	/* Duplicate the contents of the original page. */
	vm_page_copy(m, new_m);

	/*
	 * The old page is clean now; pmap_is_modified will report any
	 * later modification correctly.
	 */
	vm_object_lock(m->object);
	m->dirty = FALSE;
	vm_object_paging_end(m->object);

	vm_page_lock_queues();
	if (!(m->active || m->inactive)) {
		vm_page_activate(m);
	}
	PAGE_WAKEUP_DONE(m);

	/* Publish the copy; no other thread can be waiting on it yet. */
	vm_page_insert(new_m, new_object, new_offset);
	vm_page_activate(new_m);
	new_m->busy = FALSE;
}
/*
 *	Routine:	vm_pageout_initialize_page
 *	Purpose:
 *		Causes the specified page to be initialized in
 *		the appropriate memory object. This routine is used to push
 *		pages into a copy-object when they are modified in the
 *		permanent object.
 *
 *		The page is not moved: it stays in its object, is marked
 *		dirty/busy/list_req_pending/cleaning/pageout and wired, and a
 *		memory_object_data_initialize message naming its paging
 *		offset is sent to the object's pager with a null copy
 *		argument.
 *
 *	In/out conditions:
 *		The page in question must not be on any pageout queues.
 *		The object to which it belongs must be locked.  The lock is
 *		dropped while the page is prepared and the message is sent,
 *		and is re-acquired before returning.
 *		The page must be busy, but not hold a paging reference; the
 *		paging reference taken here is not released by this routine.
 */
void
vm_pageout_initialize_page(
	vm_page_t	m)
{
	vm_map_copy_t	copy = NULL;	/* no out-of-line data is built here */
	vm_object_t	object;
	vm_offset_t	paging_offset;

	XPR(XPR_VM_PAGEOUT,
		"vm_pageout_initialize_page, page 0x%X\n",
		(integer_t)m, 0, 0, 0, 0);
	assert(m->busy);

	/*
	 *	Verify that we really want to clean this page
	 */
	assert(!m->absent);
	assert(!m->error);
	assert(m->dirty);

	/*
	 *	Create a paging reference to let us play with the object.
	 */
	object = m->object;
	paging_offset = m->offset + object->paging_offset;
	vm_object_paging_begin(object);
	vm_object_unlock(object);

	/*
	 *	Same conditions as the asserts above, but also enforced
	 *	when assertions are compiled out.
	 */
	if (m->absent || m->error || m->restart ||
	    (!m->dirty && !m->precious)) {
		VM_PAGE_FREE(m);
		panic("reservation without pageout?"); /* alan */
		return;
	}

	/* set the page for future call to vm_fault_list_request */
	vm_object_lock(m->object);
	vm_page_lock_queues();
	pmap_clear_modify(m->phys_addr);
	m->dirty = TRUE;
	m->busy = TRUE;
	m->list_req_pending = TRUE;
	m->cleaning = TRUE;
	m->pageout = TRUE;
	vm_page_wire(m);
	vm_page_unlock_queues();
	vm_object_unlock(m->object);
	vm_pageout_throttle(m);

	VM_STAT(pageouts++);
	/* VM_STAT(pages_pagedout++); */

	/*
	 *	Write the data to its pager.
	 *	The pager is told which range to initialize by paging
	 *	offset and length; the copy argument is null.
	 */
	memory_object_data_initialize(object->pager,
					object->pager_request,
					paging_offset,
					POINTER_T(copy),
					PAGE_SIZE);

	/* Return with the object locked, as it was on entry. */
	vm_object_lock(object);
}
#if MACH_CLUSTER_STATS
/*
* Histograms of cluster shapes, maintained by vm_pageout_cluster.
* Each field is indexed by a page count: cluster_stats[k].pages_in_cluster
* counts clusters that contained k pages, and likewise for the number of
* pages found at lower / higher offsets than the target page.
* MAXCLUSTERPAGES is the number of histogram buckets.
*/
#define MAXCLUSTERPAGES 16
struct {
unsigned long pages_in_cluster;
unsigned long pages_at_higher_offsets;
unsigned long pages_at_lower_offsets;
} cluster_stats[MAXCLUSTERPAGES];
#endif /* MACH_CLUSTER_STATS */
/*
* When FALSE, vm_pageout_cluster limits every cluster to a single page
* (cluster_size is forced to PAGE_SIZE).
*/
boolean_t allow_clustered_pageouts = TRUE;
/*
 * vm_pageout_cluster:
 *
 * Given a page, page it out, and attempt to clean adjacent pages
 * in the same operation.
 *
 * The page must be busy, and the object unlocked w/ paging reference
 * to prevent deallocation or collapse.  The page must not be on any
 * pageout queue.
 *
 * The target page is marked dirty/busy/list_req_pending/cleaning/pageout
 * and wired, the range of eligible neighbours inside the object's cluster
 * is determined with vm_pageout_cluster_page(), and one data_return
 * request covering [cluster_start, cluster_end) is sent to the pager with
 * a null copy argument.  The caller's paging reference is dropped before
 * returning.
 */
void
vm_pageout_cluster(
	vm_page_t	m)
{
	vm_object_t	object = m->object;
	vm_offset_t	offset = m->offset;	/* from vm_object start */
	vm_offset_t	paging_offset = m->offset + object->paging_offset;
	vm_offset_t	new_offset;
	vm_size_t	cluster_size;
	vm_offset_t	cluster_offset;		/* from memory_object start */
	vm_offset_t	cluster_lower_bound;	/* from vm_object_start */
	vm_offset_t	cluster_upper_bound;	/* from vm_object_start */
	vm_offset_t	cluster_start, cluster_end;/* from vm_object start */
	vm_offset_t	offset_within_cluster;
	vm_size_t	length_of_data;
	vm_page_t	friend, holding_page;
	/*
	 * No copy object is built by this routine.  The variable used to be
	 * left uninitialized and was then handed to the pager and to
	 * vm_map_copy_discard(); start it out as NULL, as
	 * vm_pageout_initialize_page does.
	 */
	vm_map_copy_t	copy = NULL;
	kern_return_t	rc;
	boolean_t	precious_clean = FALSE;
	int		pages_in_cluster;

	CLUSTER_STAT(int pages_at_higher_offsets = 0;)
	CLUSTER_STAT(int pages_at_lower_offsets = 0;)

	XPR(XPR_VM_PAGEOUT,
		"vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
		(integer_t)object, offset, (integer_t)m, 0, 0);

	CLUSTER_STAT(vm_pageout_cluster_clusters++;)

	/*
	 * Only a certain kind of page is appreciated here.
	 */
	assert(m->busy && (m->dirty || m->precious) && (m->wire_count == 0));
	assert(!m->cleaning && !m->pageout && !m->inactive && !m->active);

	vm_object_lock(object);
	cluster_size = object->cluster_size;

	assert(cluster_size >= PAGE_SIZE);
	if (cluster_size < PAGE_SIZE) cluster_size = PAGE_SIZE;
	assert(object->pager_created && object->pager_initialized);
	assert(object->internal || object->pager_ready);

	if (m->precious && !m->dirty)
		precious_clean = TRUE;

	/* Clustering is only done for trusted pagers, and only if enabled. */
	if (!object->pager_trusted || !allow_clustered_pageouts)
		cluster_size = PAGE_SIZE;
	vm_object_unlock(object);

	cluster_offset = paging_offset & (cluster_size - 1);
			/* bytes from beginning of cluster */
	/*
	 * Due to unaligned mappings, we have to be careful
	 * of negative offsets into the VM object.  Clip the cluster
	 * boundary to the VM object, not the memory object.
	 */
	if (offset > cluster_offset) {
		cluster_lower_bound = offset - cluster_offset;
						/* from vm_object */
	} else {
		cluster_lower_bound = 0;
	}
	cluster_upper_bound = (offset - cluster_offset) + cluster_size;

	/* set the page for future call to vm_fault_list_request */
	holding_page = NULL;
	vm_object_lock(m->object);
	vm_page_lock_queues();
	pmap_clear_modify(m->phys_addr);
	m->dirty = TRUE;
	m->busy = TRUE;
	m->list_req_pending = TRUE;
	m->cleaning = TRUE;
	m->pageout = TRUE;
	vm_page_wire(m);
	vm_page_unlock_queues();
	vm_object_unlock(m->object);
	vm_pageout_throttle(m);

	/*
	 * Search backward for adjacent eligible pages to clean in
	 * this operation.
	 *
	 * NOTE(review): in both search loops the page returned in "friend"
	 * is only used to decide whether to continue; the state updates
	 * below are applied to the target page "m" again, not to "friend".
	 * This is preserved as found -- confirm whether it is intentional.
	 */
	cluster_start = offset;
	if (offset) {	/* avoid wrap-around at zero */
	    for (cluster_start = offset - PAGE_SIZE;
		cluster_start >= cluster_lower_bound;
		cluster_start -= PAGE_SIZE) {
		assert(cluster_size > PAGE_SIZE);

		vm_object_lock(object);
		vm_page_lock_queues();

		if ((friend = vm_pageout_cluster_page(object, cluster_start,
				precious_clean)) == VM_PAGE_NULL) {
			vm_page_unlock_queues();
			vm_object_unlock(object);
			break;
		}
		new_offset = (cluster_start + object->paging_offset)
						& (cluster_size - 1);

		assert(new_offset < cluster_offset);
		pmap_clear_modify(m->phys_addr);
		m->dirty = TRUE;
		m->list_req_pending = TRUE;
		m->cleaning = TRUE;
/* do nothing except advance the write request, all we really need to */
/* do is push the target page and let the code at the other end decide */
/* what is really the right size */
		if (vm_page_free_count <= vm_page_free_reserved) {
			m->busy = TRUE;
			m->pageout = TRUE;
			vm_page_wire(m);
		}

		vm_page_unlock_queues();
		vm_object_unlock(object);
		CLUSTER_STAT(pages_at_lower_offsets++;)

	    }
	    /* Step back up to the lowest offset that was accepted. */
	    cluster_start += PAGE_SIZE;
	}
	assert(cluster_start >= cluster_lower_bound);
	assert(cluster_start <= offset);

	/*
	 * Search forward for adjacent eligible pages to clean in
	 * this operation.
	 */
	for (cluster_end = offset + PAGE_SIZE;
		cluster_end < cluster_upper_bound;
		cluster_end += PAGE_SIZE) {
		assert(cluster_size > PAGE_SIZE);

		vm_object_lock(object);
		vm_page_lock_queues();

		if ((friend = vm_pageout_cluster_page(object, cluster_end,
				precious_clean)) == VM_PAGE_NULL) {
			vm_page_unlock_queues();
			vm_object_unlock(object);
			break;
		}
		new_offset = (cluster_end + object->paging_offset)
						& (cluster_size - 1);

		assert(new_offset < cluster_size);
		pmap_clear_modify(m->phys_addr);
		m->dirty = TRUE;
		m->list_req_pending = TRUE;
		m->cleaning = TRUE;
/* do nothing except advance the write request, all we really need to */
/* do is push the target page and let the code at the other end decide */
/* what is really the right size */
		if (vm_page_free_count <= vm_page_free_reserved) {
			m->busy = TRUE;
			m->pageout = TRUE;
			vm_page_wire(m);
		}

		vm_page_unlock_queues();
		vm_object_unlock(object);
		CLUSTER_STAT(pages_at_higher_offsets++;)
	}
	assert(cluster_end <= cluster_upper_bound);
	assert(cluster_end >= offset + PAGE_SIZE);

	/*
	 * (offset - cluster_offset) is beginning of cluster_object
	 * relative to vm_object start.
	 */
	offset_within_cluster = cluster_start - (offset - cluster_offset);
	length_of_data = cluster_end - cluster_start;

	assert(offset_within_cluster < cluster_size);
	assert((offset_within_cluster + length_of_data) <= cluster_size);

	rc = KERN_SUCCESS;
	assert(rc == KERN_SUCCESS);

	pages_in_cluster = length_of_data/PAGE_SIZE;
	VM_STAT(pageouts++);
	/* VM_STAT(pages_pagedout += pages_in_cluster); */

#if	MACH_CLUSTER_STATS
	/*
	 * The histograms only have MAXCLUSTERPAGES buckets; a cluster
	 * of MAXCLUSTERPAGES pages or more must not index past the end.
	 */
	if (pages_at_lower_offsets < MAXCLUSTERPAGES)
		(cluster_stats[pages_at_lower_offsets].pages_at_lower_offsets)++;
	if (pages_at_higher_offsets < MAXCLUSTERPAGES)
		(cluster_stats[pages_at_higher_offsets].pages_at_higher_offsets)++;
	if (pages_in_cluster < MAXCLUSTERPAGES)
		(cluster_stats[pages_in_cluster].pages_in_cluster)++;
#endif	/* MACH_CLUSTER_STATS */

	/*
	 * Send the data to the pager.
	 */
	paging_offset = cluster_start + object->paging_offset;
#ifdef	MACH_BSD
	/* The vnode pager is called directly rather than through the generic interface. */
	if(((rpc_subsystem_t)pager_mux_hash_lookup(object->pager)) ==
	((rpc_subsystem_t) &vnode_pager_workaround)) {
		rc = vnode_pager_data_return(object->pager,
				       object->pager_request,
				       paging_offset,
				       POINTER_T(copy),
				       length_of_data,
				       !precious_clean,
				       FALSE);
	} else {
		rc = memory_object_data_return(object->pager,
				       object->pager_request,
				       paging_offset,
				       POINTER_T(copy),
				       length_of_data,
				       !precious_clean,
				       FALSE);
	}
#else
	rc = memory_object_data_return(object->pager,
				       object->pager_request,
				       paging_offset,
				       POINTER_T(copy),
				       length_of_data,
				       !precious_clean,
				       FALSE);
#endif
	vm_object_paging_end(object);

	/* Only discard a copy object if one actually exists. */
	if (rc != KERN_SUCCESS && copy != NULL)
		vm_map_copy_discard(copy);

	/* holding_page is never set to a page above, so this is not taken. */
	if (holding_page) {
		assert(!object->pager_trusted);
		vm_object_lock(object);
		VM_PAGE_FREE(holding_page);
		vm_object_paging_end(object);
		vm_object_unlock(object);
	}
}
/*
 *	vm_pageout_return_write_pages
 *
 *	Recover pages from an aborted write attempt.  Every page-sized
 *	slot covered by "copy" is made resident again in the object the
 *	data came from, marked dirty and finally un-busied; the copy
 *	itself is discarded.
 *
 *	control_port	port whose ip_kobject is taken as the target
 *			vm_object when "copy" is not an object-type copy
 *			with a shadow
 *	object_offset	offset from which the position inside that
 *			object is derived (object->paging_offset and
 *			copy->offset are subtracted)
 *	copy		the map copy being recovered; consumed by
 *			vm_map_copy_discard()
 *
 *	NOTE(review): the definition has no return type (implicit int)
 *	and never returns a value.  Left as is so that any existing
 *	declaration elsewhere keeps matching -- confirm and make it
 *	"void" if callers allow.
 */
vm_pageout_return_write_pages(
	ipc_port_t	control_port,
	vm_offset_t	object_offset,
	vm_map_copy_t	copy)
{
	vm_object_t	object;
	int		offset;
	int		size;
	int		shadow_offset;
	int		copy_offset;
	int		j;
	vm_page_t	m;

	object = copy->cpy_object;
	copy_offset = copy->offset;
	size = copy->size;

	if((copy->type != VM_MAP_COPY_OBJECT) || (object->shadow == 0)) {
		/* no shadow chain to follow: use the object behind the port */
		object = (vm_object_t)control_port->ip_kobject;
		shadow_offset = (object_offset - object->paging_offset)
						- copy->offset;
	} else {
		/* get the offset from the copy object */
		shadow_offset = object->shadow_offset;
		/* find the backing object */
		object = object->shadow;
	}

	vm_object_lock(object);

	/*
	 * Pass 1: make sure every page of the range is resident in
	 * "object" and is not treated as a pageout target any more.
	 * "j" indexes the page list for page-list style copies.
	 */
	for(offset = 0, j=0; offset < size; offset+=page_size, j++) {
		m = vm_page_lookup(object,
				offset + shadow_offset + copy_offset);
		if((m == VM_PAGE_NULL) || m->fictitious) {
			vm_page_t	p;
			int		i;
			vm_object_t	copy_object;

			/* m might be fictitious if the original page */
			/* was found to be in limbo at the time of    */
			/* vm_pageout_setup                            */
			if((m != VM_PAGE_NULL) && m->fictitious) {
				m->cleaning = FALSE;
				vm_page_remove(m);
				/* if object is not pager trusted then */
				/* this fictitious page will be removed */
				/* as the holding page in vm_pageout_cluster */
				if (object->pager_trusted)
					vm_page_free(m);
				if(vm_page_laundry_count)
					vm_page_laundry_count--;
				/* wake a throttled pageout thread once the */
				/* laundry count drops below its threshold  */
				if (vm_page_laundry_count
						< vm_page_laundry_min) {
					vm_page_laundry_min = 0;
					thread_wakeup((event_t)
						&vm_page_laundry_count);
				}
			}
			else if ((object->pager_trusted) &&
				(copy->type == VM_MAP_COPY_OBJECT)) {
				vm_object_paging_end(object);
			}

			/*
			 * Take the real page back out of the copy: either
			 * search the copy object's resident page queue for
			 * the matching offset, or pull it from the page list.
			 */
			copy_object = copy->cpy_object;
			if(copy->type == VM_MAP_COPY_OBJECT) {
				p = (vm_page_t) queue_first(&copy_object->memq);
				for(i = 0;
					i < copy_object->resident_page_count;
					i++) {
					if(p->offset == (offset + copy_offset))
						break;
					p = (vm_page_t) queue_next(&p->listq);
				}
				vm_page_remove(p);
			} else {
				p = copy->cpy_page_list[j];
				copy->cpy_page_list[j] = 0;
				p->gobbled = FALSE;
			}

			/* re-insert it where the data originally lived */
			vm_page_insert(p, object,
				offset + shadow_offset + copy_offset);
			p->busy = TRUE;
			p->dirty = TRUE;
			p->laundry = FALSE;
			if (p->pageout) {
				p->pageout = FALSE; /*dont throw away target*/
				vm_page_unwire(p);/* reactivates */
			}
		} else if(m->pageout) {
			m->pageout = FALSE; /* dont throw away target pages */
			vm_page_unwire(m);/* reactivates */
		}
	}

	/* the copy is discarded with the object unlocked */
	vm_object_unlock(object);
	vm_map_copy_discard(copy);
	vm_object_lock(object);

	/*
	 * Pass 2: release the pages for general use again.
	 */
	for(offset = 0; offset < size; offset+=page_size) {
		m = vm_page_lookup(object,
				offset + shadow_offset + copy_offset);
		m->dirty = TRUE; /* we'll send the pages home later */
		m->busy = FALSE; /* allow system access again */
	}

	vm_object_unlock(object);
}
/*
 *	Trusted pager throttle.
 *
 *	Flags the page as laundry and accounts for it in
 *	vm_page_laundry_count.  While that count is at or above
 *	vm_page_laundry_max the caller sleeps (uninterruptibly) on
 *	&vm_page_laundry_count before the page is counted.
 *
 *	Object must be unlocked, page queues must be unlocked.
 */
void
vm_pageout_throttle(
	register vm_page_t m)
{
	vm_page_lock_queues();

	assert(!m->laundry);
	m->laundry = TRUE;

	for (;;) {
		if (vm_page_laundry_count < vm_page_laundry_max)
			break;		/* room for one more page */

		/*
		 * Publish the level below which whoever lowers the
		 * laundry count should issue a wakeup.
		 */
		vm_page_laundry_min = vm_page_laundry_max/2;

		assert_wait((event_t) &vm_page_laundry_count, THREAD_UNINT);
		vm_page_unlock_queues();

		/*
		 * Give the default pager time to catch up.
		 */
		thread_block((void (*)(void)) 0);

		vm_page_lock_queues();
	}

	vm_page_laundry_count++;
	vm_page_unlock_queues();
}
/*
 * The global variable vm_pageout_clean_active_pages controls whether
 * active pages are considered valid to be cleaned in place during a
 * clustered pageout. When it is non-zero, vm_pageout_cluster_page()
 * accepts pages flagged either active or inactive; when it is zero,
 * only pages flagged inactive are accepted. Performance measurements
 * are necessary to determine the best policy.
 */
int vm_pageout_clean_active_pages = 1;
/*
 *	vm_pageout_cluster_page:	[Internal]
 *
 *	Look up the page at (object, offset) and hand it back only if it
 *	may be cleaned in place as a neighbour of a cluster target.
 *	VM_PAGE_NULL is returned for a page that does not exist, is busy,
 *	absent, already being cleaned, prepped/pinned, wired, in error,
 *	or on neither eligible paging queue.
 *
 *	With precious_clean set, only precious pages that are NOT dirty
 *	qualify; otherwise only dirty pages qualify.  The page's dirty
 *	bit is refreshed from the pmap before that decision is taken.
 *
 *	The object must be locked on entry, and remains locked throughout
 *	this call.
 */
vm_page_t
vm_pageout_cluster_page(
	vm_object_t	object,
	vm_offset_t	offset,
	boolean_t	precious_clean)
{
	vm_page_t	page;
	boolean_t	eligible;

	XPR(XPR_VM_PAGEOUT,
		"vm_pageout_cluster_page, object 0x%X offset 0x%X\n",
		(integer_t)object, offset, 0, 0, 0);

	page = vm_page_lookup(object, offset);
	if (page == VM_PAGE_NULL)
		return(VM_PAGE_NULL);

	/* somebody else is using or processing the page */
	if (page->busy || page->absent || page->cleaning)
		return(VM_PAGE_NULL);
	if (page->prep_pin_count != 0 || page->wire_count != 0 ||
	    page->error)
		return(VM_PAGE_NULL);

	/*
	 * Inactive pages are always candidates; active ones only when
	 * vm_pageout_clean_active_pages allows it.
	 */
	if (!page->inactive) {
		if (!vm_pageout_clean_active_pages || !page->active)
			return(VM_PAGE_NULL);
	}

	assert(!page->private);
	assert(!page->fictitious);

	/* pick up modifications the pmap layer knows about */
	if (!page->dirty)
		page->dirty = pmap_is_modified(page->phys_addr);

	if (precious_clean)
		eligible = (page->precious && !page->dirty);
	else
		eligible = page->dirty;

	return(eligible ? page : VM_PAGE_NULL);
}
/*
 * vm_pageout_scan does the dirty work for the pageout daemon.
 * It returns with vm_page_queue_free_lock held and
 * vm_page_free_wanted == 0.
 *
 * Each pass of the main loop:
 *   1. recomputes vm_page_inactive_target and deactivates pages from
 *      the active queue until the target is met;
 *   2. leaves the loop if vm_page_free_count has reached
 *      vm_page_free_target and nobody is waiting for a page;
 *   3. otherwise either sleeps (no inactive pages, loop_detect
 *      exhausted, or burst limit reached) and restarts, or takes the
 *      first inactive page and frees it, reactivates it, or hands it
 *      to vm_pageout_cluster().
 *
 * Lock usage visible here: the page queues lock is held at the top of
 * every pass; an object is only ever locked with vm_object_lock_try
 * while the page queues lock is held.
 */
extern void vm_pageout_scan_continue(void); /* forward; */
void
vm_pageout_scan(void)
{
/* pages sent to non-internal objects since the last pause */
unsigned int burst_count;
boolean_t now = FALSE;
unsigned int laundry_pages;
/* TRUE: keep deactivating active pages even if the target is met */
boolean_t need_more_inactive_pages;
/* remaining passes before the scan gives up and sleeps */
unsigned int loop_detect;
XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
/*???*/ /*
* We want to gradually dribble pages from the active queue
* to the inactive queue. If we let the inactive queue get
* very small, and then suddenly dump many pages into it,
* those pages won't get a sufficient chance to be referenced
* before we start taking them from the inactive queue.
*
* We must limit the rate at which we send pages to the pagers.
* data_write messages consume memory, for message buffers and
* for map-copy objects. If we get too far ahead of the pagers,
* we can potentially run out of memory.
*
* We can use the laundry count to limit directly the number
* of pages outstanding to the default pager. A similar
* strategy for external pagers doesn't work, because
* external pagers don't have to deallocate the pages sent them,
* and because we might have to send pages to external pagers
* even if they aren't processing writes. So we also
* use a burst count to limit writes to external pagers.
*
* When memory is very tight, we can't rely on external pagers to
* clean pages. They probably aren't running, because they
* aren't vm-privileged. If we kept sending dirty pages to them,
* we could exhaust the free list. However, we can't just ignore
* pages belonging to external objects, because there might be no
* pages belonging to internal objects. Hence, we get the page
* into an internal object and then immediately double-page it,
* sending it to the default pager.
*
* consider_zone_gc should be last, because the other operations
* might return memory to zones.
*/
Restart:
/* sample, under the free lock, whether free memory is below the minimum */
mutex_lock(&vm_page_queue_free_lock);
now = (vm_page_free_count < vm_page_free_min);
mutex_unlock(&vm_page_queue_free_lock);
#if THREAD_SWAPPER
swapout_threads(now);
#endif /* THREAD_SWAPPER */
stack_collect();
consider_task_collect();
consider_thread_collect();
cleanup_limbo_queue();
consider_zone_gc();
consider_machine_collect();
/* allow at most one pass per page currently on the paging queues */
loop_detect = vm_page_active_count + vm_page_inactive_count;
if (vm_page_free_count <= vm_page_free_reserved) {
need_more_inactive_pages = TRUE;
} else {
need_more_inactive_pages = FALSE;
}
for (burst_count = 0;;) {
register vm_page_t m;
register vm_object_t object;
unsigned int free_count;
/*
* Recalculate vm_page_inactive_target.
*/
vm_page_lock_queues();
vm_page_inactive_target =
VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
vm_page_inactive_count);
/*
* Move pages from active to inactive.
*/
while ((vm_page_inactive_count < vm_page_inactive_target ||
need_more_inactive_pages) &&
!queue_empty(&vm_page_queue_active)) {
register vm_object_t object;
vm_pageout_active++;
m = (vm_page_t) queue_first(&vm_page_queue_active);
/*
* If we're getting really low on memory,
* try selecting a page that will go
* directly to the default_pager.
* If there are no such pages, we have to
* page out a page backed by an EMM,
* so that the default_pager can recover
* it eventually.
*/
if (need_more_inactive_pages &&
(IP_VALID(memory_manager_default))) {
vm_pageout_scan_active_emm_throttle++;
/* walk the active queue for a page of a trusted/internal object */
do {
assert(m->active && !m->inactive);
object = m->object;
if (vm_object_lock_try(object)) {
if (object->pager_trusted ||
object->internal) {
/* found one ! */
vm_pageout_scan_active_emm_throttle_success++;
/* jumps forward with "object" still locked */
goto object_locked_active;
}
vm_object_unlock(object);
}
m = (vm_page_t) queue_next(&m->pageq);
} while (!queue_end(&vm_page_queue_active,
(queue_entry_t) m));
if (queue_end(&vm_page_queue_active,
(queue_entry_t) m)) {
/* nothing suitable: fall back to the head of the queue */
vm_pageout_scan_active_emm_throttle_failure++;
m = (vm_page_t)
queue_first(&vm_page_queue_active);
}
}
assert(m->active && !m->inactive);
object = m->object;
if (!vm_object_lock_try(object)) {
/*
* Move page to end and continue.
*/
queue_remove(&vm_page_queue_active, m,
vm_page_t, pageq);
queue_enter(&vm_page_queue_active, m,
vm_page_t, pageq);
/* drop the queues lock around the pause, then retry */
vm_page_unlock_queues();
mutex_pause();
vm_page_lock_queues();
continue;
}
object_locked_active:
/*
* If the page is busy, then we pull it
* off the active queue and leave it alone.
*/
if (m->busy) {
vm_object_unlock(object);
queue_remove(&vm_page_queue_active, m,
vm_page_t, pageq);
m->active = FALSE;
if (!m->fictitious)
vm_page_active_count--;
continue;
}
/*
* Deactivate the page while holding the object
* locked, so we know the page is still not busy.
* This should prevent races between pmap_enter
* and pmap_clear_reference. The page might be
* absent or fictitious, but vm_page_deactivate
* can handle that.
*/
vm_page_deactivate(m);
vm_object_unlock(object);
}
/*
* We are done if we have met our target *and*
* nobody is still waiting for a page.
*/
mutex_lock(&vm_page_queue_free_lock);
free_count = vm_page_free_count;
if ((free_count >= vm_page_free_target) &&
(vm_page_free_wanted == 0)) {
/* leave the loop with vm_page_queue_free_lock still held */
vm_page_unlock_queues();
break;
}
mutex_unlock(&vm_page_queue_free_lock);
/*
* Sometimes we have to pause:
* 1) No inactive pages - nothing to do.
* 2) Flow control - wait for untrusted pagers to catch up.
* (A pause is also taken when loop_detect runs out.)
*/
if (queue_empty(&vm_page_queue_inactive) ||
((--loop_detect) == 0) ||
(burst_count >= vm_pageout_burst_max)) {
unsigned int pages, msecs;
/*
* vm_pageout_burst_wait is msecs/page.
* If there is nothing for us to do, we wait
* at least vm_pageout_empty_wait msecs.
*/
/* NOTE(review): "pages" is assigned but never read below */
pages = burst_count;
if (loop_detect == 0) {
printf("Warning: No physical memory suitable for pageout or reclaim, pageout thread temporarily going to sleep\n");
msecs = vm_free_page_pause;
}
else {
msecs = burst_count * vm_pageout_burst_wait;
}
if (queue_empty(&vm_page_queue_inactive) &&
(msecs < vm_pageout_empty_wait))
msecs = vm_pageout_empty_wait;
vm_page_unlock_queues();
assert_wait_timeout(msecs, THREAD_INTERRUPTIBLE);
counter(c_vm_pageout_scan_block++);
/*
* Unfortunately, we don't have call_continuation
* so we can't rely on tail-recursion.
*/
thread_block((void (*)(void)) 0);
thread_cancel_timer();
vm_pageout_scan_continue();
goto Restart;
/*NOTREACHED*/
}
vm_pageout_inactive++;
m = (vm_page_t) queue_first(&vm_page_queue_inactive);
if ((vm_page_free_count <= vm_page_free_reserved) &&
(IP_VALID(memory_manager_default))) {
/*
* We're really low on memory. Try to select a page that
* would go directly to the default_pager.
* If there are no such pages, we have to page out a
* page backed by an EMM, so that the default_pager
* can recover it eventually.
*/
vm_pageout_scan_inactive_emm_throttle++;
do {
assert(!m->active && m->inactive);
object = m->object;
if (vm_object_lock_try(object)) {
if (object->pager_trusted ||
object->internal) {
/* found one ! */
vm_pageout_scan_inactive_emm_throttle_success++;
/* jumps forward with "object" still locked */
goto object_locked_inactive;
}
vm_object_unlock(object);
}
m = (vm_page_t) queue_next(&m->pageq);
} while (!queue_end(&vm_page_queue_inactive,
(queue_entry_t) m));
if (queue_end(&vm_page_queue_inactive,
(queue_entry_t) m)) {
vm_pageout_scan_inactive_emm_throttle_failure++;
/*
* We should check the "active" queue
* for good candidates to page out.
*/
need_more_inactive_pages = TRUE;
m = (vm_page_t)
queue_first(&vm_page_queue_inactive);
}
}
assert(!m->active && m->inactive);
object = m->object;
/*
* Try to lock object; since we've got the
* page queues lock, we can only try for this one.
*/
if (!vm_object_lock_try(object)) {
/*
* Move page to end and continue.
*/
queue_remove(&vm_page_queue_inactive, m,
vm_page_t, pageq);
queue_enter(&vm_page_queue_inactive, m,
vm_page_t, pageq);
/* the queues lock is re-taken at the top of the next pass */
vm_page_unlock_queues();
mutex_pause();
vm_pageout_inactive_nolock++;
continue;
}
object_locked_inactive:
/*
* Paging out pages of objects which pager is being
* created by another thread must be avoided, because
* this thread may claim for memory, thus leading to a
* possible dead lock between it and the pageout thread
* which will wait for pager creation, if such pages are
* finally chosen. The remaining assumption is that there
* will finally be enough available pages in the inactive
* pool to page out in order to satisfy all memory claimed
* by the thread which concurrently creates the pager.
*/
if (!object->pager_initialized && object->pager_created) {
/*
* Move page to end and continue, hoping that
* there will be enough other inactive pages to
* page out so that the thread which currently
* initializes the pager will succeed.
*/
queue_remove(&vm_page_queue_inactive, m,
vm_page_t, pageq);
queue_enter(&vm_page_queue_inactive, m,
vm_page_t, pageq);
vm_page_unlock_queues();
vm_object_unlock(object);
vm_pageout_inactive_avoid++;
continue;
}
/*
* Remove the page from the inactive list.
*/
queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);
m->inactive = FALSE;
if (!m->fictitious)
vm_page_inactive_count--;
if (m->busy || !object->alive) {
/*
* Somebody is already playing with this page.
* Leave it off the pageout queues.
*/
vm_page_unlock_queues();
vm_object_unlock(object);
vm_pageout_inactive_busy++;
continue;
}
/*
* If it's absent or in error, we can reclaim the page.
*/
if (m->absent || m->error) {
vm_pageout_inactive_absent++;
/* also reached by goto for clean, non-precious pages */
reclaim_page:
vm_page_free(m);
vm_page_unlock_queues();
vm_object_unlock(object);
continue;
}
assert(!m->private);
assert(!m->fictitious);
/*
* If already cleaning this page in place, convert from
* "adjacent" to "target". We can leave the page mapped,
* and vm_pageout_object_terminate will determine whether
* to free or reactivate.
*/
if (m->cleaning) {
#if MACH_CLUSTER_STATS
vm_pageout_cluster_conversions++;
#endif
if (m->prep_pin_count == 0) {
m->busy = TRUE;
m->pageout = TRUE;
vm_page_wire(m);
}
vm_object_unlock(object);
vm_page_unlock_queues();
continue;
}
/*
* If it's being used, reactivate.
* (Fictitious pages are either busy or absent.)
*/
if (m->reference || pmap_is_referenced(m->phys_addr)) {
vm_pageout_inactive_used++;
/* also reached by goto for pinned pages */
reactivate_page:
#if ADVISORY_PAGEOUT
if (m->discard_request) {
m->discard_request = FALSE;
}
#endif /* ADVISORY_PAGEOUT */
vm_object_unlock(object);
vm_page_activate(m);
VM_STAT(reactivations++);
vm_page_unlock_queues();
continue;
}
if (m->prep_pin_count != 0) {
boolean_t pinned = FALSE;
vm_page_pin_lock();
if (m->pin_count != 0) {
/* skip and reactivate pinned page */
pinned = TRUE;
vm_pageout_inactive_pinned++;
} else {
/* page is prepped; send it into limbo */
m->limbo = TRUE;
vm_pageout_inactive_limbo++;
}
vm_page_pin_unlock();
if (pinned)
goto reactivate_page;
}
#if ADVISORY_PAGEOUT
if (object->advisory_pageout) {
boolean_t do_throttle;
ipc_port_t port;
vm_offset_t discard_offset;
/* a discard was already requested for this page: page it out */
if (m->discard_request) {
vm_stat_discard_failure++;
goto mandatory_pageout;
}
assert(object->pager_initialized);
m->discard_request = TRUE;
port = object->pager;
/* system-wide throttle */
do_throttle = (vm_page_free_count <=
vm_page_free_reserved);
if (!do_throttle) {
/* throttle on this pager */
/* XXX lock ordering ? */
ip_lock(port);
do_throttle= imq_full(&port->ip_messages);
ip_unlock(port);
}
if (do_throttle) {
vm_stat_discard_throttle++;
#if 0
/* ignore this page and skip to next */
vm_page_unlock_queues();
vm_object_unlock(object);
continue;
#else
/* force mandatory pageout */
goto mandatory_pageout;
#endif
}
/* proceed with discard_request */
vm_page_activate(m);
vm_stat_discard++;
VM_STAT(reactivations++);
discard_offset = m->offset + object->paging_offset;
vm_stat_discard_sent++;
vm_page_unlock_queues();
vm_object_unlock(object);
memory_object_discard_request(object->pager,
object->pager_request,
discard_offset,
PAGE_SIZE);
continue;
}
mandatory_pageout:
#endif /* ADVISORY_PAGEOUT */
XPR(XPR_VM_PAGEOUT,
"vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
(integer_t)object, (integer_t)m->offset, (integer_t)m, 0,0);
/*
* Eliminate all mappings.
*/
m->busy = TRUE;
pmap_page_protect(m->phys_addr, VM_PROT_NONE);
if (!m->dirty)
m->dirty = pmap_is_modified(m->phys_addr);
/*
* If it's clean and not precious, we can free the page.
*/
if (!m->dirty && !m->precious) {
vm_pageout_inactive_clean++;
goto reclaim_page;
}
/* from here on only the object lock is held */
vm_page_unlock_queues();
/*
* If there is no memory object for the page, create
* one and hand it to the default pager.
* [First try to collapse, so we don't create
* one unnecessarily.]
*/
if (!object->pager_initialized)
vm_object_collapse(object);
if (!object->pager_initialized)
vm_object_pager_create(object);
if (!object->pager_initialized) {
/*
* Still no pager for the object.
* Reactivate the page.
*
* Should only happen if there is no
* default pager.
*/
vm_page_lock_queues();
vm_page_activate(m);
vm_page_unlock_queues();
/*
* And we are done with it.
*/
PAGE_WAKEUP_DONE(m);
vm_object_unlock(object);
/*
* Count the event. (An older version of this comment
* spoke of breaking out to the outer loop's preemption
* point; the code below continues the loop instead.)
*/
vm_pageout_dirty_no_pager++;
/*
* Well there's no pager, but we can still reclaim
* free pages out of the inactive list. Go back
* to top of loop and look for suitable pages.
*/
continue;
}
if (object->pager_initialized && object->pager == IP_NULL) {
/*
* This pager has been destroyed by either
* memory_object_destroy or vm_object_destroy, and
* so there is nowhere for the page to go.
* Just free the page.
*/
VM_PAGE_FREE(m);
vm_object_unlock(object);
continue;
}
vm_pageout_inactive_dirty++;
/* only pages of non-internal objects count against the burst limit */
if (!object->internal)
burst_count++;
vm_object_paging_begin(object);
vm_object_unlock(object);
vm_pageout_cluster(m); /* flush it */
}
}
counter(unsigned int c_vm_pageout_scan_continue = 0;)

/*
 *	vm_pageout_scan_continue:
 *
 *	Called by vm_pageout_scan after it has paused to let the pagers
 *	catch up; tunes vm_pageout_burst_wait under the page queues lock.
 *
 *	- Laundry count still above vm_pageout_burst_min: the pause was
 *	  too short, so lengthen the per-page wait by one.
 *	- Otherwise count the pause; once more than vm_pageout_pause_max
 *	  pauses went by without an adjustment, the wait may be too
 *	  long, so shrink it to three quarters (never below 1).
 *	Either adjustment restarts the pause count.
 */
void
vm_pageout_scan_continue(void)
{
	boolean_t	adjusted = FALSE;

	vm_page_lock_queues();
	counter(++c_vm_pageout_scan_continue);

	if (vm_page_laundry_count > vm_pageout_burst_min) {
		vm_pageout_burst_wait++;
		adjusted = TRUE;
	} else {
		++vm_pageout_pause_count;
		if (vm_pageout_pause_count > vm_pageout_pause_max) {
			vm_pageout_burst_wait = (vm_pageout_burst_wait * 3) / 4;
			if (vm_pageout_burst_wait < 1)
				vm_pageout_burst_wait = 1;
			adjusted = TRUE;
		}
	}

	if (adjusted)
		vm_pageout_pause_count = 0;

	vm_page_unlock_queues();
}
void vm_page_free_reserve(int pages);

/* free page count snapshot that the thresholds below are derived from */
int vm_page_free_count_init;

/*
 *	vm_page_free_reserve:
 *
 *	Grow the reserved pool by "pages" and recompute
 *	vm_page_free_min and vm_page_free_target from what is left of
 *	vm_page_free_count_init after the reserve.  The target is kept
 *	at least 5 pages above the minimum.  Passing 0 leaves the
 *	reserve alone and only recomputes the two thresholds.
 */
void
vm_page_free_reserve(
	int pages)
{
	int	unreserved;

	vm_page_free_reserved += pages;

	unreserved = vm_page_free_count_init - vm_page_free_reserved;

	vm_page_free_min =
		vm_page_free_reserved + VM_PAGE_FREE_MIN(unreserved);
	vm_page_free_target =
		vm_page_free_reserved + VM_PAGE_FREE_TARGET(unreserved);

	if (vm_page_free_min + 5 > vm_page_free_target)
		vm_page_free_target = vm_page_free_min + 5;
}
/*
 *	vm_pageout is the high level pageout daemon.
 *
 *	It marks the calling thread vm-privileged and unswappable,
 *	requests FIFO scheduling at BASEPRI_SYSTEM, fills in any paging
 *	parameter that is still zero with its compiled-in default,
 *	recomputes the free-page thresholds, and then alternates forever
 *	between vm_pageout_scan() and sleeping on &vm_page_free_wanted.
 *	Never returns.
 */
void
vm_pageout(void)
{
	thread_t			self;
	processor_set_t			pset;
	kern_return_t			kr;
	policy_base_t			base;
	policy_limit_t			limit;
	policy_fifo_base_data_t		fifo_base;
	policy_fifo_limit_data_t	fifo_limit;
	int				shortfall;

	/*
	 * Set thread privileges.
	 */
	self = current_thread();
	self->vm_privilege = TRUE;
	stack_privilege(self);
	thread_swappable(current_act(), FALSE);

	/*
	 * Set thread scheduling priority and policy.
	 *
	 * NOTE(review): the policy is POLICY_FIFO but the counts passed
	 * are the POLICY_TIMESHARE_* ones -- confirm they are meant to
	 * be interchangeable here.
	 */
	fifo_base.base_priority = BASEPRI_SYSTEM;
	fifo_limit.max_priority = BASEPRI_SYSTEM;
	base = (policy_base_t) &fifo_base;
	limit = (policy_limit_t) &fifo_limit;
	pset = self->processor_set;

	kr = thread_set_policy(self->top_act, pset, POLICY_FIFO,
			       base, POLICY_TIMESHARE_BASE_COUNT,
			       limit, POLICY_TIMESHARE_LIMIT_COUNT);
	if (kr != KERN_SUCCESS)
		printf("WARNING: vm_pageout_thread is being TIMESHARED!\n");

	/*
	 * Initialize some paging parameters; a value that is already
	 * non-zero is left untouched.
	 */
	if (vm_page_laundry_max == 0)
		vm_page_laundry_max = VM_PAGE_LAUNDRY_MAX;

	if (vm_pageout_burst_max == 0)
		vm_pageout_burst_max = VM_PAGEOUT_BURST_MAX;

	if (vm_pageout_burst_wait == 0)
		vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;

	if (vm_pageout_empty_wait == 0)
		vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;

	vm_page_free_count_init = vm_page_free_count;

	/*
	 * Even if vm_page_free_reserve was called before, call it
	 * again here to ensure that the targets are accurately
	 * calculated (it uses vm_page_free_count_init).  An argument
	 * of 0 does not change the reserve but still re-calculates
	 * free_min and free_target.
	 */
	shortfall = 0;
	if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED)
		shortfall = VM_PAGE_FREE_RESERVED - vm_page_free_reserved;
	vm_page_free_reserve(shortfall);

	/*
	 * vm_pageout_scan will set vm_page_inactive_target.
	 *
	 * The pageout daemon is never done, so loop forever.
	 * We should call vm_pageout_scan at least once each
	 * time we are woken, even if vm_page_free_wanted is
	 * zero, to check vm_page_free_target and
	 * vm_page_inactive_target.
	 */
	while (TRUE) {
		vm_pageout_scan();
		/* we hold vm_page_queue_free_lock now */
		assert(vm_page_free_wanted == 0);

		/* register the wait before the lock is released */
		assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
		mutex_unlock(&vm_page_queue_free_lock);
		counter(c_vm_pageout_block++);
		thread_block((void (*)(void)) 0);
	}
	/*NOTREACHED*/
}
/*
 *	Routine:	vm_fault_list_request
 *	Purpose:
 *		Cause the population of a portion of a vm_object.
 *		Depending on the nature of the request, the pages
 *		returned may contain valid data or be uninitialized.
 *		A page list structure, listing the physical pages
 *		will be returned upon request.
 *		This function is called by the file system or any other
 *		supplier of backing store to a pager.
 *		IMPORTANT NOTE: The caller must still respect the relationship
 *		between the vm_object and its backing memory object.  The
 *		caller MUST NOT substitute changes in the backing file
 *		without first doing a memory_object_lock_request on the
 *		target range unless it is known that the pages are not
 *		shared with another entity at the pager level.
 *	Copy_in_to:
 *		if a page list structure is present
 *		return the mapped physical pages, where a
 *		page is not present, return a non-initialized
 *		one.  If the no_sync bit is turned on, don't
 *		call the pager unlock to synchronize with other
 *		possible copies of the page.  Leave pages busy
 *		in the original object, if a page list structure
 *		was specified.  When a commit of the page list
 *		pages is done, the dirty bit will be set for each one.
 *	Copy_out_from:
 *		If a page list structure is present, return
 *		all mapped pages.  Where a page does not exist
 *		map a zero filled one.  Leave pages busy in
 *		the original object.  If a page list structure
 *		is not specified, this call is a no-op.
 *
 *	Note: access of default pager objects has a rather interesting
 *	twist.  The caller of this routine, presumably the file system
 *	page cache handling code, will never actually make a request
 *	against a default pager backed object.  Only the default
 *	pager will make requests on backing store related vm_objects
 *	In this way the default pager can maintain the relationship
 *	between backing store files (abstract memory objects) and
 *	the vm_objects (cache objects), they support.
 *
 *	Parameters:
 *		object			object to populate / collect from
 *		offset, size		byte range within the object
 *		upl_ptr			if non-NULL, receives a new upl
 *					describing the range
 *		user_page_list_ptr	optional page info array (in for
 *					external lists, out for
 *					UPL_SET_INTERNAL)
 *		page_list_count		capacity of that array
 *		cntrl_flags		UPL_* request flags
 *	Returns:
 *		KERN_INVALID_ARGUMENT	page list too small for the range
 *		KERN_FAILURE		the pager wait or the data_unlock
 *					request failed
 *		KERN_SUCCESS		otherwise
 *
 *	The object is unlocked on entry and on every return.  The
 *	vm_object_paging_begin() done here has no matching
 *	vm_object_paging_end() in this routine.
 */
kern_return_t
vm_fault_list_request(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_size_t		size,
	upl_t			*upl_ptr,
	upl_page_info_t		**user_page_list_ptr,
	int			page_list_count,
	int			cntrl_flags)
{
	vm_page_t		dst_page;
	vm_object_offset_t	dst_offset = offset;
	upl_page_info_t		*user_page_list;
	vm_size_t		xfer_size = size;
	boolean_t		do_m_lock = FALSE;
	boolean_t		dirty;
	upl_t			upl = NULL;
	int			entry;

	/* internal lists ignore the caller's count and use a fixed one */
	if(cntrl_flags & UPL_SET_INTERNAL)
		page_list_count = 20;

	if(user_page_list_ptr && (page_list_count < (size/page_size)))
		return KERN_INVALID_ARGUMENT;

	if((!object->internal) && (object->paging_offset != 0))
		panic("vm_fault_list_request: vnode object with non-zero paging offset\n");

	/* copy-out without a upl to fill in is defined to be a no-op */
	if((cntrl_flags & UPL_COPYOUT_FROM) && (upl_ptr == NULL)) {
		return KERN_SUCCESS;
	}

	if(upl_ptr) {
		if(cntrl_flags & UPL_SET_INTERNAL) {
			/* the page list lives directly behind the upl */
			upl = upl_create(TRUE);
			user_page_list = (upl_page_info_t *)
				(((vm_offset_t)upl) + sizeof(struct upl));
			if(user_page_list_ptr)
				*user_page_list_ptr = user_page_list;
			else
				user_page_list = NULL;
			upl->flags |= UPL_INTERNAL;
		} else {
			upl = upl_create(FALSE);
			if(user_page_list_ptr)
				user_page_list = *user_page_list_ptr;
			else
				user_page_list = NULL;
		}
		/* the map object shadows the requested range of "object" */
		upl->map_object = vm_object_allocate(size);
		vm_object_lock(upl->map_object);
		upl->map_object->shadow = object;
		upl->size = size;
		upl->offset = offset + object->paging_offset;
		upl->map_object->pageout = TRUE;
		upl->map_object->can_persist = FALSE;
		upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
		upl->map_object->shadow_offset = offset;
		vm_object_unlock(upl->map_object);
		*upl_ptr = upl;
	}

	vm_object_lock(object);
	vm_object_paging_begin(object);
	entry = 0;

	if(cntrl_flags & UPL_COPYOUT_FROM) {
		/*
		 * Copy-out: collect the resident pages of the range.
		 * upl_ptr is known to be non-NULL here (see above).
		 */
		while (xfer_size) {
			vm_page_t	alias_page;

			if(((dst_page = vm_page_lookup(object,
				dst_offset)) == VM_PAGE_NULL) ||
				dst_page->fictitious ||
				dst_page->absent ||
				dst_page->error ||
				(dst_page->wire_count != 0 &&
						!dst_page->pageout) ||
				((!(dst_page->dirty || dst_page->precious)) &&
				(cntrl_flags & UPL_RET_ONLY_DIRTY))) {
				/* nothing to hand out for this slot */
				if(user_page_list)
					user_page_list[entry].phys_addr = 0;
			} else {
				if(dst_page->busy &&
					(!(dst_page->list_req_pending &&
						dst_page->pageout))) {
					if(cntrl_flags & UPL_NOBLOCK) {
						/* caller won't wait: skip */
						if(user_page_list)
							user_page_list[entry]
								.phys_addr = 0;
						entry++;
						dst_offset += PAGE_SIZE;
						xfer_size -= PAGE_SIZE;
						continue;
					}
					/*someone else is playing with the */
					/* page.  We will have to wait.    */
					PAGE_ASSERT_WAIT(
						dst_page, THREAD_UNINT);
					vm_object_unlock(object);
					thread_block((void(*)(void))0);
					vm_object_lock(object);
					/* retry the same offset */
					continue;
				}
				/* Someone else already cleaning the page? */
				if((dst_page->cleaning || dst_page->absent ||
					dst_page->prep_pin_count != 0 ||
					dst_page->wire_count != 0) &&
					!dst_page->list_req_pending) {
					if(user_page_list)
						user_page_list[entry].phys_addr = 0;
					entry++;
					dst_offset += PAGE_SIZE;
					xfer_size -= PAGE_SIZE;
					continue;
				}
				VM_PAGE_GRAB_FICTITIOUS(alias_page);
				/* eliminate all mappings from the */
				/* original object and its prodigy */

				vm_page_lock_queues();
				pmap_page_protect(dst_page->phys_addr,
							VM_PROT_NONE);
				/* Turn off busy indication on pending */
				/* pageout.  Note: we can only get here */
				/* in the request pending case.  */
				dst_page->list_req_pending = FALSE;
				dst_page->busy = FALSE;
				dst_page->cleaning = FALSE;

				dirty = pmap_is_modified(dst_page->phys_addr);
				dirty = dirty ? TRUE : dst_page->dirty;

				/* use pageclean setup, it is more convenient */
				/* even for the pageout cases here */
				vm_pageclean_setup(dst_page, alias_page,
					upl->map_object, size - xfer_size);

				if(!dirty) {
					dst_page->dirty = FALSE;
					dst_page->precious = TRUE;
				}

				if(dst_page->pageout)
					dst_page->busy = TRUE;

				alias_page->absent = FALSE;
				if(!(cntrl_flags & UPL_CLEAN_IN_PLACE)) {
					/* deny access to the target page */
					/* while it is being worked on    */
					if((!dst_page->pageout) &&
						(dst_page->wire_count == 0)) {
						dst_page->busy = TRUE;
						dst_page->pageout = TRUE;
						vm_page_wire(dst_page);
					}
				}
				if(user_page_list) {
					user_page_list[entry].phys_addr
						= dst_page->phys_addr;
					user_page_list[entry].dirty =
							dst_page->dirty;
					user_page_list[entry].pageout =
							dst_page->pageout;
					user_page_list[entry].absent =
							dst_page->absent;
				}

				vm_page_unlock_queues();
			}
			entry++;
			dst_offset += PAGE_SIZE;
			xfer_size -= PAGE_SIZE;
		}
	} else {
		/*
		 * Copy-in: make sure a page exists at every offset of
		 * the range, allocating absent ones where needed.
		 */
		while (xfer_size) {
			dst_page = vm_page_lookup(object, dst_offset);
			if(dst_page != VM_PAGE_NULL) {
				if((dst_page->cleaning) &&
					!(dst_page->list_req_pending)) {
					/*someone else is writing to the */
					/* page.  We will have to wait.  */
					PAGE_ASSERT_WAIT(dst_page, THREAD_UNINT);
					vm_object_unlock(object);
					thread_block((void(*)(void))0);
					vm_object_lock(object);
					continue;
				}
				if ((dst_page->fictitious &&
					dst_page->list_req_pending)) {
					/* dump the fictitious page */
					dst_page->list_req_pending = FALSE;
					dst_page->clustered = FALSE;
					vm_page_lock_queues();
					vm_page_free(dst_page);
					vm_page_unlock_queues();
				} else if ((dst_page->absent &&
					dst_page->list_req_pending)) {
					/* the default_pager case */
					dst_page->list_req_pending = FALSE;
					dst_page->busy = FALSE;
					dst_page->clustered = FALSE;
				}
			}
			/* look again: the page may just have been freed */
			if((dst_page = vm_page_lookup(
				object, dst_offset)) == VM_PAGE_NULL) {
				/* need to allocate a page */
				dst_page = vm_page_alloc(object, dst_offset);
				if (dst_page == VM_PAGE_NULL) {
					vm_object_unlock(object);
					VM_PAGE_WAIT();
					vm_object_lock(object);
					continue;
				}
				dst_page->busy = FALSE;
				if(cntrl_flags & UPL_NO_SYNC) {
					dst_page->page_lock = 0;
					dst_page->unlock_request = 0;
				}
				dst_page->absent = TRUE;
				object->absent_count++;
			}
			dst_page->overwriting = TRUE;
			if(dst_page->fictitious) {
				panic("need corner case for fictitious page");
			}
			if(dst_page->page_lock) {
				do_m_lock = TRUE;
			}
			if(upl_ptr) {
				vm_page_t	alias_page;

				if(dst_page->busy) {
					/*someone else is playing with the */
					/* page.  We will have to wait.    */
					PAGE_ASSERT_WAIT(
						dst_page, THREAD_UNINT);
					vm_object_unlock(object);
					thread_block((void(*)(void))0);
					vm_object_lock(object);
					continue;
				}

				/*
				 * Grab the alias page only once we know we
				 * will use it; grabbing it before the busy
				 * wait above leaked one fictitious page per
				 * retry.
				 */
				VM_PAGE_GRAB_FICTITIOUS(alias_page);

				/* eliminate all mappings from the */
				/* original object and its prodigy */
				vm_page_lock_queues();
				pmap_page_protect(dst_page->phys_addr,
							VM_PROT_NONE);
				dirty = pmap_is_modified(dst_page->phys_addr);
				dirty = dirty ? TRUE : dst_page->dirty;

				vm_pageclean_setup(dst_page, alias_page,
					upl->map_object, size - xfer_size);

				if(cntrl_flags & UPL_CLEAN_IN_PLACE) {
					/* clean in place for read implies   */
					/* that a write will be done on all  */
					/* the pages that are dirty before   */
					/* a upl commit is done.  The caller */
					/* is obligated to preserve the      */
					/* contents of all pages marked      */
					/* dirty. */
					upl->flags |= UPL_CLEAR_DIRTY;
				}

				if(!dirty) {
					dst_page->dirty = FALSE;
					dst_page->precious = TRUE;
				}

				if (dst_page->wire_count == 0) {
					/* deny access to the target page while */
					/* it is being worked on */
					dst_page->busy = TRUE;
				} else {
					vm_page_wire(dst_page);
				}
				/* expect the page to be used */
				dst_page->reference = TRUE;
				dst_page->precious =
					(cntrl_flags & UPL_PRECIOUS)
							? TRUE : FALSE;
				alias_page->absent = FALSE;
				if(user_page_list) {
					user_page_list[entry].phys_addr
						= dst_page->phys_addr;
					user_page_list[entry].dirty =
							dst_page->dirty;
					user_page_list[entry].pageout =
							dst_page->pageout;
					user_page_list[entry].absent =
							dst_page->absent;
				}
				vm_page_unlock_queues();
			}
			entry++;
			dst_offset += PAGE_SIZE;
			xfer_size -= PAGE_SIZE;
		}
	}

	if(do_m_lock) {
		vm_prot_t	access_required;

		/* call back all associated pages from other users of the pager */
		/* all future updates will be on data which is based on the     */
		/* changes we are going to make here.  Note: it is assumed that */
		/* we already hold copies of the data so we will not be seeing  */
		/* an avalanche of incoming data from the pager */
		access_required = (cntrl_flags & UPL_COPYOUT_FROM)
					? VM_PROT_READ : VM_PROT_WRITE;

		/* invariant: the object is locked at the top of each pass */
		while (TRUE) {
			kern_return_t	rc;
			thread_t	thread;

			if(!object->pager_ready) {
				thread = current_thread();
				vm_object_assert_wait(object,
					VM_OBJECT_EVENT_PAGER_READY, THREAD_UNINT);
				vm_object_unlock(object);
				thread_block((void (*)(void))0);
				if (thread->wait_result != THREAD_AWAKENED) {
					return(KERN_FAILURE);
				}
				vm_object_lock(object);
				continue;
			}

			vm_object_unlock(object);

			/*
			 * NOTE(review): dst_offset has been advanced past
			 * the end of the range by the loops above, so this
			 * request covers [offset+size, offset+2*size) --
			 * confirm whether "offset" was intended.
			 */
			if ((rc = memory_object_data_unlock(
				object->pager,
				object->pager_request,
				dst_offset + object->paging_offset,
				size,
				access_required))) {
				if (rc == MACH_SEND_INTERRUPTED) {
					/*
					 * Re-take the lock before retrying;
					 * without it the next pass would
					 * unlock an unheld lock.
					 */
					vm_object_lock(object);
					continue;
				} else
					return KERN_FAILURE;
			}
			break;
		}

		/*
		 * The loop is left with the object unlocked.  Lock it
		 * again so that the page is examined under the lock and
		 * the final unlock below is always balanced.
		 */
		vm_object_lock(object);

		/* lets wait on the last page requested */
		/* NOTE: we will have to update lock completed routine to signal */
		if(dst_page != VM_PAGE_NULL &&
			(access_required & dst_page->page_lock) != access_required) {
			PAGE_ASSERT_WAIT(dst_page, THREAD_UNINT);
			vm_object_unlock(object);
			thread_block((void (*)(void))0);
			vm_object_lock(object);
		}
	}

	vm_object_unlock(object);
	return KERN_SUCCESS;
}
/*
 *	Routine:	upl_system_list_request
 *	Purpose:
 *		Front end to vm_fault_list_request() taking a pager-relative
 *		offset.  The offset is converted to an object offset and,
 *		when "super_cluster" exceeds "size" and free memory is above
 *		the reserve, the request is widened to a super-cluster
 *		aligned range that still contains the original one.
 *	Returns:
 *		KERN_FAILURE if offset lies below object->paging_offset,
 *		otherwise whatever vm_fault_list_request() returns.
 *
 *	NOTE(review): the alignment mask assumes super_cluster is a
 *	power of two -- confirm against callers.
 */
kern_return_t
upl_system_list_request(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_size_t		size,
	vm_size_t		super_cluster,
	upl_t			*upl,
	upl_page_info_t		**user_page_list_ptr,
	int			page_list_count,
	int			cntrl_flags)
{
	if(object->paging_offset > offset)
		return KERN_FAILURE;
	offset = offset - object->paging_offset;

	/* turns off super cluster exercised by the default_pager */
	/*
	super_cluster = size;
	*/
	if ((super_cluster > size) &&
			(vm_page_free_count > vm_page_free_reserved)) {
		vm_offset_t	base_offset;
		vm_size_t	super_size;

		/* round the start down to a super-cluster boundary */
		base_offset = (vm_offset_t)(offset &
			~((vm_offset_t) super_cluster - 1));
		/* a request straddling a boundary gets two super clusters */
		super_size = (offset+size) > (base_offset + super_cluster) ?
				super_cluster<<1 : super_cluster;
		/* but never reach past the end of the object */
		super_size = ((base_offset + super_size) > object->size) ?
				(object->size - base_offset) : super_size;
		if(offset > (base_offset + super_size))
			panic("upl_system_list_request: Missed target pageout 0x%x,0x%x, 0x%x, 0x%x, 0x%x, 0x%x\n", offset, base_offset, super_size, super_cluster, size, object->paging_offset);
		/* apparently there is a case where the vm requests a */
		/* page to be written out who's offset is beyond the  */
		/* object size */
		if((offset + size) > (base_offset + super_size))
			super_size = (offset + size) - base_offset;

		offset = base_offset;
		size = super_size;
	}

	/*
	 * Propagate the result; the function used to fall off its end
	 * here and hand an undefined value back to the caller.
	 */
	return vm_fault_list_request(object, offset, size, upl,
				user_page_list_ptr,
				page_list_count, cntrl_flags);
}
/*
 *	Routine:	upl_map
 *	Purpose:
 *		Map the whole of a UPL's map object into "map" and enter
 *		every page found in that object into the map's pmap.
 *		On success *dst_addr holds the chosen address, which is
 *		also recorded in upl->kaddr.
 *	Returns:
 *		KERN_FAILURE if the UPL is already mapped, the
 *		vm_map_enter() error if the mapping cannot be created,
 *		KERN_SUCCESS otherwise.
 */
kern_return_t
upl_map(
	vm_map_t	map,
	upl_t		upl,
	vm_offset_t	*dst_addr)
{
	vm_size_t	remaining;
	vm_offset_t	obj_offset;
	vm_offset_t	va;
	vm_page_t	m;
	kern_return_t	result;

	/* check to see if already mapped */
	if (upl->flags & UPL_PAGE_LIST_MAPPED)
		return KERN_FAILURE;

	obj_offset = 0;		/* Always map the entire object */
	remaining = upl->size;

	/* take an extra reference on the map object for the mapping */
	vm_object_lock(upl->map_object);
	upl->map_object->ref_count++;
	vm_object_res_reference(upl->map_object);
	vm_object_unlock(upl->map_object);

	*dst_addr = 0;

	/* NEED A UPL_MAP ALIAS */
	result = vm_map_enter(map, dst_addr, remaining, (vm_offset_t) 0, TRUE,
			upl->map_object, obj_offset, FALSE,
			VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);

	/*
	 * NOTE(review): the reference taken above is not dropped on
	 * this failure path -- confirm whether vm_map_enter() releases
	 * it itself when it fails.
	 */
	if (result != KERN_SUCCESS)
		return(result);

	/* walk the new range one page at a time, entering resident pages */
	va = *dst_addr;
	while (remaining > 0) {
		m = vm_page_lookup(upl->map_object, obj_offset);
		if (m) {
			PMAP_ENTER(map->pmap, va, m, VM_PROT_ALL, TRUE);
		}
		obj_offset += PAGE_SIZE;
		remaining -= PAGE_SIZE;
		va += PAGE_SIZE;
	}

	upl->flags |= UPL_PAGE_LIST_MAPPED;
	upl->kaddr = *dst_addr;
	return KERN_SUCCESS;
}
/*
 *	Routine:	upl_un_map
 *	Purpose:
 *		Undo upl_map(): deallocate the range recorded in
 *		upl->kaddr from "map" and clear the mapped state.
 *	Returns:
 *		KERN_FAILURE if the UPL is not currently mapped,
 *		KERN_SUCCESS otherwise.
 */
kern_return_t
upl_un_map(
	vm_map_t	map,
	upl_t		upl)
{
	vm_size_t	mapped_size;

	/* nothing to undo unless the UPL is marked as mapped */
	if (!(upl->flags & UPL_PAGE_LIST_MAPPED))
		return KERN_FAILURE;

	mapped_size = upl->size;
	vm_deallocate(map, upl->kaddr, mapped_size);

	upl->flags &= ~UPL_PAGE_LIST_MAPPED;
	upl->kaddr = (vm_offset_t) 0;
	return KERN_SUCCESS;
}
/*
 *	Routine:	upl_commit_range
 *	Purpose:
 *		Commit the pages of "upl" that lie in [offset, offset+size).
 *		For every page present in the UPL's map object the
 *		map-object page is freed and the matching page in the
 *		shadow object has its paging state finished: laundry
 *		accounting, pageout completion (free, limbo or
 *		reactivate), or cluster-page cleanup and wakeup.
 *	Parameters:
 *		upl		the page list being committed
 *		offset		byte offset of the range within the UPL
 *		size		byte length of the range
 *		free_on_empty	when TRUE, destroy the UPL once its map
 *				object holds no more pages
 *		page_list	optional per-page info; entries may request
 *				that a page be turned into, or stop being,
 *				a pageout page.  Each visited entry has
 *				its phys_addr zeroed.
 *	Returns:
 *		KERN_FAILURE if the range extends past upl->size,
 *		KERN_SUCCESS otherwise.
 */
kern_return_t
upl_commit_range(
upl_t upl,
vm_offset_t offset,
vm_size_t size,
boolean_t free_on_empty,
upl_page_info_t *page_list)
{
vm_offset_t xfer_size = size;
vm_object_t shadow_object = upl->map_object->shadow;
vm_object_t object = upl->map_object;
vm_offset_t target_offset;
vm_offset_t page_offset;
int entry;
/* reject ranges that run past the end of the UPL */
if ((offset + size) > upl->size)
return KERN_FAILURE;
vm_object_lock(shadow_object);
/* entry indexes page_list in step with target_offset */
entry = offset/PAGE_SIZE;
target_offset = offset;
while(xfer_size) {
vm_page_t t,m;
upl_page_info_t *p;
/* t is the page in the UPL's map object; it is freed here */
if((t = vm_page_lookup(object,
(vm_offset_t)target_offset)) != NULL) {
t->pageout = FALSE;
page_offset = (vm_offset_t)t->offset;
VM_PAGE_FREE(t);
t = VM_PAGE_NULL;
/* m is the corresponding page in the shadow object */
m = vm_page_lookup(shadow_object,
page_offset + object->shadow_offset);
if(m != VM_PAGE_NULL) {
vm_object_paging_end(shadow_object);
vm_page_lock_queues();
/* the UPL asked for the modify state to be dropped */
if (upl->flags & UPL_CLEAR_DIRTY) {
pmap_clear_modify(m->phys_addr);
m->dirty = FALSE;
}
if(page_list) {
p = &(page_list[entry]);
/* caller wants this page paged out and it is not yet: */
/* mark it busy/pageout and wire it */
if(p->phys_addr && p->pageout && !m->pageout) {
m->busy = TRUE;
m->pageout = TRUE;
vm_page_wire(m);
/* caller no longer wants pageout: undo it and wake waiters */
} else if (page_list[entry].phys_addr &&
!p->pageout && m->pageout) {
m->pageout = FALSE;
vm_page_unwire(m);
PAGE_WAKEUP_DONE(m);
}
/* mark the caller's entry as consumed */
page_list[entry].phys_addr = 0;
}
/* laundry accounting: wake any thread waiting on the count */
/* once it drops below the recorded minimum */
if(m->laundry) {
vm_page_laundry_count--;
m->laundry = FALSE;
if (vm_page_laundry_count < vm_page_laundry_min) {
vm_page_laundry_min = 0;
thread_wakeup((event_t)
&vm_page_laundry_count);
}
}
/* pageout target: remove all mappings, then decide whether */
/* the page is reactivated, sent to limbo, or freed */
if(m->pageout) {
m->cleaning = FALSE;
m->pageout = FALSE;
#if MACH_CLUSTER_STATS
if (m->wanted) vm_pageout_target_collisions++;
#endif
pmap_page_protect(m->phys_addr, VM_PROT_NONE);
m->dirty = pmap_is_modified(m->phys_addr);
/* re-dirtied while being cleaned: keep the page */
if(m->dirty) {
CLUSTER_STAT(
vm_pageout_target_page_dirtied++;)
vm_page_unwire(m);/* reactivates */
VM_STAT(reactivations++);
PAGE_WAKEUP_DONE(m);
} else if (m->prep_pin_count != 0) {
vm_page_pin_lock();
if (m->pin_count != 0) {
/* page is pinned; reactivate */
CLUSTER_STAT(
vm_pageout_target_page_pinned++;)
vm_page_unwire(m);/* reactivates */
VM_STAT(reactivations++);
PAGE_WAKEUP_DONE(m);
} else {
/*
* page is prepped but not pinned;
* send it into limbo. Note that
* vm_page_free (which will be
* called after releasing the pin
* lock) knows how to handle a page
* with limbo set.
*/
m->limbo = TRUE;
CLUSTER_STAT(
vm_pageout_target_page_limbo++;)
}
vm_page_pin_unlock();
if (m->limbo)
vm_page_free(m);
} else {
CLUSTER_STAT(
vm_pageout_target_page_freed++;)
vm_page_free(m);/* clears busy, etc. */
}
vm_page_unlock_queues();
/* advance to the next page before restarting the loop */
target_offset += PAGE_SIZE;
xfer_size -= PAGE_SIZE;
entry++;
continue;
}
/* not a pageout target: put the page on a paging queue if */
/* it is on neither the active nor the inactive queue */
if (!m->active && !m->inactive) {
if (m->reference || m->prep_pin_count != 0)
vm_page_activate(m);
else
vm_page_deactivate(m);
}
#if MACH_CLUSTER_STATS
m->dirty = pmap_is_modified(m->phys_addr);
if (m->dirty) vm_pageout_cluster_dirtied++;
else vm_pageout_cluster_cleaned++;
if (m->wanted) vm_pageout_cluster_collisions++;
#else
m->dirty = 0;
#endif
if((m->busy) && (m->cleaning)) {
/* the request_page_list case */
if(m->absent) {
m->absent = FALSE;
if(shadow_object->absent_count == 1)
vm_object_absent_release(shadow_object);
else
shadow_object->absent_count--;
}
m->overwriting = FALSE;
m->busy = FALSE;
m->dirty = FALSE;
}
else if (m->overwriting) {
/*
 * alternate request page list, write to
 * page_list case. Occurs when the original
 * page was wired at the time of the list
 * request
 */
assert(m->wire_count != 0);
vm_page_unwire(m);/* reactivates */
m->overwriting = FALSE;
}
m->cleaning = FALSE;
/*
* Wakeup any thread waiting for the page to be un-cleaning.
*/
PAGE_WAKEUP(m);
vm_page_unlock_queues();
}
}
target_offset += PAGE_SIZE;
xfer_size -= PAGE_SIZE;
entry++;
}
vm_object_unlock(shadow_object);
/* optionally tear the UPL down once no map-object pages remain */
if(free_on_empty) {
if(queue_empty(&upl->map_object->memq)) {
upl_destroy(upl);
}
}
return KERN_SUCCESS;
}
upl_abort_range(
upl_t upl,
vm_offset_t offset,
vm_size_t size,
int error)
{
vm_offset_t xfer_size = size;
vm_object_t shadow_object = upl->map_object->shadow;
vm_object_t object = upl->map_object;
vm_offset_t target_offset;
vm_offset_t page_offset;
int entry;
if ((offset + size) > upl->size)
return KERN_FAILURE;
vm_object_lock(shadow_object);
entry = offset/PAGE_SIZE;
target_offset = offset;
while(xfer_size) {
vm_page_t t,m;
upl_page_info_t *p;
if((t = vm_page_lookup(object,
(vm_offset_t)target_offset)) != NULL) {
t->pageout = FALSE;
page_offset = (vm_offset_t)t->offset;
VM_PAGE_FREE(t);
t = VM_PAGE_NULL;
m = vm_page_lookup(shadow_object,
page_offset + object->shadow_offset);
if(m != VM_PAGE_NULL) {
vm_object_paging_end(m->object);
vm_page_lock_queues();
if(m->absent) {
/* COPYOUT = FALSE case */
/* check for error conditions which must */
/* be passed back to the pages customer */
if(error & UPL_ABORT_RESTART) {
m->restart = TRUE;
m->absent = FALSE;
vm_object_absent_release(m->object);
m->page_error = KERN_MEMORY_ERROR;
m->error = TRUE;
} else if(error & UPL_ABORT_UNAVAILABLE) {
m->restart = FALSE;
m->unusual = TRUE;
m->clustered = FALSE;
} else if(error & UPL_ABORT_ERROR) {
m->restart = FALSE;
m->absent = FALSE;
vm_object_absent_release(m->object);
m->page_error = KERN_MEMORY_ERROR;
m->error = TRUE;
} else {
m->clustered = TRUE;
}
m->cleaning = FALSE;
m->overwriting = FALSE;
PAGE_WAKEUP_DONE(m);
if(m->clustered) {
vm_page_free(m);
} else {
vm_page_activate(m);
}
vm_page_unlock_queues();
target_offset += PAGE_SIZE;
xfer_size -= PAGE_SIZE;
entry++;
continue;
}
/*
* Handle the trusted pager throttle.
*/
if (m->laundry) {
vm_page_laundry_count--;
m->laundry = FALSE;
if (vm_page_laundry_count
< vm_page_laundry_min) {
vm_page_laundry_min = 0;
thread_wakeup((event_t)
&vm_page_laundry_count);
}
}
if(m->pageout) {
assert(m->busy);
assert(m->wire_count == 1);
m->pageout = FALSE;
vm_page_unwire(m);
}
m->dirty = TRUE;
m->cleaning = FALSE;
m->busy = FALSE;
m->overwriting = FALSE;
PAGE_WAKEUP(m);
vm_page_unlock_queues();
}
}
target_offset += PAGE_SIZE;
xfer_size -= PAGE_SIZE;
entry++;
}
vm_object_unlock(shadow_object);
if(error & UPL_ABORT_FREE_ON_EMPTY) {
if(queue_empty(&upl->map_object->memq)) {
upl_destroy(upl);
}
}
return KERN_SUCCESS;
}
/*
 *	Routine:	upl_abort
 *	Purpose:
 *		Abort every page of "upl".  Each shadow-object page that
 *		has a counterpart in the UPL's map object is released:
 *		absent pages are flagged according to "error" and freed
 *		or activated; other pages are marked dirty again and
 *		un-busied.  All remaining map-object pages are then
 *		freed and the UPL is destroyed.
 *	Parameters:
 *		upl	the page list being aborted
 *		error	UPL_ABORT_* flags selecting how absent pages are
 *			handed back
 *	Returns:
 *		KERN_SUCCESS.  Panics (KERN_INVALID_ARGUMENT if panic
 *		returns) when the UPL has no map object.
 */
kern_return_t
upl_abort(
	upl_t	upl,
	int	error)
{
	vm_object_t		object = NULL;
	vm_object_t		shadow_object = NULL;
	vm_offset_t		offset;
	vm_object_offset_t	shadow_offset;
	vm_object_offset_t	target_offset;
	int			i;
	vm_page_t		t, m;

	object = upl->map_object;
	if (object == NULL) {
		panic("upl_abort: upl object is not backed by an object");
		return KERN_INVALID_ARGUMENT;
	}

	shadow_object = upl->map_object->shadow;
	shadow_offset = upl->map_object->shadow_offset;

	vm_object_lock(shadow_object);

	/*
	 * offset is advanced in the loop header so that it stays in
	 * step with i on every path, including the "continue" taken
	 * for absent pages.  Advancing it only at the bottom of the
	 * body made the loop revisit the same page and never reach
	 * the tail of the UPL.
	 */
	for (i = 0, offset = 0; i < (upl->size);
	     i += PAGE_SIZE, offset += PAGE_SIZE) {
		/* only pages present in the UPL's map object matter */
		if ((t = vm_page_lookup(object,
				(vm_offset_t)offset)) == NULL)
			continue;

		target_offset = t->offset + shadow_offset;
		if ((m = vm_page_lookup(shadow_object,
				target_offset)) == NULL)
			continue;

		vm_object_paging_end(m->object);
		vm_page_lock_queues();

		if (m->absent) {
			/* COPYOUT = FALSE case */
			/* check for error conditions which must */
			/* be passed back to the pages customer  */
			if (error & UPL_ABORT_RESTART) {
				m->restart = TRUE;
				m->absent = FALSE;
				vm_object_absent_release(m->object);
				m->page_error = KERN_MEMORY_ERROR;
				m->error = TRUE;
			} else if (error & UPL_ABORT_UNAVAILABLE) {
				m->restart = FALSE;
				m->unusual = TRUE;
				m->clustered = FALSE;
			} else if (error & UPL_ABORT_ERROR) {
				m->restart = FALSE;
				m->absent = FALSE;
				vm_object_absent_release(m->object);
				m->page_error = KERN_MEMORY_ERROR;
				m->error = TRUE;
			} else {
				m->clustered = TRUE;
			}

			m->cleaning = FALSE;
			m->overwriting = FALSE;
			PAGE_WAKEUP_DONE(m);

			/* clustered pages are discarded, others kept */
			if (m->clustered) {
				vm_page_free(m);
			} else {
				vm_page_activate(m);
			}
			vm_page_unlock_queues();
			continue;
		}

		/*
		 * Handle the trusted pager throttle.
		 */
		if (m->laundry) {
			vm_page_laundry_count--;
			m->laundry = FALSE;
			if (vm_page_laundry_count < vm_page_laundry_min) {
				vm_page_laundry_min = 0;
				thread_wakeup((event_t)
					&vm_page_laundry_count);
			}
		}

		/* undo the wiring done when the page became a pageout target */
		if (m->pageout) {
			assert(m->busy);
			assert(m->wire_count == 1);
			m->pageout = FALSE;
			vm_page_unwire(m);
		}

		/* the data was never written: the page is dirty again */
		m->dirty = TRUE;
		m->cleaning = FALSE;
		m->busy = FALSE;
		m->overwriting = FALSE;
		PAGE_WAKEUP(m);
		vm_page_unlock_queues();
	}

	vm_object_unlock(shadow_object);

	/* Remove all the pages from the map object so */
	/* vm_pageout_object_terminate will work properly. */
	while (!queue_empty(&upl->map_object->memq)) {
		vm_page_t p;

		p = (vm_page_t) queue_first(&upl->map_object->memq);

		assert(p->private);
		assert(p->pageout);
		p->pageout = FALSE;
		assert(!p->cleaning);

		VM_PAGE_FREE(p);
	}

	upl_destroy(upl);
	return KERN_SUCCESS;
}
/* an option on commit should be wire */
/*
 *	Routine:	upl_commit
 *	Purpose:
 *		Commit an entire UPL and destroy it.
 *		If the UPL carries UPL_CLEAR_DIRTY, the modify state of
 *		every shadow-object page that has a counterpart in the
 *		UPL's map object is cleared.  If a page_list is given,
 *		each entry may turn its shadow-object page into a
 *		pageout page or take it out of that state; every entry
 *		whose page is found has its phys_addr zeroed.
 *	Parameters:
 *		upl		the page list being committed
 *		page_list	optional per-page info, one entry per
 *				page of the UPL
 *	Returns:
 *		KERN_SUCCESS.
 */
kern_return_t
upl_commit(
	upl_t			upl,
	upl_page_info_t		*page_list)
{
	if (upl->flags & UPL_CLEAR_DIRTY) {
		vm_object_t		shadow_object = upl->map_object->shadow;
		vm_object_t		object = upl->map_object;
		vm_object_offset_t	target_offset;
		vm_object_offset_t	xfer_end;
		vm_page_t		t, m;

		vm_object_lock(shadow_object);

		target_offset = object->shadow_offset;
		xfer_end = upl->size + object->shadow_offset;

		/*
		 * xfer_end is one past the last byte of the UPL, so the
		 * bound is exclusive; "<=" walked one page too far.
		 */
		while (target_offset < xfer_end) {
			if ((t = vm_page_lookup(object,
				target_offset - object->shadow_offset))
				!= NULL) {
				m = vm_page_lookup(
					shadow_object, target_offset);
				if (m != VM_PAGE_NULL) {
					pmap_clear_modify(m->phys_addr);
					m->dirty = FALSE;
				}
			}
			target_offset += PAGE_SIZE;
		}
		vm_object_unlock(shadow_object);
	}

	if (page_list) {
		vm_object_t		shadow_object = upl->map_object->shadow;
		vm_object_t		object = upl->map_object;
		vm_object_offset_t	target_offset;
		vm_object_offset_t	xfer_end;
		int			entry;
		vm_page_t		m;
		upl_page_info_t		*p;

		vm_object_lock(shadow_object);

		entry = 0;
		target_offset = object->shadow_offset;
		xfer_end = upl->size + object->shadow_offset;

		/*
		 * Exclusive bound: with "<=" the final iteration indexed
		 * page_list one entry past the pages the UPL covers.
		 */
		while (target_offset < xfer_end) {
			m = vm_page_lookup(shadow_object, target_offset);
			if (m != VM_PAGE_NULL) {
				p = &(page_list[entry]);

				if (p->phys_addr &&
				    p->pageout && !m->pageout) {
					/* caller wants the page paged out */
					vm_page_lock_queues();
					m->busy = TRUE;
					m->pageout = TRUE;
					vm_page_wire(m);
					vm_page_unlock_queues();
				} else if (p->phys_addr &&
					   !p->pageout && m->pageout) {
					/* caller withdrew the pageout request */
					vm_page_lock_queues();
					m->pageout = FALSE;
					vm_page_unwire(m);
					PAGE_WAKEUP_DONE(m);
					vm_page_unlock_queues();
				}
				/* mark the caller's entry as consumed */
				p->phys_addr = 0;
			}
			target_offset += PAGE_SIZE;
			entry++;
		}
		vm_object_unlock(shadow_object);
	}

	upl_destroy(upl);
	return KERN_SUCCESS;
}
/*
 *	Routine:	upl_create
 *	Purpose:
 *		Allocate and initialize an empty UPL holding a single
 *		reference.  An "internal" UPL is allocated with room for
 *		20 upl_page_info entries directly after the structure.
 *	Returns:
 *		The new UPL.
 */
upl_t
upl_create(
	boolean_t	internal)
{
	upl_t	upl;

	/* internal UPLs carry their page-info array inline */
	upl = (upl_t)kalloc(internal
		? sizeof(struct upl) + (sizeof(struct upl_page_info)*20)
		: sizeof(struct upl));

	upl->flags = 0;
	upl->src_object = NULL;
	upl->kaddr = (vm_offset_t)0;
	upl->size = 0;
	upl->map_object = NULL;
	upl->ref_count = 1;
	upl_lock_init(upl);

	return(upl);
}
/*
 *	Routine:	upl_destroy
 *	Purpose:
 *		Release the UPL's reference on its map object and free
 *		the UPL itself.  The size passed to kfree() mirrors the
 *		one used by upl_create(): UPLs flagged UPL_INTERNAL
 *		include room for 20 upl_page_info entries.
 */
void
upl_destroy(
	upl_t	upl)
{
	vm_object_deallocate(upl->map_object);

	kfree((vm_offset_t)upl,
		(upl->flags & UPL_INTERNAL)
		? sizeof(struct upl) + (sizeof(struct upl_page_info)*20)
		: sizeof(struct upl));
}
/*
 *	Routine:	upl_get_internal_pagelist_offset
 *	Purpose:
 *		Report the size of struct upl, i.e. the byte offset at
 *		which upl_create() leaves room for the page-info array
 *		of an internal UPL.
 *	Returns:
 *		sizeof(struct upl).
 */
vm_size_t
upl_get_internal_pagelist_offset(void)
{
	return sizeof(struct upl);
}
/*
 *	Routine:	upl_set_dirty
 *	Purpose:
 *		Turn on the UPL_CLEAR_DIRTY flag of the UPL.  Note that,
 *		despite the routine's name, the flag it sets is the
 *		"clear dirty" one.
 */
void
upl_set_dirty(
	upl_t	upl)
{
	upl->flags = upl->flags | UPL_CLEAR_DIRTY;
}
/*
 *	Routine:	upl_clear_dirty
 *	Purpose:
 *		Turn off the UPL_CLEAR_DIRTY flag of the UPL, leaving
 *		all other flags untouched.
 */
void
upl_clear_dirty(
	upl_t	upl)
{
	upl->flags = upl->flags & ~UPL_CLEAR_DIRTY;
}
/*
 * Kernel debugger support: everything from here to the matching
 * #endif is compiled only when MACH_KDB is enabled.
 */
#if MACH_KDB
#include <ddb/db_output.h>
#include <ddb/db_print.h>
#include <vm/vm_print.h>
/* route printf() calls in the debugger routines to kdbprintf() */
#define printf kdbprintf
/* current indentation, adjusted around each group of iprintf() output */
extern int db_indent;
/* forward declaration: db_vm() calls db_pageout() before its definition */
void db_pageout(void);
/*
 *	Routine:	db_vm
 *	Purpose:
 *		Debugger command: print the VM page counts, the pageout
 *		targets, burst and pause parameters, then the pageout
 *		statistics via db_pageout().  db_indent is raised by 2
 *		around each group and restored before returning.
 */
void
db_vm(void)
{
	extern int	vm_page_gobble_count;
	extern int	vm_page_limbo_count, vm_page_limbo_real_count;
	extern int	vm_page_pin_count;

	iprintf("VM Statistics:\n");
	db_indent += 2;

	/* page counts */
	iprintf("pages:\n");
	db_indent += 2;
	iprintf("activ %5d inact %5d free %5d",
		vm_page_active_count,
		vm_page_inactive_count,
		vm_page_free_count);
	printf(" wire %5d gobbl %5d\n",
		vm_page_wire_count,
		vm_page_gobble_count);
	iprintf("laund %5d limbo %5d lim_r %5d pin %5d\n",
		vm_page_laundry_count,
		vm_page_limbo_count,
		vm_page_limbo_real_count,
		vm_page_pin_count);
	db_indent -= 2;

	/* pageout targets */
	iprintf("target:\n");
	db_indent += 2;
	iprintf("min %5d inact %5d free %5d",
		vm_page_free_min,
		vm_page_inactive_target,
		vm_page_free_target);
	printf(" resrv %5d\n", vm_page_free_reserved);
	db_indent -= 2;

	/* burst parameters */
	iprintf("burst:\n");
	db_indent += 2;
	iprintf("max %5d min %5d wait %5d empty %5d\n",
		vm_pageout_burst_max,
		vm_pageout_burst_min,
		vm_pageout_burst_wait,
		vm_pageout_empty_wait);
	db_indent -= 2;

	/* pause parameters */
	iprintf("pause:\n");
	db_indent += 2;
	iprintf("count %5d max %5d\n",
		vm_pageout_pause_count,
		vm_pageout_pause_max);
#if	MACH_COUNTERS
	iprintf("scan_continue called %8d\n", c_vm_pageout_scan_continue);
#endif	/* MACH_COUNTERS */
	db_indent -= 2;

	/* pageout statistics, printed one level in from the heading */
	db_pageout();
	db_indent -= 2;
}
/*
 *	Routine:	db_pageout
 *	Purpose:
 *		Debugger command: print the pageout daemon's counters.
 *		The laundry counter is printed only with MACH_COUNTERS,
 *		the cluster and target statistics only with
 *		MACH_CLUSTER_STATS.  db_indent is restored on return.
 */
void
db_pageout(void)
{
	extern int	c_limbo_page_free;
	extern int	c_limbo_convert;
#if	MACH_COUNTERS
	extern int	c_laundry_pages_freed;
#endif	/* MACH_COUNTERS */

	iprintf("Pageout Statistics:\n");
	db_indent += 2;

	iprintf("active %5d inactv %5d\n",
		vm_pageout_active,
		vm_pageout_inactive);
	iprintf("nolock %5d avoid %5d busy %5d absent %5d\n",
		vm_pageout_inactive_nolock,
		vm_pageout_inactive_avoid,
		vm_pageout_inactive_busy,
		vm_pageout_inactive_absent);
	iprintf("used %5d clean %5d dirty %5d\n",
		vm_pageout_inactive_used,
		vm_pageout_inactive_clean,
		vm_pageout_inactive_dirty);
	iprintf("pinned %5d limbo %5d setup_limbo %5d setup_unprep %5d\n",
		vm_pageout_inactive_pinned,
		vm_pageout_inactive_limbo,
		vm_pageout_setup_limbo,
		vm_pageout_setup_unprepped);
	iprintf("limbo_page_free %5d limbo_convert %5d\n",
		c_limbo_page_free,
		c_limbo_convert);
#if	MACH_COUNTERS
	iprintf("laundry_pages_freed %d\n", c_laundry_pages_freed);
#endif	/* MACH_COUNTERS */

#if	MACH_CLUSTER_STATS
	/* per-cluster counters */
	iprintf("Cluster Statistics:\n");
	db_indent += 2;
	iprintf("dirtied %5d cleaned %5d collisions %5d\n",
		vm_pageout_cluster_dirtied,
		vm_pageout_cluster_cleaned,
		vm_pageout_cluster_collisions);
	iprintf("clusters %5d conversions %5d\n",
		vm_pageout_cluster_clusters,
		vm_pageout_cluster_conversions);
	db_indent -= 2;

	/* counters for the pages that were the pageout targets */
	iprintf("Target Statistics:\n");
	db_indent += 2;
	iprintf("collisions %5d page_dirtied %5d page_freed %5d\n",
		vm_pageout_target_collisions,
		vm_pageout_target_page_dirtied,
		vm_pageout_target_page_freed);
	iprintf("page_pinned %5d page_limbo %5d\n",
		vm_pageout_target_page_pinned,
		vm_pageout_target_page_limbo);
	db_indent -= 2;
#endif	/* MACH_CLUSTER_STATS */

	db_indent -= 2;
}
#if MACH_CLUSTER_STATS
unsigned long vm_pageout_cluster_dirtied = 0;
unsigned long vm_pageout_cluster_cleaned = 0;
unsigned long vm_pageout_cluster_collisions = 0;
unsigned long vm_pageout_cluster_clusters = 0;
unsigned long vm_pageout_cluster_conversions = 0;
unsigned long vm_pageout_target_collisions = 0;
unsigned long vm_pageout_target_page_dirtied = 0;
unsigned long vm_pageout_target_page_freed = 0;
unsigned long vm_pageout_target_page_pinned = 0;
unsigned long vm_pageout_target_page_limbo = 0;
#define CLUSTER_STAT(clause) clause
#else /* MACH_CLUSTER_STATS */
#define CLUSTER_STAT(clause)
#endif /* MACH_CLUSTER_STATS */
#endif /* MACH_KDB */