Source to osfmk/vm/vm_map.c
/*
* Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
* The contents of this file constitute Original Code as defined in and
* are subject to the Apple Public Source License Version 1.1 (the
* "License"). You may not use this file except in compliance with the
* License. Please obtain a copy of the License at
* http://www.apple.com/publicsource and read it before using this file.
*
* This Original Code and all software distributed under the License are
* distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
* License for the specific language governing rights and limitations
* under the License.
*
* @APPLE_LICENSE_HEADER_END@
*/
/*
* @OSF_COPYRIGHT@
*/
/*
* Mach Operating System
* Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
* All Rights Reserved.
*
* Permission to use, copy, modify and distribute this software and its
* documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
* ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or [email protected]
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie Mellon
* the rights to redistribute these changes.
*/
/*
*/
/*
* File: vm/vm_map.c
* Author: Avadis Tevanian, Jr., Michael Wayne Young
* Date: 1985
*
* Virtual memory mapping module.
*/
#include <cpus.h>
#include <task_swapper.h>
#include <mach_assert.h>
#include <dipc.h>
#include <mach/kern_return.h>
#include <mach/port.h>
#include <mach/vm_attributes.h>
#include <mach/vm_param.h>
#include <mach/vm_behavior.h>
#include <kern/assert.h>
#include <kern/counters.h>
#include <kern/zalloc.h>
#include <vm/vm_init.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>
#include <ipc/ipc_port.h>
#include <kern/sched_prim.h>
#include <kern/misc_protos.h>
#include <mach/vm_task_server.h>
#include <mach/mach_host_server.h>
#include <ddb/tr.h>
#include <kern/xpr.h>
/* Internal prototypes
*/
extern boolean_t vm_map_range_check(
vm_map_t map,
vm_offset_t start,
vm_offset_t end,
vm_map_entry_t *entry);
extern vm_map_entry_t _vm_map_entry_create(
struct vm_map_header *map_header);
extern void _vm_map_entry_dispose(
struct vm_map_header *map_header,
vm_map_entry_t entry);
extern void vm_map_pmap_enter(
vm_map_t map,
vm_offset_t addr,
vm_offset_t end_addr,
vm_object_t object,
vm_offset_t offset,
vm_prot_t protection);
extern void _vm_map_clip_end(
struct vm_map_header *map_header,
vm_map_entry_t entry,
vm_offset_t end);
extern void vm_map_entry_delete(
vm_map_t map,
vm_map_entry_t entry);
extern kern_return_t vm_map_delete(
vm_map_t map,
vm_offset_t start,
vm_offset_t end,
int flags);
extern void vm_map_copy_steal_pages(
vm_map_copy_t copy);
extern kern_return_t vm_map_copy_overwrite_unaligned(
vm_map_t dst_map,
vm_map_entry_t entry,
vm_map_copy_t copy,
vm_offset_t start);
extern kern_return_t vm_map_copy_overwrite_aligned(
vm_map_t dst_map,
vm_map_entry_t tmp_entry,
vm_map_copy_t copy,
vm_offset_t start);
extern kern_return_t vm_map_copyout_kernel_buffer(
vm_map_t map,
vm_offset_t *addr, /* IN/OUT */
vm_map_copy_t copy,
boolean_t overwrite);
extern kern_return_t vm_map_copyin_page_list_cont(
vm_map_copyin_args_t cont_args,
vm_map_copy_t *copy_result); /* OUT */
extern void vm_map_fork_share(
vm_map_t old_map,
vm_map_entry_t old_entry,
vm_map_t new_map);
extern boolean_t vm_map_fork_copy(
vm_map_t old_map,
vm_map_entry_t *old_entry_p,
vm_map_t new_map);
extern kern_return_t vm_remap_range_allocate(
vm_map_t map,
vm_offset_t *address, /* IN/OUT */
vm_size_t size,
vm_offset_t mask,
boolean_t anywhere,
vm_map_entry_t *map_entry); /* OUT */
extern void _vm_map_clip_start(
struct vm_map_header *map_header,
vm_map_entry_t entry,
vm_offset_t start);
/*
* Macros to copy a vm_map_entry. We must be careful to correctly
* manage the wired page count. vm_map_entry_copy() creates a new
* map entry to the same memory - the wired count in the new entry
* must be set to zero. vm_map_entry_copy_full() creates a new
* entry that is identical to the old entry. This preserves the
* wire count; it's used for map splitting and zone changing in
* vm_map_copyout.
*/
#define vm_map_entry_copy(NEW,OLD) \
MACRO_BEGIN \
*(NEW) = *(OLD); \
(NEW)->is_shared = FALSE; \
(NEW)->needs_wakeup = FALSE; \
(NEW)->in_transition = FALSE; \
(NEW)->wired_count = 0; \
(NEW)->user_wired_count = 0; \
MACRO_END
#define vm_map_entry_copy_full(NEW,OLD) (*(NEW) = *(OLD))
/*
* Virtual memory maps provide for the mapping, protection,
* and sharing of virtual memory objects. In addition,
* this module provides for an efficient virtual copy of
* memory from one map to another.
*
* Synchronization is required prior to most operations.
*
* Maps consist of an ordered doubly-linked list of simple
* entries; a single hint is used to speed up lookups.
*
* Sharing maps have been deleted from this version of Mach.
* All shared objects are now mapped directly into the respective
* maps. This requires a change in the copy on write strategy;
* the asymmetric (delayed) strategy is used for shared temporary
* objects instead of the symmetric (shadow) strategy. All maps
* are now "top level" maps (either task map, kernel map or submap
* of the kernel map).
*
* Since portions of maps are specified by start/end addreses,
* which may not align with existing map entries, all
* routines merely "clip" entries to these start/end values.
* [That is, an entry is split into two, bordering at a
* start or end value.] Note that these clippings may not
* always be necessary (as the two resulting entries are then
* not changed); however, the clipping is done for convenience.
* No attempt is currently made to "glue back together" two
* abutting entries.
*
* The symmetric (shadow) copy strategy implements virtual copy
* by copying VM object references from one map to
* another, and then marking both regions as copy-on-write.
* It is important to note that only one writeable reference
* to a VM object region exists in any map when this strategy
* is used -- this means that shadow object creation can be
* delayed until a write operation occurs. The symmetric (delayed)
* strategy allows multiple maps to have writeable references to
* the same region of a vm object, and hence cannot delay creating
* its copy objects. See vm_object_copy_quickly() in vm_object.c.
* Copying of permanent objects is completely different; see
* vm_object_copy_strategically() in vm_object.c.
*/
zone_t vm_map_zone; /* zone for vm_map structures */
zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
zone_t vm_map_kentry_zone; /* zone for kernel entry structures */
zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
/*
* Placeholder object for submap operations. This object is dropped
* into the range by a call to vm_map_find, and removed when
* vm_map_submap creates the submap.
*/
vm_object_t vm_submap_object;
/*
* vm_map_init:
*
* Initialize the vm_map module. Must be called before
* any other vm_map routines.
*
* Map and entry structures are allocated from zones -- we must
* initialize those zones.
*
* There are three zones of interest:
*
* vm_map_zone: used to allocate maps.
* vm_map_entry_zone: used to allocate map entries.
* vm_map_kentry_zone: used to allocate map entries for the kernel.
*
* The kernel allocates map entries from a special zone that is initially
* "crammed" with memory. It would be difficult (perhaps impossible) for
* the kernel to allocate more memory to a entry zone when it became
* empty since the very act of allocating memory implies the creation
* of a new entry.
*/
vm_offset_t map_data;
vm_size_t map_data_size;
vm_offset_t kentry_data;
vm_size_t kentry_data_size;
int kentry_count = 2048; /* to init kentry_data_size */
/*
* Threshold for aggressive (eager) page map entering for vm copyout
* operations. Any copyout larger will NOT be aggressively entered.
*/
vm_size_t vm_map_aggressive_enter_max; /* set by bootstrap */
void
vm_map_init(
void)
{
vm_map_zone = zinit((vm_size_t) sizeof(struct vm_map), 40*1024,
PAGE_SIZE, "maps");
vm_map_entry_zone = zinit((vm_size_t) sizeof(struct vm_map_entry),
1024*1024, PAGE_SIZE*5,
"non-kernel map entries");
vm_map_kentry_zone = zinit((vm_size_t) sizeof(struct vm_map_entry),
kentry_data_size, kentry_data_size,
"kernel map entries");
vm_map_copy_zone = zinit((vm_size_t) sizeof(struct vm_map_copy),
16*1024, PAGE_SIZE, "map copies");
/*
* Cram the map and kentry zones with initial data.
* Set kentry_zone non-collectible to aid zone_gc().
*/
zone_change(vm_map_zone, Z_COLLECT, FALSE);
zone_change(vm_map_kentry_zone, Z_COLLECT, FALSE);
zone_change(vm_map_kentry_zone, Z_EXPAND, FALSE);
zcram(vm_map_zone, map_data, map_data_size);
zcram(vm_map_kentry_zone, kentry_data, kentry_data_size);
}
void
vm_map_steal_memory(
void)
{
map_data_size = round_page(10 * sizeof(struct vm_map));
map_data = pmap_steal_memory(map_data_size);
/*
* Limiting worst case: vm_map_kentry_zone needs to map each "available"
* physical page (i.e. that beyond the kernel image and page tables)
* individually; we fudge this slightly and guess at most one entry per
* two pages. This works out to roughly .4 of 1% of physical memory,
* or roughly 1800 entries (56K) for a 16M machine with 4K pages.
*/
kentry_count = pmap_free_pages() / 2;
kentry_data_size =
round_page(kentry_count * sizeof(struct vm_map_entry));
kentry_data = pmap_steal_memory(kentry_data_size);
}
/*
* vm_map_create:
*
* Creates and returns a new empty VM map with
* the given physical map structure, and having
* the given lower and upper address bounds.
*/
vm_map_t
vm_map_create(
pmap_t pmap,
vm_offset_t min,
vm_offset_t max,
boolean_t pageable)
{
register vm_map_t result;
result = (vm_map_t) zalloc(vm_map_zone);
if (result == VM_MAP_NULL)
panic("vm_map_create");
vm_map_first_entry(result) = vm_map_to_entry(result);
vm_map_last_entry(result) = vm_map_to_entry(result);
result->hdr.nentries = 0;
result->hdr.entries_pageable = pageable;
result->size = 0;
result->ref_count = 1;
#if TASK_SWAPPER
result->res_count = 1;
result->sw_state = MAP_SW_IN;
#endif /* TASK_SWAPPER */
result->pmap = pmap;
result->min_offset = min;
result->max_offset = max;
result->wiring_required = FALSE;
result->no_zero_fill = FALSE;
result->wait_for_space = FALSE;
result->first_free = vm_map_to_entry(result);
result->hint = vm_map_to_entry(result);
vm_map_lock_init(result);
mutex_init(&result->s_lock, ETAP_VM_RESULT);
return(result);
}
/*
* vm_map_entry_create: [ internal use only ]
*
* Allocates a VM map entry for insertion in the
* given map (or map copy). No fields are filled.
*/
#define vm_map_entry_create(map) \
_vm_map_entry_create(&(map)->hdr)
#define vm_map_copy_entry_create(copy) \
_vm_map_entry_create(&(copy)->cpy_hdr)
vm_map_entry_t
_vm_map_entry_create(
register struct vm_map_header *map_header)
{
register zone_t zone;
register vm_map_entry_t entry;
if (map_header->entries_pageable)
zone = vm_map_entry_zone;
else
zone = vm_map_kentry_zone;
entry = (vm_map_entry_t) zalloc(zone);
if (entry == VM_MAP_ENTRY_NULL)
panic("vm_map_entry_create");
return(entry);
}
/*
* vm_map_entry_dispose: [ internal use only ]
*
* Inverse of vm_map_entry_create.
*/
#define vm_map_entry_dispose(map, entry) \
MACRO_BEGIN \
assert((entry) != (map)->first_free && \
(entry) != (map)->hint); \
_vm_map_entry_dispose(&(map)->hdr, (entry)); \
MACRO_END
#define vm_map_copy_entry_dispose(map, entry) \
_vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
void
_vm_map_entry_dispose(
register struct vm_map_header *map_header,
register vm_map_entry_t entry)
{
register zone_t zone;
if (map_header->entries_pageable)
zone = vm_map_entry_zone;
else
zone = vm_map_kentry_zone;
zfree(zone, (vm_offset_t) entry);
}
boolean_t first_free_is_valid(vm_map_t map); /* forward */
boolean_t first_free_check = FALSE;
boolean_t
first_free_is_valid(
vm_map_t map)
{
vm_map_entry_t entry, next;
if (!first_free_check)
return TRUE;
entry = vm_map_to_entry(map);
next = entry->vme_next;
while (trunc_page(next->vme_start) == trunc_page(entry->vme_end) ||
(trunc_page(next->vme_start) == trunc_page(entry->vme_start) &&
next != vm_map_to_entry(map))) {
entry = next;
next = entry->vme_next;
if (entry == vm_map_to_entry(map))
break;
}
if (map->first_free != entry) {
printf("Bad first_free for map 0x%x: 0x%x should be 0x%x\n",
map, map->first_free, entry);
return FALSE;
}
return TRUE;
}
/*
* UPDATE_FIRST_FREE:
*
* Updates the map->first_free pointer to the
* entry immediately before the first hole in the map.
* The map should be locked.
*/
#define UPDATE_FIRST_FREE(map, new_first_free) \
MACRO_BEGIN \
vm_map_t UFF_map; \
vm_map_entry_t UFF_first_free; \
vm_map_entry_t UFF_next_entry; \
UFF_map = (map); \
UFF_first_free = (new_first_free); \
UFF_next_entry = UFF_first_free->vme_next; \
while (trunc_page(UFF_next_entry->vme_start) == \
trunc_page(UFF_first_free->vme_end) || \
(trunc_page(UFF_next_entry->vme_start) == \
trunc_page(UFF_first_free->vme_start) && \
UFF_next_entry != vm_map_to_entry(UFF_map))) { \
UFF_first_free = UFF_next_entry; \
UFF_next_entry = UFF_first_free->vme_next; \
if (UFF_first_free == vm_map_to_entry(UFF_map)) \
break; \
} \
UFF_map->first_free = UFF_first_free; \
assert(first_free_is_valid(UFF_map)); \
MACRO_END
/*
* vm_map_entry_{un,}link:
*
* Insert/remove entries from maps (or map copies).
*/
#define vm_map_entry_link(map, after_where, entry) \
MACRO_BEGIN \
vm_map_t VMEL_map; \
vm_map_entry_t VMEL_entry; \
VMEL_map = (map); \
VMEL_entry = (entry); \
_vm_map_entry_link(&VMEL_map->hdr, after_where, VMEL_entry); \
UPDATE_FIRST_FREE(VMEL_map, VMEL_map->first_free); \
MACRO_END
#define vm_map_copy_entry_link(copy, after_where, entry) \
_vm_map_entry_link(&(copy)->cpy_hdr, after_where, (entry))
#define _vm_map_entry_link(hdr, after_where, entry) \
MACRO_BEGIN \
(hdr)->nentries++; \
(entry)->vme_prev = (after_where); \
(entry)->vme_next = (after_where)->vme_next; \
(entry)->vme_prev->vme_next = (entry)->vme_next->vme_prev = (entry); \
MACRO_END
#define vm_map_entry_unlink(map, entry) \
MACRO_BEGIN \
vm_map_t VMEU_map; \
vm_map_entry_t VMEU_entry; \
vm_map_entry_t VMEU_first_free; \
VMEU_map = (map); \
VMEU_entry = (entry); \
if (VMEU_entry->vme_start <= VMEU_map->first_free->vme_start) \
VMEU_first_free = VMEU_entry->vme_prev; \
else \
VMEU_first_free = VMEU_map->first_free; \
_vm_map_entry_unlink(&VMEU_map->hdr, VMEU_entry); \
UPDATE_FIRST_FREE(VMEU_map, VMEU_first_free); \
MACRO_END
#define vm_map_copy_entry_unlink(copy, entry) \
_vm_map_entry_unlink(&(copy)->cpy_hdr, (entry))
#define _vm_map_entry_unlink(hdr, entry) \
MACRO_BEGIN \
(hdr)->nentries--; \
(entry)->vme_next->vme_prev = (entry)->vme_prev; \
(entry)->vme_prev->vme_next = (entry)->vme_next; \
MACRO_END
#if MACH_ASSERT && TASK_SWAPPER
/*
* vm_map_reference:
*
* Adds valid reference and residence counts to the given map.
* The map must be in memory (i.e. non-zero residence count).
*
*/
void
vm_map_reference(
register vm_map_t map)
{
if (map == VM_MAP_NULL)
return;
mutex_lock(&map->s_lock);
assert(map->res_count > 0);
assert(map->ref_count >= map->res_count);
map->ref_count++;
map->res_count++;
mutex_unlock(&map->s_lock);
}
/*
* vm_map_res_reference:
*
* Adds another valid residence count to the given map.
*
* Map is locked so this function can be called from
* vm_map_swapin.
*
*/
void vm_map_res_reference(register vm_map_t map)
{
/* assert map is locked */
assert(map->res_count >= 0);
assert(map->ref_count >= map->res_count);
if (map->res_count == 0) {
mutex_unlock(&map->s_lock);
vm_map_lock(map);
vm_map_swapin(map);
mutex_lock(&map->s_lock);
++map->res_count;
vm_map_unlock(map);
} else
++map->res_count;
}
/*
* vm_map_reference_swap:
*
* Adds valid reference and residence counts to the given map.
*
* The map may not be in memory (i.e. zero residence count).
*
*/
void vm_map_reference_swap(register vm_map_t map)
{
assert(map != VM_MAP_NULL);
mutex_lock(&map->s_lock);
assert(map->res_count >= 0);
assert(map->ref_count >= map->res_count);
map->ref_count++;
vm_map_res_reference(map);
mutex_unlock(&map->s_lock);
}
/*
* vm_map_res_deallocate:
*
* Decrement residence count on a map; possibly causing swapout.
*
* The map must be in memory (i.e. non-zero residence count).
*
* The map is locked, so this function is callable from vm_map_deallocate.
*
*/
void vm_map_res_deallocate(register vm_map_t map)
{
assert(map->res_count > 0);
if (--map->res_count == 0) {
mutex_unlock(&map->s_lock);
vm_map_lock(map);
vm_map_swapout(map);
vm_map_unlock(map);
mutex_lock(&map->s_lock);
}
assert(map->ref_count >= map->res_count);
}
#endif /* MACH_ASSERT && TASK_SWAPPER */
/*
* vm_map_deallocate:
*
* Removes a reference from the specified map,
* destroying it if no references remain.
* The map should not be locked.
*/
void
vm_map_deallocate(
register vm_map_t map)
{
unsigned int ref;
if (map == VM_MAP_NULL)
return;
mutex_lock(&map->s_lock);
ref = --map->ref_count;
if (ref > 0) {
vm_map_res_deallocate(map);
mutex_unlock(&map->s_lock);
/*
* Someone may be waiting in task_halt_wait. They
* wait there for the map reference count to reach
* two (one for the task and one for their act). Wake
* them up if need be.
*/
if (ref == 2)
thread_wakeup(&map->ref_count);
return;
}
assert(map->ref_count == 0);
mutex_unlock(&map->s_lock);
#if TASK_SWAPPER
/*
* The map residence count isn't decremented here because
* the vm_map_delete below will traverse the entire map,
* deleting entries, and the residence counts on objects
* and sharing maps will go away then.
*/
#endif
vm_map_destroy(map);
}
/*
* vm_map_destroy:
*
* Actually destroy a map.
*/
void
vm_map_destroy(
register vm_map_t map)
{
vm_map_lock(map);
(void) vm_map_delete(map, map->min_offset,
map->max_offset, VM_MAP_NO_FLAGS);
vm_map_unlock(map);
pmap_destroy(map->pmap);
zfree(vm_map_zone, (vm_offset_t) map);
}
#if TASK_SWAPPER
/*
* vm_map_swapin/vm_map_swapout
*
* Swap a map in and out, either referencing or releasing its resources.
* These functions are internal use only; however, they must be exported
* because they may be called from macros, which are exported.
*
* In the case of swapout, there could be races on the residence count,
* so if the residence count is up, we return, assuming that a
* vm_map_deallocate() call in the near future will bring us back.
*
* Locking:
* -- We use the map write lock for synchronization among races.
* -- The map write lock, and not the simple s_lock, protects the
* swap state of the map.
* -- If a map entry is a share map, then we hold both locks, in
* hierarchical order.
*
* Synchronization Notes:
* 1) If a vm_map_swapin() call happens while swapout in progress, it
* will block on the map lock and proceed when swapout is through.
* 2) A vm_map_reference() call at this time is illegal, and will
* cause a panic. vm_map_reference() is only allowed on resident
* maps, since it refuses to block.
* 3) A vm_map_swapin() call during a swapin will block, and
* proceeed when the first swapin is done, turning into a nop.
* This is the reason the res_count is not incremented until
* after the swapin is complete.
* 4) There is a timing hole after the checks of the res_count, before
* the map lock is taken, during which a swapin may get the lock
* before a swapout about to happen. If this happens, the swapin
* will detect the state and increment the reference count, causing
* the swapout to be a nop, thereby delaying it until a later
* vm_map_deallocate. If the swapout gets the lock first, then
* the swapin will simply block until the swapout is done, and
* then proceed.
*
* Because vm_map_swapin() is potentially an expensive operation, it
* should be used with caution.
*
* Invariants:
* 1) A map with a residence count of zero is either swapped, or
* being swapped.
* 2) A map with a non-zero residence count is either resident,
* or being swapped in.
*/
int vm_map_swap_enable = 1;
void vm_map_swapin (vm_map_t map)
{
register vm_map_entry_t entry;
if (!vm_map_swap_enable) /* debug */
return;
/*
* Map is locked
* First deal with various races.
*/
if (map->sw_state == MAP_SW_IN)
/*
* we raced with swapout and won. Returning will incr.
* the res_count, turning the swapout into a nop.
*/
return;
/*
* The residence count must be zero. If we raced with another
* swapin, the state would have been IN; if we raced with a
* swapout (after another competing swapin), we must have lost
* the race to get here (see above comment), in which case
* res_count is still 0.
*/
assert(map->res_count == 0);
/*
* There are no intermediate states of a map going out or
* coming in, since the map is locked during the transition.
*/
assert(map->sw_state == MAP_SW_OUT);
/*
* We now operate upon each map entry. If the entry is a sub-
* or share-map, we call vm_map_res_reference upon it.
* If the entry is an object, we call vm_object_res_reference
* (this may iterate through the shadow chain).
* Note that we hold the map locked the entire time,
* even if we get back here via a recursive call in
* vm_map_res_reference.
*/
entry = vm_map_first_entry(map);
while (entry != vm_map_to_entry(map)) {
if (entry->object.vm_object != VM_OBJECT_NULL) {
if (entry->is_sub_map) {
vm_map_t lmap = entry->object.sub_map;
mutex_lock(&lmap->s_lock);
vm_map_res_reference(lmap);
mutex_unlock(&lmap->s_lock);
} else {
vm_object_t object = entry->object.vm_object;
vm_object_lock(object);
/*
* This call may iterate through the
* shadow chain.
*/
vm_object_res_reference(object);
vm_object_unlock(object);
}
}
entry = entry->vme_next;
}
assert(map->sw_state == MAP_SW_OUT);
map->sw_state = MAP_SW_IN;
}
void vm_map_swapout(vm_map_t map)
{
register vm_map_entry_t entry;
/*
* Map is locked
* First deal with various races.
* If we raced with a swapin and lost, the residence count
* will have been incremented to 1, and we simply return.
*/
mutex_lock(&map->s_lock);
if (map->res_count != 0) {
mutex_unlock(&map->s_lock);
return;
}
mutex_unlock(&map->s_lock);
/*
* There are no intermediate states of a map going out or
* coming in, since the map is locked during the transition.
*/
assert(map->sw_state == MAP_SW_IN);
if (!vm_map_swap_enable)
return;
/*
* We now operate upon each map entry. If the entry is a sub-
* or share-map, we call vm_map_res_deallocate upon it.
* If the entry is an object, we call vm_object_res_deallocate
* (this may iterate through the shadow chain).
* Note that we hold the map locked the entire time,
* even if we get back here via a recursive call in
* vm_map_res_deallocate.
*/
entry = vm_map_first_entry(map);
while (entry != vm_map_to_entry(map)) {
if (entry->object.vm_object != VM_OBJECT_NULL) {
if (entry->is_sub_map) {
vm_map_t lmap = entry->object.sub_map;
mutex_lock(&lmap->s_lock);
vm_map_res_deallocate(lmap);
mutex_unlock(&lmap->s_lock);
} else {
vm_object_t object = entry->object.vm_object;
vm_object_lock(object);
/*
* This call may take a long time,
* since it could actively push
* out pages (if we implement it
* that way).
*/
vm_object_res_deallocate(object);
vm_object_unlock(object);
}
}
entry = entry->vme_next;
}
assert(map->sw_state == MAP_SW_IN);
map->sw_state = MAP_SW_OUT;
}
#endif /* TASK_SWAPPER */
/*
* SAVE_HINT:
*
* Saves the specified entry as the hint for
* future lookups. Performs necessary interlocks.
*/
#define SAVE_HINT(map,value) \
mutex_lock(&(map)->s_lock); \
(map)->hint = (value); \
mutex_unlock(&(map)->s_lock);
/*
* vm_map_lookup_entry: [ internal use only ]
*
* Finds the map entry containing (or
* immediately preceding) the specified address
* in the given map; the entry is returned
* in the "entry" parameter. The boolean
* result indicates whether the address is
* actually contained in the map.
*/
boolean_t
vm_map_lookup_entry(
register vm_map_t map,
register vm_offset_t address,
vm_map_entry_t *entry) /* OUT */
{
register vm_map_entry_t cur;
register vm_map_entry_t last;
/*
* Start looking either from the head of the
* list, or from the hint.
*/
mutex_lock(&map->s_lock);
cur = map->hint;
mutex_unlock(&map->s_lock);
if (cur == vm_map_to_entry(map))
cur = cur->vme_next;
if (address >= cur->vme_start) {
/*
* Go from hint to end of list.
*
* But first, make a quick check to see if
* we are already looking at the entry we
* want (which is usually the case).
* Note also that we don't need to save the hint
* here... it is the same hint (unless we are
* at the header, in which case the hint didn't
* buy us anything anyway).
*/
last = vm_map_to_entry(map);
if ((cur != last) && (cur->vme_end > address)) {
*entry = cur;
return(TRUE);
}
}
else {
/*
* Go from start to hint, *inclusively*
*/
last = cur->vme_next;
cur = vm_map_first_entry(map);
}
/*
* Search linearly
*/
while (cur != last) {
if (cur->vme_end > address) {
if (address >= cur->vme_start) {
/*
* Save this lookup for future
* hints, and return
*/
*entry = cur;
SAVE_HINT(map, cur);
return(TRUE);
}
break;
}
cur = cur->vme_next;
}
*entry = cur->vme_prev;
SAVE_HINT(map, *entry);
return(FALSE);
}
/*
* Routine: vm_map_find_space
* Purpose:
* Allocate a range in the specified virtual address map,
* returning the entry allocated for that range.
* Used by kmem_alloc, etc.
*
* The map must be NOT be locked. It will be returned locked
* on KERN_SUCCESS, unlocked on failure.
*
* If an entry is allocated, the object/offset fields
* are initialized to zero.
*/
kern_return_t
vm_map_find_space(
register vm_map_t map,
vm_offset_t *address, /* OUT */
vm_size_t size,
vm_offset_t mask,
vm_map_entry_t *o_entry) /* OUT */
{
register vm_map_entry_t entry, new_entry;
register vm_offset_t start;
register vm_offset_t end;
new_entry = vm_map_entry_create(map);
/*
* Look for the first possible address; if there's already
* something at this address, we have to start after it.
*/
vm_map_lock(map);
assert(first_free_is_valid(map));
if ((entry = map->first_free) == vm_map_to_entry(map))
start = map->min_offset;
else
start = entry->vme_end;
/*
* In any case, the "entry" always precedes
* the proposed new region throughout the loop:
*/
while (TRUE) {
register vm_map_entry_t next;
/*
* Find the end of the proposed new region.
* Be sure we didn't go beyond the end, or
* wrap around the address.
*/
end = ((start + mask) & ~mask);
if (end < start) {
vm_map_entry_dispose(map, new_entry);
vm_map_unlock(map);
return(KERN_NO_SPACE);
}
start = end;
end += size;
if ((end > map->max_offset) || (end < start)) {
vm_map_entry_dispose(map, new_entry);
vm_map_unlock(map);
return(KERN_NO_SPACE);
}
/*
* If there are no more entries, we must win.
*/
next = entry->vme_next;
if (next == vm_map_to_entry(map))
break;
/*
* If there is another entry, it must be
* after the end of the potential new region.
*/
if (next->vme_start >= end)
break;
/*
* Didn't fit -- move to the next entry.
*/
entry = next;
start = entry->vme_end;
}
/*
* At this point,
* "start" and "end" should define the endpoints of the
* available new range, and
* "entry" should refer to the region before the new
* range, and
*
* the map should be locked.
*/
*address = start;
new_entry->vme_start = start;
new_entry->vme_end = end;
assert(page_aligned(new_entry->vme_start));
assert(page_aligned(new_entry->vme_end));
new_entry->is_shared = FALSE;
new_entry->is_sub_map = FALSE;
new_entry->object.vm_object = VM_OBJECT_NULL;
new_entry->offset = (vm_offset_t) 0;
new_entry->needs_copy = FALSE;
new_entry->inheritance = VM_INHERIT_DEFAULT;
new_entry->protection = VM_PROT_DEFAULT;
new_entry->max_protection = VM_PROT_ALL;
new_entry->behavior = VM_BEHAVIOR_DEFAULT;
new_entry->wired_count = 0;
new_entry->user_wired_count = 0;
new_entry->in_transition = FALSE;
new_entry->needs_wakeup = FALSE;
/*
* Insert the new entry into the list
*/
vm_map_entry_link(map, entry, new_entry);
map->size += size;
/*
* Update the lookup hint
*/
SAVE_HINT(map, new_entry);
*o_entry = new_entry;
return(KERN_SUCCESS);
}
int vm_map_pmap_enter_print = FALSE;
int vm_map_pmap_enter_enable = FALSE;
/*
* Routine: vm_map_pmap_enter
*
* Description:
* Force pages from the specified object to be entered into
* the pmap at the specified address if they are present.
* As soon as a page not found in the object the scan ends.
*
* Returns:
* Nothing.
*
* In/out conditions:
* The source map should not be locked on entry.
*/
void
vm_map_pmap_enter(
vm_map_t map,
register vm_offset_t addr,
register vm_offset_t end_addr,
register vm_object_t object,
vm_offset_t offset,
vm_prot_t protection)
{
while (addr < end_addr) {
register vm_page_t m;
vm_object_lock(object);
vm_object_paging_begin(object);
m = vm_page_lookup(object, offset);
if (m == VM_PAGE_NULL || m->busy ||
(m->unusual && ( m->error || m->restart || m->absent ||
protection & m->page_lock))) {
vm_object_paging_end(object);
vm_object_unlock(object);
return;
}
assert(!m->fictitious); /* XXX is this possible ??? */
if (vm_map_pmap_enter_print) {
printf("vm_map_pmap_enter:");
printf("map: %x, addr: %x, object: %x, offset: %x\n",
map, addr, object, offset);
}
m->busy = TRUE;
vm_object_unlock(object);
PMAP_ENTER(map->pmap, addr, m,
protection, FALSE);
vm_object_lock(object);
PAGE_WAKEUP_DONE(m);
vm_page_lock_queues();
if (!m->active && !m->inactive)
vm_page_activate(m);
vm_page_unlock_queues();
vm_object_paging_end(object);
vm_object_unlock(object);
offset += PAGE_SIZE;
addr += PAGE_SIZE;
}
}
/*
* Routine: vm_map_enter
*
* Description:
* Allocate a range in the specified virtual address map.
* The resulting range will refer to memory defined by
* the given memory object and offset into that object.
*
* Arguments are as defined in the vm_map call.
*/
kern_return_t
vm_map_enter(
register vm_map_t map,
vm_offset_t *address, /* IN/OUT */
vm_size_t size,
vm_offset_t mask,
int flags,
vm_object_t object,
vm_offset_t offset,
boolean_t needs_copy,
vm_prot_t cur_protection,
vm_prot_t max_protection,
vm_inherit_t inheritance)
{
vm_map_entry_t entry;
register vm_offset_t start;
register vm_offset_t end;
kern_return_t result = KERN_SUCCESS;
boolean_t anywhere = VM_FLAGS_ANYWHERE & flags;
char alias;
VM_GET_FLAGS_ALIAS(flags, alias);
#define RETURN(value) { result = value; goto BailOut; }
assert(page_aligned(*address));
assert(page_aligned(size));
StartAgain: ;
start = *address;
if (anywhere) {
vm_map_lock(map);
/*
* Calculate the first possible address.
*/
if (start < map->min_offset)
start = map->min_offset;
if (start > map->max_offset)
RETURN(KERN_NO_SPACE);
/*
* Look for the first possible address;
* if there's already something at this
* address, we have to start after it.
*/
assert(first_free_is_valid(map));
if (start == map->min_offset) {
if ((entry = map->first_free) != vm_map_to_entry(map))
start = entry->vme_end;
} else {
vm_map_entry_t tmp_entry;
if (vm_map_lookup_entry(map, start, &tmp_entry))
start = tmp_entry->vme_end;
entry = tmp_entry;
}
/*
* In any case, the "entry" always precedes
* the proposed new region throughout the
* loop:
*/
while (TRUE) {
register vm_map_entry_t next;
/*
* Find the end of the proposed new region.
* Be sure we didn't go beyond the end, or
* wrap around the address.
*/
end = ((start + mask) & ~mask);
if (end < start)
RETURN(KERN_NO_SPACE);
start = end;
end += size;
if ((end > map->max_offset) || (end < start)) {
if (map->wait_for_space) {
if (size <= (map->max_offset -
map->min_offset)) {
assert_wait((event_t)map,
THREAD_ABORTSAFE);
vm_map_unlock(map);
thread_block((void (*)(void))0);
goto StartAgain;
}
}
RETURN(KERN_NO_SPACE);
}
/*
* If there are no more entries, we must win.
*/
next = entry->vme_next;
if (next == vm_map_to_entry(map))
break;
/*
* If there is another entry, it must be
* after the end of the potential new region.
*/
if (next->vme_start >= end)
break;
/*
* Didn't fit -- move to the next entry.
*/
entry = next;
start = entry->vme_end;
}
*address = start;
} else {
vm_map_entry_t temp_entry;
/*
* Verify that:
* the address doesn't itself violate
* the mask requirement.
*/
vm_map_lock(map);
if ((start & mask) != 0)
RETURN(KERN_NO_SPACE);
/*
* ... the address is within bounds
*/
end = start + size;
if ((start < map->min_offset) ||
(end > map->max_offset) ||
(start >= end)) {
RETURN(KERN_INVALID_ADDRESS);
}
/*
* ... the starting address isn't allocated
*/
if (vm_map_lookup_entry(map, start, &temp_entry))
RETURN(KERN_NO_SPACE);
entry = temp_entry;
/*
* ... the next region doesn't overlap the
* end point.
*/
if ((entry->vme_next != vm_map_to_entry(map)) &&
(entry->vme_next->vme_start < end))
RETURN(KERN_NO_SPACE);
}
/*
* At this point,
* "start" and "end" should define the endpoints of the
* available new range, and
* "entry" should refer to the region before the new
* range, and
*
* the map should be locked.
*/
/*
* See whether we can avoid creating a new entry (and object) by
* extending one of our neighbors. [So far, we only attempt to
* extend from below.]
*/
if ((object == VM_OBJECT_NULL) &&
(entry != vm_map_to_entry(map)) &&
(entry->vme_end == start) &&
(!entry->is_shared) &&
(!entry->is_sub_map) &&
(entry->alias == alias) &&
(entry->inheritance == inheritance) &&
(entry->protection == cur_protection) &&
(entry->max_protection == max_protection) &&
(entry->behavior == VM_BEHAVIOR_DEFAULT) &&
(entry->in_transition == 0) &&
(entry->wired_count == 0)) { /* implies user_wired_count == 0 */
if (vm_object_coalesce(entry->object.vm_object,
VM_OBJECT_NULL,
entry->offset,
(vm_offset_t) 0,
(vm_size_t)(entry->vme_end - entry->vme_start),
(vm_size_t)(end - entry->vme_end))) {
/*
* Coalesced the two objects - can extend
* the previous map entry to include the
* new range.
*/
map->size += (end - entry->vme_end);
entry->vme_end = end;
UPDATE_FIRST_FREE(map, map->first_free);
RETURN(KERN_SUCCESS);
}
}
/*
* Create a new entry
*/
{ /**/
register vm_map_entry_t new_entry;
new_entry = vm_map_entry_insert(map, entry, start, end, object,
offset, needs_copy, FALSE, FALSE,
cur_protection, max_protection,
VM_BEHAVIOR_DEFAULT, inheritance, 0);
new_entry->alias = alias;
vm_map_unlock(map);
/* Wire down the new entry if the user
* requested all new map entries be wired.
*/
if (map->wiring_required) {
result = vm_map_wire(map, start, end,
new_entry->protection, TRUE);
return(result);
}
if ((object != VM_OBJECT_NULL) &&
(vm_map_pmap_enter_enable) &&
(!anywhere) &&
(!needs_copy) &&
(size < (128*1024))) {
vm_map_pmap_enter(map, start, end,
object, offset, cur_protection);
}
return(result);
} /**/
BailOut: ;
vm_map_unlock(map);
return(result);
#undef RETURN
}
/*
* vm_map_clip_start: [ internal use only ]
*
* Asserts that the given entry begins at or after
* the specified address; if necessary,
* it splits the entry into two.
*/
#define vm_map_clip_start(map, entry, startaddr) \
MACRO_BEGIN \
vm_map_t VMCS_map; \
vm_map_entry_t VMCS_entry; \
vm_offset_t VMCS_startaddr; \
VMCS_map = (map); \
VMCS_entry = (entry); \
VMCS_startaddr = (startaddr); \
if (VMCS_startaddr > VMCS_entry->vme_start) \
_vm_map_clip_start(&VMCS_map->hdr,VMCS_entry,VMCS_startaddr);\
UPDATE_FIRST_FREE(VMCS_map, VMCS_map->first_free); \
MACRO_END
#define vm_map_copy_clip_start(copy, entry, startaddr) \
MACRO_BEGIN \
if ((startaddr) > (entry)->vme_start) \
_vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
MACRO_END
/*
* This routine is called only when it is known that
* the entry must be split.
*/
void
_vm_map_clip_start(
register struct vm_map_header *map_header,
register vm_map_entry_t entry,
register vm_offset_t start)
{
register vm_map_entry_t new_entry;
/*
* Split off the front portion --
* note that we must insert the new
* entry BEFORE this one, so that
* this entry has the specified starting
* address.
*/
new_entry = _vm_map_entry_create(map_header);
vm_map_entry_copy_full(new_entry, entry);
new_entry->vme_end = start;
entry->offset += (start - entry->vme_start);
entry->vme_start = start;
_vm_map_entry_link(map_header, entry->vme_prev, new_entry);
if (entry->is_sub_map)
vm_map_reference(new_entry->object.sub_map);
else
vm_object_reference(new_entry->object.vm_object);
}
/*
* vm_map_clip_end: [ internal use only ]
*
* Asserts that the given entry ends at or before
* the specified address; if necessary,
* it splits the entry into two.
*/
#define vm_map_clip_end(map, entry, endaddr) \
MACRO_BEGIN \
vm_map_t VMCE_map; \
vm_map_entry_t VMCE_entry; \
vm_offset_t VMCE_endaddr; \
VMCE_map = (map); \
VMCE_entry = (entry); \
VMCE_endaddr = (endaddr); \
if (VMCE_endaddr < VMCE_entry->vme_end) \
_vm_map_clip_end(&VMCE_map->hdr,VMCE_entry,VMCE_endaddr); \
UPDATE_FIRST_FREE(VMCE_map, VMCE_map->first_free); \
MACRO_END
#define vm_map_copy_clip_end(copy, entry, endaddr) \
MACRO_BEGIN \
if ((endaddr) < (entry)->vme_end) \
_vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
MACRO_END
/*
* This routine is called only when it is known that
* the entry must be split.
*/
void
_vm_map_clip_end(
register struct vm_map_header *map_header,
register vm_map_entry_t entry,
register vm_offset_t end)
{
register vm_map_entry_t new_entry;
/*
* Create a new entry and insert it
* AFTER the specified entry
*/
new_entry = _vm_map_entry_create(map_header);
vm_map_entry_copy_full(new_entry, entry);
new_entry->vme_start = entry->vme_end = end;
new_entry->offset += (end - entry->vme_start);
_vm_map_entry_link(map_header, entry, new_entry);
if (entry->is_sub_map)
vm_map_reference(new_entry->object.sub_map);
else
vm_object_reference(new_entry->object.vm_object);
}
/*
* VM_MAP_RANGE_CHECK: [ internal use only ]
*
* Asserts that the starting and ending region
* addresses fall within the valid range of the map.
*/
#define VM_MAP_RANGE_CHECK(map, start, end) \
{ \
if (start < vm_map_min(map)) \
start = vm_map_min(map); \
if (end > vm_map_max(map)) \
end = vm_map_max(map); \
if (start > end) \
start = end; \
}
/*
* vm_map_range_check: [ internal use only ]
*
* Check that the region defined by the specified start and
* end addresses are wholly contained within a single map
* entry or set of adjacent map entries of the spacified map,
* i.e. the specified region contains no unmapped space.
* If any or all of the region is unmapped, FALSE is returned.
* Otherwise, TRUE is returned and if the output argument 'entry'
* is not NULL it points to the map entry containing the start
* of the region.
*
* The map is locked for reading on entry and is left locked.
*/
boolean_t
vm_map_range_check(
register vm_map_t map,
register vm_offset_t start,
register vm_offset_t end,
vm_map_entry_t *entry)
{
vm_map_entry_t cur;
register vm_offset_t prev;
/*
* Basic sanity checks first
*/
if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
return (FALSE);
/*
* Check first if the region starts within a valid
* mapping for the map.
*/
if (!vm_map_lookup_entry(map, start, &cur))
return (FALSE);
/*
* Optimize for the case that the region is contained
* in a single map entry.
*/
if (entry != (vm_map_entry_t *) NULL)
*entry = cur;
if (end <= cur->vme_end)
return (TRUE);
/*
* If the region is not wholly contained within a
* single entry, walk the entries looking for holes.
*/
prev = cur->vme_end;
cur = cur->vme_next;
while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
if (end <= cur->vme_end)
return (TRUE);
prev = cur->vme_end;
cur = cur->vme_next;
}
return (FALSE);
}
/*
* vm_map_submap: [ kernel use only ]
*
* Mark the given range as handled by a subordinate map.
*
* This range must have been created with vm_map_find using
* the vm_submap_object, and no other operations may have been
* performed on this range prior to calling vm_map_submap.
*
* Only a limited number of operations can be performed
* within this rage after calling vm_map_submap:
* vm_fault
* [Don't try vm_map_copyin!]
*
* To remove a submapping, one must first remove the
* range from the superior map, and then destroy the
* submap (if desired). [Better yet, don't try it.]
*/
kern_return_t
vm_map_submap(
register vm_map_t map,
register vm_offset_t start,
register vm_offset_t end,
vm_map_t submap,
vm_offset_t offset)
{
vm_map_entry_t entry;
register kern_return_t result = KERN_INVALID_ARGUMENT;
register vm_object_t object;
vm_map_lock(map);
VM_MAP_RANGE_CHECK(map, start, end);
if (vm_map_lookup_entry(map, start, &entry)) {
vm_map_clip_start(map, entry, start);
}
else
entry = entry->vme_next;
vm_map_clip_end(map, entry, end);
if ((entry->vme_start == start) && (entry->vme_end == end) &&
(!entry->is_sub_map) &&
((object = entry->object.vm_object) == vm_submap_object) &&
(object->resident_page_count == 0) &&
(object->copy == VM_OBJECT_NULL) &&
(object->shadow == VM_OBJECT_NULL) &&
(!object->pager_created)) {
entry->offset = offset;
entry->object.vm_object = VM_OBJECT_NULL;
vm_object_deallocate(object);
entry->is_sub_map = TRUE;
vm_map_reference(entry->object.sub_map = submap);
result = KERN_SUCCESS;
}
vm_map_unlock(map);
return(result);
}
/*
* vm_map_protect:
*
* Sets the protection of the specified address
* region in the target map. If "set_max" is
* specified, the maximum protection is to be set;
* otherwise, only the current protection is affected.
*/
kern_return_t
vm_map_protect(
register vm_map_t map,
register vm_offset_t start,
register vm_offset_t end,
register vm_prot_t new_prot,
register boolean_t set_max)
{
register vm_map_entry_t current;
register vm_offset_t prev;
vm_map_entry_t entry;
vm_prot_t new_max;
boolean_t clip;
XPR(XPR_VM_MAP,
"vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
(integer_t)map, start, end, new_prot, set_max);
vm_map_lock(map);
/*
* Lookup the entry. If it doesn't start in a valid
* entry, return an error. Remember if we need to
* clip the entry. We don't do it here because we don't
* want to make any changes until we've scanned the
* entire range below for address and protection
* violations.
*/
if (!(clip = vm_map_lookup_entry(map, start, &entry))) {
vm_map_unlock(map);
return(KERN_INVALID_ADDRESS);
}
/*
* Make a first pass to check for protection and address
* violations.
*/
current = entry;
prev = current->vme_start;
while ((current != vm_map_to_entry(map)) &&
(current->vme_start < end)) {
/*
* If there is a hole, return an error.
*/
if (current->vme_start != prev) {
vm_map_unlock(map);
return(KERN_INVALID_ADDRESS);
}
new_max = current->max_protection;
if(new_prot & VM_PROT_COPY) {
new_max |= VM_PROT_WRITE;
}
if ((new_prot & new_max) != new_prot) {
vm_map_unlock(map);
return(KERN_PROTECTION_FAILURE);
}
prev = current->vme_end;
current = current->vme_next;
}
if (end > prev) {
vm_map_unlock(map);
return(KERN_INVALID_ADDRESS);
}
/*
* Go back and fix up protections.
* Clip to start here if the range starts within
* the entry.
*/
current = entry;
if (clip) {
vm_map_clip_start(map, entry, start);
}
while ((current != vm_map_to_entry(map)) &&
(current->vme_start < end)) {
vm_prot_t old_prot;
vm_map_clip_end(map, current, end);
old_prot = current->protection;
if(new_prot & VM_PROT_COPY) {
/* caller is asking specifically to copy the */
/* mapped data, this implies that max protection */
/* will include write. Caller must be prepared */
/* for loss of shared memory communication in the */
/* target area after taking this step */
current->needs_copy = TRUE;
current->max_protection |= VM_PROT_WRITE;
}
if (set_max)
current->protection =
(current->max_protection = new_prot) &
old_prot;
else
current->protection = new_prot;
/*
* Update physical map if necessary.
* If the request is to turn off write protection,
* we won't do it for real (in pmap). This is because
* it would cause copy-on-write to fail. We've already
* set, the new protection in the map, so if a
* write-protect fault occurred, it will be fixed up
* properly, COW or not.
*/
if ((current->protection != old_prot) && !(current->protection & VM_PROT_WRITE)) {
pmap_protect(map->pmap, current->vme_start,
current->vme_end,
current->protection);
}
current = current->vme_next;
}
vm_map_unlock(map);
return(KERN_SUCCESS);
}
/*
* vm_map_inherit:
*
* Sets the inheritance of the specified address
* range in the target map. Inheritance
* affects how the map will be shared with
* child maps at the time of vm_map_fork.
*/
kern_return_t
vm_map_inherit(
register vm_map_t map,
register vm_offset_t start,
register vm_offset_t end,
register vm_inherit_t new_inheritance)
{
register vm_map_entry_t entry;
vm_map_entry_t temp_entry;
vm_map_lock(map);
VM_MAP_RANGE_CHECK(map, start, end);
if (vm_map_lookup_entry(map, start, &temp_entry)) {
entry = temp_entry;
vm_map_clip_start(map, entry, start);
}
else {
temp_entry = temp_entry->vme_next;
entry = temp_entry;
}
/* first check entire range for submaps which can't support the */
/* given inheritance. */
while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
if(entry->is_sub_map) {
if(new_inheritance == VM_INHERIT_COPY)
return(KERN_INVALID_ARGUMENT);
}
entry = entry->vme_next;
}
entry = temp_entry;
while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
vm_map_clip_end(map, entry, end);
entry->inheritance = new_inheritance;
entry = entry->vme_next;
}
vm_map_unlock(map);
return(KERN_SUCCESS);
}
/*
* vm_map_wire:
*
* Sets the pageability of the specified address range in the
* target map as wired. Regions specified as not pageable require
* locked-down physical memory and physical page maps. The
* access_type variable indicates types of accesses that must not
* generate page faults. This is checked against protection of
* memory being locked-down.
*
* The map must not be locked, but a reference must remain to the
* map throughout the call.
*/
kern_return_t
vm_map_wire(
register vm_map_t map,
register vm_offset_t start,
register vm_offset_t end,
register vm_prot_t access_type,
boolean_t user_wire)
{
register vm_map_entry_t entry;
struct vm_map_entry *first_entry, tmp_entry;
register vm_offset_t s,e;
kern_return_t rc;
boolean_t need_wakeup;
unsigned int last_timestamp;
vm_size_t size;
vm_map_lock(map);
last_timestamp = map->timestamp;
VM_MAP_RANGE_CHECK(map, start, end);
assert(page_aligned(start));
assert(page_aligned(end));
if (vm_map_lookup_entry(map, start, &first_entry)) {
entry = first_entry;
/* vm_map_clip_start will be done later. */
} else {
/* Start address is not in map */
vm_map_unlock(map);
return(KERN_INVALID_ADDRESS);
}
s=start;
need_wakeup = FALSE;
while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
if(entry->is_sub_map) {
vm_offset_t sub_start;
vm_offset_t sub_end;
vm_offset_t local_end;
if(entry->vme_start < start)
sub_start = start;
else
sub_start = entry->vme_start;
sub_start -= entry->vme_start;
sub_start += entry->offset;
if(entry->vme_end < end)
sub_end = entry->vme_end;
else
sub_end = end;
sub_end -= entry->vme_start;
sub_end += entry->offset;
local_end = entry->vme_end;
vm_map_unlock(map);
if (vm_map_wire(entry->object.sub_map,
sub_start, sub_end,
access_type, user_wire)
!= KERN_SUCCESS) {
vm_map_unwire(map, start, s, user_wire);
return(KERN_FAILURE);
}
vm_map_lock(map);
if (last_timestamp+1 != map->timestamp) {
/*
* Find the entry again. It could have been clipped
* after we unlocked the map.
*/
if (!vm_map_lookup_entry(map, local_end,
&first_entry))
panic("vm_map_wire: re-lookup failed");
entry = first_entry;
} else
entry = entry->vme_next;
last_timestamp = map->timestamp;
continue;
}
/*
* If another thread is wiring/unwiring this entry then
* block after informing other thread to wake us up.
*/
if (entry->in_transition) {
/*
* We have not clipped the entry. Make sure that
* the start address is in range so that the lookup
* below will succeed.
*/
s = entry->vme_start < start? start: entry->vme_start;
entry->needs_wakeup = TRUE;
/*
* wake up anybody waiting on entries that we have
* already wired.
*/
if (need_wakeup) {
vm_map_entry_wakeup(map);
need_wakeup = FALSE;
}
/*
* User wiring is interruptible
*/
vm_map_entry_wait(map,
(user_wire) ? THREAD_ABORTSAFE :
THREAD_UNINT);
if (user_wire && current_thread()->wait_result ==
THREAD_INTERRUPTED) {
/*
* undo the wirings we have done so far
* We do not clear the needs_wakeup flag,
* because we cannot tell if we were the
* only one waiting.
*/
vm_map_unwire(map, start, s, user_wire);
return(KERN_FAILURE);
}
vm_map_lock(map);
/*
* Cannot avoid a lookup here. reset timestamp.
*/
last_timestamp = map->timestamp;
/*
* The entry could have been clipped, look it up again.
* Worse that can happen is, it may not exist anymore.
*/
if (!vm_map_lookup_entry(map, s, &first_entry)) {
if (!user_wire)
panic("vm_map_wire: re-lookup failed");
/*
* User: undo everything upto the previous
* entry. let vm_map_unwire worry about
* checking the validity of the range.
*/
vm_map_unlock(map);
vm_map_unwire(map, start, s, user_wire);
return(KERN_FAILURE);
}
entry = first_entry;
continue;
}
/*
* If this entry is already wired then increment
* the appropriate wire reference count.
*/
if (entry->wired_count) {
/* sanity check: wired_count is a short */
if (entry->wired_count >= MAX_WIRE_COUNT)
panic("vm_map_wire: too many wirings");
if (user_wire &&
entry->user_wired_count >= MAX_WIRE_COUNT) {
vm_map_unlock(map);
vm_map_unwire(map, start,
entry->vme_start, user_wire);
return(KERN_FAILURE);
}
/*
* entry is already wired down, get our reference
* after clipping to our range.
*/
vm_map_clip_start(map, entry, start);
vm_map_clip_end(map, entry, end);
if (!user_wire || (entry->user_wired_count++ == 0))
entry->wired_count++;
entry = entry->vme_next;
continue;
}
/*
* Unwired entry
*/
/*
* Perform actions of vm_map_lookup that need the write
* lock on the map: create a shadow object for a
* copy-on-write region, or an object for a zero-fill
* region.
*/
size = entry->vme_end - entry->vme_start;
/*
* If wiring a copy-on-write page, we need to copy it now
* even if we're only (currently) requesting read access.
* This is aggressive, but once it's wired we can't move it.
*/
if (entry->needs_copy) {
vm_object_shadow(&entry->object.vm_object,
&entry->offset, size);
entry->needs_copy = FALSE;
} else if (entry->object.vm_object == VM_OBJECT_NULL) {
entry->object.vm_object = vm_object_allocate(size);
entry->offset = (vm_offset_t)0;
}
vm_map_clip_start(map, entry, start);
vm_map_clip_end(map, entry, end);
s = entry->vme_start;
e = entry->vme_end;
/*
* Check for holes and protection mismatch.
* Holes: Next entry should be contiguous unless this
* is the end of the region.
* Protection: Access requested must be allowed, unless
* wiring is by protection class
*/
if ((((entry->vme_end < end) &&
((entry->vme_next == vm_map_to_entry(map)) ||
(entry->vme_next->vme_start > entry->vme_end))) ||
((entry->protection & access_type) != access_type))) {
/*
* Found a hole or protection problem.
* Unwire the region we wired so far.
*/
if (start != entry->vme_start) {
vm_map_unlock(map);
vm_map_unwire(map, start, s, user_wire);
} else {
vm_map_unlock(map);
}
return((entry->protection&access_type) != access_type?
KERN_PROTECTION_FAILURE: KERN_INVALID_ADDRESS);
}
assert(entry->wired_count == 0 && entry->user_wired_count == 0);
if (user_wire)
entry->user_wired_count++;
entry->wired_count++;
entry->in_transition = TRUE;
/*
* This entry might get split once we unlock the map.
* In vm_fault_wire(), we need the current range as
* defined by this entry. In order for this to work
* along with a simultaneous clip operation, we make a
* temporary copy of this entry and use that for the
* wiring. Note that the underlying objects do not
* change during a clip.
*/
tmp_entry = *entry;
/*
* The in_transition state guarentees that the entry
* (or entries for this range, if split occured) will be
* there when the map lock is acquired for the second time.
*/
vm_map_unlock(map);
rc = vm_fault_wire(map, &tmp_entry);
vm_map_lock(map);
if (last_timestamp+1 != map->timestamp) {
/*
* Find the entry again. It could have been clipped
* after we unlocked the map.
*/
if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
&first_entry))
panic("vm_map_wire: re-lookup failed");
entry = first_entry;
}
last_timestamp = map->timestamp;
while ((entry != vm_map_to_entry(map)) &&
(entry->vme_start < tmp_entry.vme_end)) {
assert(entry->in_transition);
entry->in_transition = FALSE;
if (entry->needs_wakeup) {
entry->needs_wakeup = FALSE;
need_wakeup = TRUE;
}
if (rc != KERN_SUCCESS) { /* from vm_*_wire */
if (user_wire)
entry->user_wired_count--;
entry->wired_count--;
}
entry = entry->vme_next;
}
if (rc != KERN_SUCCESS) { /* from vm_*_wire */
vm_map_unlock(map);
if (need_wakeup)
vm_map_entry_wakeup(map);
/*
* undo everything upto the previous entry.
*/
(void)vm_map_unwire(map, start, s, user_wire);
return rc;
}
} /* end while loop through map entries */
vm_map_unlock(map);
/*
* wake up anybody waiting on entries we wired.
*/
if (need_wakeup)
vm_map_entry_wakeup(map);
return(KERN_SUCCESS);
}
/*
* vm_map_unwire:
*
* Sets the pageability of the specified address range in the target
* as pageable. Regions specified must have been wired previously.
*
* The map must not be locked, but a reference must remain to the map
* throughout the call.
*
* Kernel will panic on failures. User unwire ignores holes and
* unwired and intransition entries to avoid losing memory by leaving
* it unwired.
*/
kern_return_t
vm_map_unwire(
register vm_map_t map,
register vm_offset_t start,
register vm_offset_t end,
boolean_t user_wire)
{
register vm_map_entry_t entry;
struct vm_map_entry *first_entry, tmp_entry;
boolean_t need_wakeup;
unsigned int last_timestamp;
vm_map_lock(map);
last_timestamp = map->timestamp;
VM_MAP_RANGE_CHECK(map, start, end);
assert(page_aligned(start));
assert(page_aligned(end));
if (vm_map_lookup_entry(map, start, &first_entry)) {
entry = first_entry;
/* vm_map_clip_start will be done later. */
}
else {
/* Start address is not in map. */
vm_map_unlock(map);
return(KERN_INVALID_ADDRESS);
}
need_wakeup = FALSE;
while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
if(entry->is_sub_map) {
vm_offset_t sub_start;
vm_offset_t sub_end;
vm_offset_t local_end;
if(entry->vme_start < start)
sub_start = start;
else
sub_start = entry->vme_start;
sub_start -= entry->vme_start;
sub_start += entry->offset;
if(entry->vme_end < end)
sub_end = entry->vme_end;
else
sub_end = end;
sub_end -= entry->vme_start;
sub_end += entry->offset;
local_end = entry->vme_end;
vm_map_unlock(map);
vm_map_unwire(entry->object.sub_map,
sub_start, sub_end,
user_wire);
vm_map_lock(map);
if (last_timestamp+1 != map->timestamp) {
/*
* Find the entry again. It could have been clipped
* after we unlocked the map.
*/
if (!vm_map_lookup_entry(map, local_end,
&first_entry))
panic("vm_map_unwire: re-lookup failed");
entry = first_entry;
} else
entry = entry->vme_next;
last_timestamp = map->timestamp;
continue;
}
if (entry->in_transition) {
/*
* 1)
* Another thread is wiring down this entry. Note
* that if it is not for the other thread we would
* be unwiring an unwired entry. This is not
* permitted. If we wait, we will be unwiring memory
* we did not wire.
*
* 2)
* Another thread is unwiring this entry. We did not
* have a reference to it, because if we did, this
* entry will not be getting unwired now.
*/
if (!user_wire)
panic("vm_map_unwire: in_transition entry");
entry = entry->vme_next;
continue;
}
if (entry->wired_count == 0 ||
(user_wire && entry->user_wired_count == 0)) {
if (!user_wire)
panic("vm_map_unwire: entry is unwired");
entry = entry->vme_next;
continue;
}
assert(entry->wired_count > 0 &&
(!user_wire || entry->user_wired_count > 0));
vm_map_clip_start(map, entry, start);
vm_map_clip_end(map, entry, end);
/*
* Check for holes
* Holes: Next entry should be contiguous unless
* this is the end of the region.
*/
if (((entry->vme_end < end) &&
((entry->vme_next == vm_map_to_entry(map)) ||
(entry->vme_next->vme_start > entry->vme_end)))) {
if (!user_wire)
panic("vm_map_unwire: non-contiguous region");
entry = entry->vme_next;
continue;
}
if (!user_wire || (--entry->user_wired_count == 0))
entry->wired_count--;
if (entry->wired_count != 0) {
entry = entry->vme_next;
continue;
}
entry->in_transition = TRUE;
tmp_entry = *entry; /* see comment in vm_map_wire() */
/*
* We can unlock the map now. The in_transition state
* guarantees existance of the entry.
*/
vm_map_unlock(map);
vm_fault_unwire(map, &tmp_entry, FALSE);
vm_map_lock(map);
if (last_timestamp+1 != map->timestamp) {
/*
* Find the entry again. It could have been clipped
* or deleted after we unlocked the map.
*/
if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
&first_entry)) {
if (!user_wire)
panic("vm_map_unwire: re-lookup failed");
entry = first_entry->vme_next;
} else
entry = first_entry;
}
last_timestamp = map->timestamp;
/*
* clear transition bit for all constituent entries that
* were in the original entry (saved in tmp_entry). Also
* check for waiters.
*/
while ((entry != vm_map_to_entry(map)) &&
(entry->vme_start < tmp_entry.vme_end)) {
assert(entry->in_transition);
entry->in_transition = FALSE;
if (entry->needs_wakeup) {
entry->needs_wakeup = FALSE;
need_wakeup = TRUE;
}
entry = entry->vme_next;
}
}
vm_map_unlock(map);
/*
* wake up anybody waiting on entries that we have unwired.
*/
if (need_wakeup)
vm_map_entry_wakeup(map);
return(KERN_SUCCESS);
}
/*
* vm_map_entry_delete: [ internal use only ]
*
* Deallocate the given entry from the target map.
*/
void
vm_map_entry_delete(
register vm_map_t map,
register vm_map_entry_t entry)
{
register vm_offset_t s, e;
register vm_object_t object;
extern vm_object_t kernel_object;
s = entry->vme_start;
e = entry->vme_end;
assert(page_aligned(s));
assert(page_aligned(e));
assert(entry->wired_count == 0);
assert(entry->user_wired_count == 0);
/*
* Deallocate the object only after removing all
* pmap entries pointing to its pages.
*/
if (entry->is_sub_map)
vm_map_deallocate(entry->object.sub_map);
else
vm_object_deallocate(entry->object.vm_object);
vm_map_entry_unlink(map, entry);
map->size -= e - s;
vm_map_entry_dispose(map, entry);
}
/*
* vm_map_delete: [ internal use only ]
*
* Deallocates the given address range from the target map.
* Removes all user wirings. Unwires one kernel wiring if
* VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
* away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
* interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
*
* This routine is called with map locked and leaves map locked.
*/
kern_return_t
vm_map_delete(
register vm_map_t map,
vm_offset_t start,
register vm_offset_t end,
int flags)
{
register vm_map_entry_t entry, next;
struct vm_map_entry *first_entry, tmp_entry;
register vm_offset_t s, e;
register vm_object_t object;
boolean_t need_wakeup;
unsigned int last_timestamp = ~0; /* unlikely value */
int interruptible;
extern vm_map_t kernel_map;
interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
THREAD_ABORTSAFE : THREAD_UNINT;
/*
* Find the start of the region, and clip it
*/
if (vm_map_lookup_entry(map, start, &first_entry)) {
entry = first_entry;
vm_map_clip_start(map, entry, start);
/*
* Fix the lookup hint now, rather than each
* time through the loop.
*/
SAVE_HINT(map, entry->vme_prev);
} else {
entry = first_entry->vme_next;
}
need_wakeup = FALSE;
/*
* Step through all entries in this region
*/
while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
vm_map_clip_end(map, entry, end);
retry_entry:
if (entry->in_transition) {
/*
* Another thread is wiring/unwiring this entry.
* Let the other thread know we are waiting.
*/
s = entry->vme_start;
entry->needs_wakeup = TRUE;
/*
* wake up anybody waiting on entries that we have
* already unwired/deleted.
*/
if (need_wakeup) {
vm_map_entry_wakeup(map);
need_wakeup = FALSE;
}
vm_map_entry_wait(map, interruptible);
if (interruptible &&
current_thread()->wait_result == THREAD_INTERRUPTED)
/*
* We do not clear the needs_wakeup flag,
* since we cannot tell if we were the only one.
*/
return KERN_ABORTED;
vm_map_lock(map);
/*
* Cannot avoid a lookup here. reset timestamp.
*/
last_timestamp = map->timestamp;
/*
* The entry could have been clipped or it
* may not exist anymore. Look it up again.
*/
if (!vm_map_lookup_entry(map, s, &first_entry)) {
assert((map != kernel_map) &&
(!entry->is_sub_map));
/*
* User: use the next entry
*/
entry = first_entry->vme_next;
} else {
entry = first_entry;
SAVE_HINT(map, entry->vme_prev);
}
goto retry_entry;
} /* end in_transition */
if (entry->wired_count) {
/*
* Remove a kernel wiring if requested or if
* there are user wirings.
*/
if ((flags & VM_MAP_REMOVE_KUNWIRE) ||
(entry->user_wired_count > 0))
entry->wired_count--;
/* remove all user wire references */
entry->user_wired_count = 0;
if (entry->wired_count != 0) {
assert((map != kernel_map) &&
(!entry->is_sub_map));
/*
* Cannot continue. Typical case is when
* a user thread has physical io pending on
* on this page. Either wait for the
* kernel wiring to go away or return an
* error.
*/
if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
s = entry->vme_start;
entry->needs_wakeup = TRUE;
vm_map_entry_wait(map, interruptible);
if (interruptible &&
current_thread()->wait_result ==
THREAD_INTERRUPTED)
/*
* We do not clear the
* needs_wakeup flag, since we
* cannot tell if we were the
* only one.
*/
return KERN_ABORTED;
vm_map_lock(map);
/*
* Cannot avoid a lookup here. reset
* timestamp.
*/
last_timestamp = map->timestamp;
/*
* The entry could have been clipped or
* it may not exist anymore. Look it
* up again.
*/
if (!vm_map_lookup_entry(map, s,
&first_entry)) {
assert((map != kernel_map) &&
(!entry->is_sub_map));
/*
* User: use the next entry
*/
entry = first_entry->vme_next;
} else {
entry = first_entry;
SAVE_HINT(map, entry->vme_prev);
}
goto retry_entry;
}
else {
return KERN_FAILURE;
}
}
entry->in_transition = TRUE;
/*
* copy current entry. see comment in vm_map_wire()
*/
tmp_entry = *entry;
s = entry->vme_start;
e = entry->vme_end;
/*
* We can unlock the map now. The in_transition
* state guarentees existance of the entry.
*/
vm_map_unlock(map);
vm_fault_unwire(map, &tmp_entry,
tmp_entry.object.vm_object == kernel_object);
vm_map_lock(map);
if (last_timestamp+1 != map->timestamp) {
/*
* Find the entry again. It could have
* been clipped after we unlocked the map.
*/
if (!vm_map_lookup_entry(map, s, &first_entry)){
assert((map != kernel_map) &&
(!entry->is_sub_map));
first_entry = first_entry->vme_next;
} else {
SAVE_HINT(map, entry->vme_prev);
}
} else {
SAVE_HINT(map, entry->vme_prev);
first_entry = entry;
}
last_timestamp = map->timestamp;
entry = first_entry;
while ((entry != vm_map_to_entry(map)) &&
(entry->vme_start < tmp_entry.vme_end)) {
assert(entry->in_transition);
entry->in_transition = FALSE;
if (entry->needs_wakeup) {
entry->needs_wakeup = FALSE;
need_wakeup = TRUE;
}
entry = entry->vme_next;
}
/*
* We have unwired the entry(s). Go back and
* delete them.
*/
entry = first_entry;
goto retry_entry;
}
/* entry is unwired */
assert(entry->wired_count == 0);
assert(entry->user_wired_count == 0);
if ((!entry->is_sub_map &&
entry->object.vm_object != kernel_object) ||
entry->is_sub_map) {
pmap_remove(map->pmap,
entry->vme_start, entry->vme_end);
}
next = entry->vme_next;
vm_map_entry_delete(map, entry);
entry = next;
}
if (map->wait_for_space)
thread_wakeup((event_t) map);
/*
* wake up anybody waiting on entries that we have already deleted.
*/
if (need_wakeup)
vm_map_entry_wakeup(map);
return KERN_SUCCESS;
}
/*
* vm_map_remove:
*
* Remove the given address range from the target map.
* This is the exported form of vm_map_delete.
*/
kern_return_t
vm_map_remove(
register vm_map_t map,
register vm_offset_t start,
register vm_offset_t end,
register boolean_t flags)
{
register kern_return_t result;
vm_map_lock(map);
VM_MAP_RANGE_CHECK(map, start, end);
result = vm_map_delete(map, start, end, flags);
vm_map_unlock(map);
return(result);
}
/*
* vm_map_copy_steal_pages:
*
* Steal all the pages from a vm_map_copy page_list by copying ones
* that have not already been stolen.
*/
void
vm_map_copy_steal_pages(
vm_map_copy_t copy)
{
register vm_page_t m, new_m;
register int i;
vm_object_t object;
assert(copy->type == VM_MAP_COPY_PAGE_LIST);
for (i = 0; i < copy->cpy_npages; i++) {
/*
* If the page is not tabled, then it's already stolen.
*/
m = copy->cpy_page_list[i];
if (!m->tabled)
continue;
/*
* Page was not stolen, get a new
* one and do the copy now.
*/
while ((new_m = vm_page_grab()) == VM_PAGE_NULL) {
VM_PAGE_WAIT();
}
vm_page_gobble(new_m); /* mark as consumed internally */
vm_page_copy(m, new_m);
object = m->object;
vm_object_lock(object);
vm_page_lock_queues();
if (!m->active && !m->inactive)
vm_page_activate(m);
vm_page_unlock_queues();
PAGE_WAKEUP_DONE(m);
vm_object_paging_end(object);
vm_object_unlock(object);
copy->cpy_page_list[i] = new_m;
}
copy->cpy_page_loose = TRUE;
}
/*
* vm_map_copy_page_discard:
*
* Get rid of the pages in a page_list copy. If the pages are
* stolen, they are freed. If the pages are not stolen, they
* are unbusied, and associated state is cleaned up.
*/
void
vm_map_copy_page_discard(
vm_map_copy_t copy)
{
assert(copy->type == VM_MAP_COPY_PAGE_LIST);
while (copy->cpy_npages > 0) {
vm_page_t m;
if ((m = copy->cpy_page_list[--(copy->cpy_npages)]) !=
VM_PAGE_NULL) {
/*
* If it's not in the table, then it's
* a stolen page that goes back
* to the free list. Else it belongs
* to some object, and we hold a
* paging reference on that object.
*/
if (!m->tabled) {
VM_PAGE_FREE(m);
}
else {
vm_object_t object;
object = m->object;
vm_object_lock(object);
vm_page_lock_queues();
if (!m->active && !m->inactive)
vm_page_activate(m);
vm_page_unlock_queues();
if ((!m->busy)) {
kern_return_t kr;
kr = vm_page_unpin(m);
assert(kr == KERN_SUCCESS);
} else {
PAGE_WAKEUP_DONE(m);
}
vm_object_paging_end(object);
vm_object_unlock(object);
}
}
}
}
/*
* Routine: vm_map_copy_discard
*
* Description:
* Dispose of a map copy object (returned by
* vm_map_copyin).
*/
void
vm_map_copy_discard(
vm_map_copy_t copy)
{
TR_DECL("vm_map_copy_discard");
/* tr3("enter: copy 0x%x type %d", copy, copy->type);*/
free_next_copy:
if (copy == VM_MAP_COPY_NULL)
return;
switch (copy->type) {
case VM_MAP_COPY_ENTRY_LIST:
while (vm_map_copy_first_entry(copy) !=
vm_map_copy_to_entry(copy)) {
vm_map_entry_t entry = vm_map_copy_first_entry(copy);
vm_map_copy_entry_unlink(copy, entry);
vm_object_deallocate(entry->object.vm_object);
vm_map_copy_entry_dispose(copy, entry);
}
break;
case VM_MAP_COPY_OBJECT:
vm_object_deallocate(copy->cpy_object);
break;
case VM_MAP_COPY_PAGE_LIST:
/*
* To clean this up, we have to unbusy all the pages
* and release the paging references in their objects.
*/
if (copy->cpy_npages > 0)
vm_map_copy_page_discard(copy);
/*
* If there's a continuation, abort it. The
* abort routine releases any storage.
*/
if (vm_map_copy_has_cont(copy)) {
assert(vm_map_copy_cont_is_valid(copy));
/*
* Special case: recognize
* vm_map_copy_discard_cont and optimize
* here to avoid tail recursion.
*/
if (copy->cpy_cont == vm_map_copy_discard_cont) {
register vm_map_copy_t new_copy;
new_copy = (vm_map_copy_t) copy->cpy_cont_args;
zfree(vm_map_copy_zone, (vm_offset_t) copy);
copy = new_copy;
goto free_next_copy;
} else {
vm_map_copy_abort_cont(copy);
}
}
break;
case VM_MAP_COPY_KERNEL_BUFFER:
/*
* The vm_map_copy_t and possibly the data buffer were
* allocated by a single call to kalloc(), i.e. the
* vm_map_copy_t was not allocated out of the zone.
*/
kfree((vm_offset_t) copy, copy->cpy_kalloc_size);
return;
}
zfree(vm_map_copy_zone, (vm_offset_t) copy);
}
/*
* Routine: vm_map_copy_copy
*
* Description:
* Move the information in a map copy object to
* a new map copy object, leaving the old one
* empty.
*
* This is used by kernel routines that need
* to look at out-of-line data (in copyin form)
* before deciding whether to return SUCCESS.
* If the routine returns FAILURE, the original
* copy object will be deallocated; therefore,
* these routines must make a copy of the copy
* object and leave the original empty so that
* deallocation will not fail.
*/
vm_map_copy_t
vm_map_copy_copy(
vm_map_copy_t copy)
{
vm_map_copy_t new_copy;
if (copy == VM_MAP_COPY_NULL)
return VM_MAP_COPY_NULL;
/*
* Allocate a new copy object, and copy the information
* from the old one into it.
*/
new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
*new_copy = *copy;
if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
/*
* The links in the entry chain must be
* changed to point to the new copy object.
*/
vm_map_copy_first_entry(copy)->vme_prev
= vm_map_copy_to_entry(new_copy);
vm_map_copy_last_entry(copy)->vme_next
= vm_map_copy_to_entry(new_copy);
}
/*
* Change the old copy object into one that contains
* nothing to be deallocated.
*/
copy->type = VM_MAP_COPY_OBJECT;
copy->cpy_object = VM_OBJECT_NULL;
/*
* Return the new object.
*/
return new_copy;
}
/*
* Routine: vm_map_copy_discard_cont
*
* Description:
* A version of vm_map_copy_discard that can be called
* as a continuation from a vm_map_copy page list.
*/
kern_return_t
vm_map_copy_discard_cont(
vm_map_copyin_args_t cont_args,
vm_map_copy_t *copy_result) /* OUT */
{
vm_map_copy_discard((vm_map_copy_t) cont_args);
if (copy_result != (vm_map_copy_t *)0)
*copy_result = VM_MAP_COPY_NULL;
return(KERN_SUCCESS);
}
kern_return_t
vm_map_overwrite_submap_recurse(
vm_map_t dst_map,
vm_offset_t dst_addr,
vm_size_t dst_size)
{
vm_offset_t dst_end;
vm_map_entry_t tmp_entry;
vm_map_entry_t entry;
kern_return_t result;
boolean_t encountered_sub_map = FALSE;
/*
* Verify that the destination is all writeable
* initially. We have to trunc the destination
* address and round the copy size or we'll end up
* splitting entries in strange ways.
*/
dst_end = round_page(dst_addr + dst_size);
start_pass_1:
vm_map_lock(dst_map);
if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
vm_map_unlock(dst_map);
return(KERN_INVALID_ADDRESS);
}
vm_map_clip_start(dst_map, tmp_entry, trunc_page(dst_addr));
for (entry = tmp_entry;;) {
vm_map_entry_t next;
next = entry->vme_next;
while(entry->is_sub_map) {
vm_offset_t sub_start;
vm_offset_t sub_end;
vm_offset_t local_end;
if (entry->in_transition) {
/*
* Say that we are waiting, and wait for entry.
*/
entry->needs_wakeup = TRUE;
vm_map_entry_wait(dst_map, THREAD_UNINT);
goto start_pass_1;
}
encountered_sub_map = TRUE;
sub_start = entry->offset;
if(entry->vme_end < dst_end)
sub_end = entry->vme_end;
else
sub_end = dst_end;
sub_end -= entry->vme_start;
sub_end += entry->offset;
local_end = entry->vme_end;
vm_map_unlock(dst_map);
result = vm_map_overwrite_submap_recurse(
entry->object.sub_map,
sub_start,
sub_end - sub_start);
if(result != KERN_SUCCESS)
return result;
if (dst_end <= entry->vme_end)
return KERN_SUCCESS;
vm_map_lock(dst_map);
if(!vm_map_lookup_entry(dst_map, local_end,
&tmp_entry)) {
vm_map_unlock(dst_map);
return(KERN_INVALID_ADDRESS);
}
entry = tmp_entry;
next = entry->vme_next;
}
if ( ! (entry->protection & VM_PROT_WRITE)) {
vm_map_unlock(dst_map);
return(KERN_PROTECTION_FAILURE);
}
/*
* If the entry is in transition, we must wait
* for it to exit that state. Anything could happen
* when we unlock the map, so start over.
*/
if (entry->in_transition) {
/*
* Say that we are waiting, and wait for entry.
*/
entry->needs_wakeup = TRUE;
vm_map_entry_wait(dst_map, THREAD_UNINT);
goto start_pass_1;
}
/*
* our range is contained completely within this map entry
*/
if (dst_end <= entry->vme_end) {
vm_map_unlock(dst_map);
return KERN_SUCCESS;
}
/*
* check that range specified is contiguous region
*/
if ((next == vm_map_to_entry(dst_map)) ||
(next->vme_start != entry->vme_end)) {
vm_map_unlock(dst_map);
return(KERN_INVALID_ADDRESS);
}
/*
* Check for permanent objects in the destination.
*/
if ((entry->object.vm_object != VM_OBJECT_NULL) &&
((!entry->object.vm_object->internal) ||
(entry->object.vm_object->true_share))) {
if(encountered_sub_map) {
vm_map_unlock(dst_map);
return(KERN_FAILURE);
}
}
entry = next;
}/* for */
vm_map_unlock(dst_map);
return(KERN_SUCCESS);
}
/*
* Routine: vm_map_copy_overwrite
*
* Description:
* Copy the memory described by the map copy
* object (copy; returned by vm_map_copyin) onto
* the specified destination region (dst_map, dst_addr).
* The destination must be writeable.
*
* Unlike vm_map_copyout, this routine actually
* writes over previously-mapped memory. If the
* previous mapping was to a permanent (user-supplied)
* memory object, it is preserved.
*
* The attributes (protection and inheritance) of the
* destination region are preserved.
*
* If successful, consumes the copy object.
* Otherwise, the caller is responsible for it.
*
* Implementation notes:
* To overwrite aligned temporary virtual memory, it is
* sufficient to remove the previous mapping and insert
* the new copy. This replacement is done either on
* the whole region (if no permanent virtual memory
* objects are embedded in the destination region) or
* in individual map entries.
*
* To overwrite permanent virtual memory , it is necessary
* to copy each page, as the external memory management
* interface currently does not provide any optimizations.
*
* Unaligned memory also has to be copied. It is possible
* to use 'vm_trickery' to copy the aligned data. This is
* not done but not hard to implement.
*
* Once a page of permanent memory has been overwritten,
* it is impossible to interrupt this function; otherwise,
* the call would be neither atomic nor location-independent.
* The kernel-state portion of a user thread must be
* interruptible.
*
* It may be expensive to forward all requests that might
* overwrite permanent memory (vm_write, vm_copy) to
* uninterruptible kernel threads. This routine may be
* called by interruptible threads; however, success is
* not guaranteed -- if the request cannot be performed
* atomically and interruptibly, an error indication is
* returned.
*/
kern_return_t
vm_map_copy_overwrite(
vm_map_t dst_map,
vm_offset_t dst_addr,
vm_map_copy_t copy,
boolean_t interruptible)
{
vm_offset_t dst_end;
vm_map_entry_t tmp_entry;
vm_map_entry_t entry;
kern_return_t kr;
boolean_t aligned = TRUE;
boolean_t contains_permanent_objects = FALSE;
boolean_t encountered_sub_map = FALSE;
vm_offset_t base_addr;
vm_size_t copy_size;
vm_size_t total_size;
/*
* Check for null copy object.
*/
if (copy == VM_MAP_COPY_NULL)
return(KERN_SUCCESS);
/*
* Check for special kernel buffer allocated
* by new_ipc_kmsg_copyin.
*/
if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
return(vm_map_copyout_kernel_buffer(dst_map, &dst_addr,
copy, TRUE));
}
/*
* Only works for entry lists at the moment. Will
* support page lists later.
*/
assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
if (copy->size == 0) {
vm_map_copy_discard(copy);
return(KERN_SUCCESS);
}
/*
* Verify that the destination is all writeable
* initially. We have to trunc the destination
* address and round the copy size or we'll end up
* splitting entries in strange ways.
*/
if (!page_aligned(copy->size) ||
!page_aligned (copy->offset) ||
!page_aligned (dst_addr))
{
aligned = FALSE;
dst_end = round_page(dst_addr + copy->size);
} else {
dst_end = dst_addr + copy->size;
}
start_pass_1:
vm_map_lock(dst_map);
if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
vm_map_unlock(dst_map);
return(KERN_INVALID_ADDRESS);
}
vm_map_clip_start(dst_map, tmp_entry, trunc_page(dst_addr));
for (entry = tmp_entry;;) {
vm_map_entry_t next = entry->vme_next;
while(entry->is_sub_map) {
vm_offset_t sub_start;
vm_offset_t sub_end;
vm_offset_t local_end;
if (entry->in_transition) {
/*
* Say that we are waiting, and wait for entry.
*/
entry->needs_wakeup = TRUE;
vm_map_entry_wait(dst_map, THREAD_UNINT);
goto start_pass_1;
}
local_end = entry->vme_end;
if (!(entry->needs_copy)) {
/* if needs_copy we are a COW submap */
/* in such a case we just replace so */
/* there is no need for the follow- */
/* ing check. */
encountered_sub_map = TRUE;
sub_start = entry->offset;
if(entry->vme_end < dst_end)
sub_end = entry->vme_end;
else
sub_end = dst_end;
sub_end -= entry->vme_start;
sub_end += entry->offset;
vm_map_unlock(dst_map);
kr = vm_map_overwrite_submap_recurse(
entry->object.sub_map,
sub_start,
sub_end - sub_start);
if(kr != KERN_SUCCESS)
return kr;
vm_map_lock(dst_map);
}
if (dst_end <= entry->vme_end)
goto start_overwrite;
if(!vm_map_lookup_entry(dst_map, local_end,
&entry)) {
vm_map_unlock(dst_map);
return(KERN_INVALID_ADDRESS);
}
next = entry->vme_next;
}
if ( ! (entry->protection & VM_PROT_WRITE)) {
vm_map_unlock(dst_map);
return(KERN_PROTECTION_FAILURE);
}
/*
* If the entry is in transition, we must wait
* for it to exit that state. Anything could happen
* when we unlock the map, so start over.
*/
if (entry->in_transition) {
/*
* Say that we are waiting, and wait for entry.
*/
entry->needs_wakeup = TRUE;
vm_map_entry_wait(dst_map, THREAD_UNINT);
goto start_pass_1;
}
/*
* our range is contained completely within this map entry
*/
if (dst_end <= entry->vme_end)
break;
/*
* check that range specified is contiguous region
*/
if ((next == vm_map_to_entry(dst_map)) ||
(next->vme_start != entry->vme_end)) {
vm_map_unlock(dst_map);
return(KERN_INVALID_ADDRESS);
}
/*
* Check for permanent objects in the destination.
*/
if ((entry->object.vm_object != VM_OBJECT_NULL) &&
((!entry->object.vm_object->internal) ||
(entry->object.vm_object->true_share))) {
if(encountered_sub_map) {
vm_map_unlock(dst_map);
return(KERN_INVALID_ADDRESS);
}
contains_permanent_objects = TRUE;
}
entry = next;
}/* for */
start_overwrite:
/*
* If there are permanent objects in the destination, then
* the copy cannot be interrupted.
*/
if (interruptible && contains_permanent_objects) {
vm_map_unlock(dst_map);
return(KERN_FAILURE); /* XXX */
}
/*
*
* Make a second pass, overwriting the data
* At the beginning of each loop iteration,
* the next entry to be overwritten is "tmp_entry"
* (initially, the value returned from the lookup above),
* and the starting address expected in that entry
* is "start".
*/
total_size = copy->size;
if(encountered_sub_map) {
copy_size = 0;
/* re-calculate tmp_entry since we've had the map */
/* unlocked */
if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
vm_map_unlock(dst_map);
return(KERN_INVALID_ADDRESS);
}
} else {
copy_size = copy->size;
}
base_addr = dst_addr;
while(TRUE) {
/* deconstruct the copy object and do in parts */
/* only in sub_map, interruptable case */
vm_map_entry_t copy_entry;
vm_map_entry_t previous_prev;
vm_map_entry_t next_copy;
int nentries;
int remaining_entries;
int new_offset;
for (entry = tmp_entry; copy_size == 0;) {
vm_map_entry_t next;
next = entry->vme_next;
/* tmp_entry and base address are moved along */
/* each time we encounter a sub-map. Otherwise */
/* entry can outpase tmp_entry, and the copy_size */
/* may reflect the distance between them */
/* if the current entry is found to be in transition */
/* we will start over at the beginning or the last */
/* encounter of a submap as dictated by base_addr */
/* we will zero copy_size accordingly. */
if (entry->in_transition) {
/*
* Say that we are waiting, and wait for entry.
*/
entry->needs_wakeup = TRUE;
vm_map_entry_wait(dst_map, THREAD_UNINT);
vm_map_lock(dst_map);
if(!vm_map_lookup_entry(dst_map, base_addr,
&tmp_entry)) {
vm_map_unlock(dst_map);
return(KERN_INVALID_ADDRESS);
}
copy_size = 0;
entry = tmp_entry;
continue;
}
if(entry->is_sub_map) {
vm_offset_t sub_start;
vm_offset_t sub_end;
vm_offset_t local_end;
if (entry->needs_copy) {
/* if this is a COW submap */
/* just back the range with a */
/* anonymous entry */
if(entry->vme_end < dst_end)
sub_end = entry->vme_end;
else
sub_end = dst_end;
if(entry->vme_start < base_addr)
sub_start = base_addr;
else
sub_start = entry->vme_start;
vm_map_clip_end(
dst_map, entry, sub_end);
vm_map_clip_start(
dst_map, entry, sub_start);
entry->is_sub_map = FALSE;
vm_map_deallocate(
entry->object.sub_map);
entry->object.sub_map = NULL;
entry->is_shared = FALSE;
entry->needs_copy = FALSE;
entry->offset = 0;
entry->protection = VM_PROT_ALL;
entry->max_protection = VM_PROT_ALL;
entry->wired_count = 0;
entry->user_wired_count = 0;
if(entry->inheritance
== VM_INHERIT_SHARE)
entry->inheritance = VM_INHERIT_COPY;
continue;
}
/* first take care of any non-sub_map */
/* entries to send */
if(base_addr < entry->vme_start) {
/* stuff to send */
copy_size =
entry->vme_start - base_addr;
break;
}
sub_start = entry->offset;
if(entry->vme_end < dst_end)
sub_end = entry->vme_end;
else
sub_end = dst_end;
sub_end -= entry->vme_start;
sub_end += entry->offset;
local_end = entry->vme_end;
vm_map_unlock(dst_map);
copy_size = sub_end - sub_start;
/* adjust the copy object */
if (total_size > copy_size) {
vm_size_t local_size = 0;
vm_size_t entry_size;
nentries = 1;
new_offset = copy->offset;
copy_entry = vm_map_copy_first_entry(copy);
while(copy_entry !=
vm_map_copy_to_entry(copy)){
entry_size = copy_entry->vme_end -
copy_entry->vme_start;
if((local_size < copy_size) &&
((local_size + entry_size)
>= copy_size)) {
vm_map_copy_clip_end(copy,
copy_entry,
copy_entry->vme_start +
(copy_size - local_size));
entry_size = copy_entry->vme_end -
copy_entry->vme_start;
local_size += entry_size;
new_offset += entry_size;
}
if(local_size >= copy_size) {
next_copy = copy_entry->vme_next;
copy_entry->vme_next =
vm_map_copy_to_entry(copy);
previous_prev =
copy->cpy_hdr.links.prev;
copy->cpy_hdr.links.prev = copy_entry;
copy->size = copy_size;
remaining_entries =
copy->cpy_hdr.nentries;
remaining_entries -= nentries;
copy->cpy_hdr.nentries = nentries;
break;
} else {
local_size += entry_size;
new_offset += entry_size;
nentries++;
}
copy_entry = copy_entry->vme_next;
}
}
kr = vm_map_copy_overwrite(
entry->object.sub_map,
sub_start,
copy,
interruptible);
if(kr != KERN_SUCCESS) {
if(next_copy != NULL) {
copy->cpy_hdr.nentries +=
remaining_entries;
copy->cpy_hdr.links.prev->vme_next =
next_copy;
copy->cpy_hdr.links.prev
= previous_prev;
copy->size = total_size;
}
return kr;
}
if (dst_end <= local_end) {
return(KERN_SUCCESS);
}
/* otherwise copy no longer exists, it was */
/* destroyed after successful copy_overwrite */
copy = (vm_map_copy_t)
zalloc(vm_map_copy_zone);
vm_map_copy_first_entry(copy) =
vm_map_copy_last_entry(copy) =
vm_map_copy_to_entry(copy);
copy->type = VM_MAP_COPY_ENTRY_LIST;
copy->offset = new_offset;
total_size -= copy_size;
copy_size = 0;
/* put back remainder of copy in container */
if(next_copy != NULL) {
copy->cpy_hdr.nentries = remaining_entries;
copy->cpy_hdr.links.next = next_copy;
copy->cpy_hdr.links.prev = previous_prev;
copy->size = total_size;
next_copy->vme_prev =
vm_map_copy_to_entry(copy);
next_copy = NULL;
}
base_addr = local_end;
vm_map_lock(dst_map);
if(!vm_map_lookup_entry(dst_map,
local_end, &tmp_entry)) {
vm_map_unlock(dst_map);
return(KERN_INVALID_ADDRESS);
}
entry = tmp_entry;
continue;
}
if (dst_end <= entry->vme_end) {
copy_size = dst_end - base_addr;
break;
}
if ((next == vm_map_to_entry(dst_map)) ||
(next->vme_start != entry->vme_end)) {
vm_map_unlock(dst_map);
return(KERN_INVALID_ADDRESS);
}
entry = next;
}/* for */
next_copy = NULL;
nentries = 1;
/* adjust the copy object */
if (total_size > copy_size) {
vm_size_t local_size = 0;
vm_size_t entry_size;
new_offset = copy->offset;
copy_entry = vm_map_copy_first_entry(copy);
while(copy_entry != vm_map_copy_to_entry(copy)) {
entry_size = copy_entry->vme_end -
copy_entry->vme_start;
if((local_size < copy_size) &&
((local_size + entry_size)
>= copy_size)) {
vm_map_copy_clip_end(copy, copy_entry,
copy_entry->vme_start +
(copy_size - local_size));
entry_size = copy_entry->vme_end -
copy_entry->vme_start;
local_size += entry_size;
new_offset += entry_size;
}
if(local_size >= copy_size) {
next_copy = copy_entry->vme_next;
copy_entry->vme_next =
vm_map_copy_to_entry(copy);
previous_prev =
copy->cpy_hdr.links.prev;
copy->cpy_hdr.links.prev = copy_entry;
copy->size = copy_size;
remaining_entries =
copy->cpy_hdr.nentries;
remaining_entries -= nentries;
copy->cpy_hdr.nentries = nentries;
break;
} else {
local_size += entry_size;
new_offset += entry_size;
nentries++;
}
copy_entry = copy_entry->vme_next;
}
}
if (aligned) {
if ((kr = vm_map_copy_overwrite_aligned(
dst_map, tmp_entry,
copy, base_addr)) != KERN_SUCCESS) {
if(next_copy != NULL) {
copy->cpy_hdr.nentries +=
remaining_entries;
copy->cpy_hdr.links.prev->vme_next =
next_copy;
copy->cpy_hdr.links.prev =
previous_prev;
copy->size += copy_size;
}
return kr;
}
vm_map_unlock(dst_map);
} else {
/*
* Performance gain:
*
* if the copy and dst address are misaligned but the same
* offset within the page we can copy_not_aligned the
* misaligned parts and copy aligned the rest. If they are
* aligned but len is unaligned we simply need to copy
* the end bit unaligned. We'll need to split the misaligned
* bits of the region in this case !
*/
/* ALWAYS UNLOCKS THE dst_map MAP */
if ((kr = vm_map_copy_overwrite_unaligned( dst_map,
tmp_entry, copy, base_addr)) != KERN_SUCCESS) {
if(next_copy != NULL) {
copy->cpy_hdr.nentries +=
remaining_entries;
copy->cpy_hdr.links.prev->vme_next =
next_copy;
copy->cpy_hdr.links.prev =
previous_prev;
copy->size += copy_size;
}
return kr;
}
}
total_size -= copy_size;
if(total_size == 0)
break;
base_addr += copy_size;
copy_size = 0;
copy->offset = new_offset;
if(next_copy != NULL) {
copy->cpy_hdr.nentries = remaining_entries;
copy->cpy_hdr.links.next = next_copy;
copy->cpy_hdr.links.prev = previous_prev;
next_copy->vme_prev = vm_map_copy_to_entry(copy);
copy->size = total_size;
}
vm_map_lock(dst_map);
while(TRUE) {
if (!vm_map_lookup_entry(dst_map,
base_addr, &tmp_entry)) {
vm_map_unlock(dst_map);
return(KERN_INVALID_ADDRESS);
}
if (tmp_entry->in_transition) {
entry->needs_wakeup = TRUE;
vm_map_entry_wait(dst_map, THREAD_UNINT);
} else {
break;
}
}
vm_map_clip_start(dst_map, tmp_entry, trunc_page(base_addr));
entry = tmp_entry;
} /* while */
/*
* Throw away the vm_map_copy object
*/
vm_map_copy_discard(copy);
return(KERN_SUCCESS);
}/* vm_map_copy_overwrite */
/*
* Routine: vm_map_copy_overwrite_unaligned
*
* Decription:
* Physically copy unaligned data
*
* Implementation:
* Unaligned parts of pages have to be physically copied. We use
* a modified form of vm_fault_copy (which understands none-aligned
* page offsets and sizes) to do the copy. We attempt to copy as
* much memory in one go as possibly, however vm_fault_copy copies
* within 1 memory object so we have to find the smaller of "amount left"
* "source object data size" and "target object data size". With
* unaligned data we don't need to split regions, therefore the source
* (copy) object should be one map entry, the target range may be split
* over multiple map entries however. In any event we are pessimistic
* about these assumptions.
*
* Assumptions:
* dst_map is locked on entry and is return locked on success,
* unlocked on error.
*/
kern_return_t
vm_map_copy_overwrite_unaligned(
vm_map_t dst_map,
vm_map_entry_t entry,
vm_map_copy_t copy,
vm_offset_t start)
{
vm_map_entry_t copy_entry = vm_map_copy_first_entry(copy);
vm_map_version_t version;
vm_object_t dst_object;
vm_offset_t dst_offset;
vm_offset_t src_offset;
vm_offset_t entry_offset;
vm_offset_t entry_end;
vm_size_t src_size,
dst_size,
copy_size,
amount_left;
kern_return_t kr = KERN_SUCCESS;
vm_map_lock_write_to_read(dst_map);
src_offset = copy->offset - trunc_page(copy->offset);
amount_left = copy->size;
/*
* unaligned so we never clipped this entry, we need the offset into
* the vm_object not just the data.
*/
while (amount_left > 0) {
/* "start" must be within the current map entry */
assert ((start>=entry->vme_start) && (start<entry->vme_end));
dst_offset = start - entry->vme_start;
dst_size = entry->vme_end - start;
src_size = copy_entry->vme_end -
(copy_entry->vme_start + src_offset);
if (dst_size < src_size) {
/*
* we can only copy dst_size bytes before
* we have to get the next destination entry
*/
copy_size = dst_size;
} else {
/*
* we can only copy src_size bytes before
* we have to get the next source copy entry
*/
copy_size = src_size;
}
if (copy_size > amount_left) {
copy_size = amount_left;
}
/*
* Entry needs copy, create a shadow shadow object for
* copy on write region.
*/
if (entry->needs_copy &&
((entry->protection & VM_PROT_WRITE) != 0))
{
if (vm_map_lock_read_to_write(dst_map)) {
vm_map_lock_read(dst_map);
goto RetryLookup;
}
vm_object_shadow(&entry->object.vm_object,
&entry->offset,
(vm_size_t)(entry->vme_end
- entry->vme_start));
entry->needs_copy = FALSE;
vm_map_lock_write_to_read(dst_map);
}
dst_object = entry->object.vm_object;
/*
* unlike with the virtual (aligned) copy we're going
* to fault on it therefore we need a target object.
*/
if (dst_object == VM_OBJECT_NULL) {
if (vm_map_lock_read_to_write(dst_map)) {
vm_map_lock_read(dst_map);
goto RetryLookup;
}
dst_object = vm_object_allocate((vm_size_t)
entry->vme_end - entry->vme_start);
entry->object.vm_object = dst_object;
entry->offset = 0;
vm_map_lock_write_to_read(dst_map);
}
/*
* Take an object reference and unlock map. The "entry" may
* disappear or change when the map is unlocked.
*/
vm_object_reference(dst_object);
version.main_timestamp = dst_map->timestamp;
entry_offset = entry->offset;
entry_end = entry->vme_end;
vm_map_unlock_read(dst_map);
/*
* Copy as much as possible in one pass
*/
kr = vm_fault_copy(
copy_entry->object.vm_object,
copy_entry->offset + src_offset,
©_size,
dst_object,
entry_offset + dst_offset,
dst_map,
&version,
THREAD_UNINT );
start += copy_size;
src_offset += copy_size;
amount_left -= copy_size;
/*
* Release the object reference
*/
vm_object_deallocate(dst_object);
/*
* If a hard error occurred, return it now
*/
if (kr != KERN_SUCCESS)
return kr;
if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
|| amount_left == 0)
{
/*
* all done with this copy entry, dispose.
*/
vm_map_copy_entry_unlink(copy, copy_entry);
vm_object_deallocate(copy_entry->object.vm_object);
vm_map_copy_entry_dispose(copy, copy_entry);
if ((copy_entry = vm_map_copy_first_entry(copy))
== vm_map_copy_to_entry(copy) && amount_left) {
/*
* not finished copying but run out of source
*/
return KERN_INVALID_ADDRESS;
}
src_offset = 0;
}
if (amount_left == 0)
return KERN_SUCCESS;
vm_map_lock_read(dst_map);
if (version.main_timestamp == dst_map->timestamp) {
if (start == entry_end) {
/*
* destination region is split. Use the version
* information to avoid a lookup in the normal
* case.
*/
entry = entry->vme_next;
/*
* should be contiguous. Fail if we encounter
* a hole in the destination.
*/
if (start != entry->vme_start) {
vm_map_unlock_read(dst_map);
return KERN_INVALID_ADDRESS ;
}
}
} else {
/*
* Map version check failed.
* we must lookup the entry because somebody
* might have changed the map behind our backs.
*/
RetryLookup:
if (!vm_map_lookup_entry(dst_map, start, &entry))
{
vm_map_unlock_read(dst_map);
return KERN_INVALID_ADDRESS ;
}
}
}/* while */
/* NOTREACHED ?? */
vm_map_unlock_read(dst_map);
return KERN_SUCCESS;
}/* vm_map_copy_overwrite_unaligned */
/*
* Routine: vm_map_copy_overwrite_aligned
*
* Description:
* Does all the vm_trickery possible for whole pages.
*
* Implementation:
*
* If there are no permanent objects in the destination,
* and the source and destination map entry zones match,
* and the destination map entry is not shared,
* then the map entries can be deleted and replaced
* with those from the copy. The following code is the
* basic idea of what to do, but there are lots of annoying
* little details about getting protection and inheritance
* right. Should add protection, inheritance, and sharing checks
* to the above pass and make sure that no wiring is involved.
*/
kern_return_t
vm_map_copy_overwrite_aligned(
vm_map_t dst_map,
vm_map_entry_t tmp_entry,
vm_map_copy_t copy,
vm_offset_t start)
{
vm_object_t object;
vm_map_entry_t copy_entry;
vm_size_t copy_size;
vm_size_t size;
vm_map_entry_t entry;
while ((copy_entry = vm_map_copy_first_entry(copy))
!= vm_map_copy_to_entry(copy))
{
copy_size = (copy_entry->vme_end - copy_entry->vme_start);
entry = tmp_entry;
size = (entry->vme_end - entry->vme_start);
/*
* Make sure that no holes popped up in the
* address map, and that the protection is
* still valid, in case the map was unlocked
* earlier.
*/
if ((entry->vme_start != start) || (entry->is_sub_map)) {
vm_map_unlock(dst_map);
return(KERN_INVALID_ADDRESS);
}
assert(entry != vm_map_to_entry(dst_map));
/*
* Check protection again
*/
if ( ! (entry->protection & VM_PROT_WRITE)) {
vm_map_unlock(dst_map);
return(KERN_PROTECTION_FAILURE);
}
/*
* Adjust to source size first
*/
if (copy_size < size) {
vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
size = copy_size;
}
/*
* Adjust to destination size
*/
if (size < copy_size) {
vm_map_copy_clip_end(copy, copy_entry,
copy_entry->vme_start + size);
copy_size = size;
}
assert((entry->vme_end - entry->vme_start) == size);
assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
assert((copy_entry->vme_end - copy_entry->vme_start) == size);
/*
* If the destination contains temporary unshared memory,
* we can perform the copy by throwing it away and
* installing the source data.
*/
object = entry->object.vm_object;
if ((!entry->is_shared &&
((object == VM_OBJECT_NULL) ||
(object->internal && !object->true_share))) ||
entry->needs_copy) {
vm_object_t old_object = entry->object.vm_object;
vm_offset_t old_offset = entry->offset;
vm_offset_t offset;
/*
* Ensure that the source and destination aren't
* identical
*/
if (old_object == copy_entry->object.vm_object &&
old_offset == copy_entry->offset) {
vm_map_copy_entry_unlink(copy, copy_entry);
vm_map_copy_entry_dispose(copy, copy_entry);
if (old_object != VM_OBJECT_NULL)
vm_object_deallocate(old_object);
start = tmp_entry->vme_end;
tmp_entry = tmp_entry->vme_next;
continue;
}
entry->object = copy_entry->object;
object = entry->object.vm_object;
offset = entry->offset = copy_entry->offset;
entry->needs_copy = copy_entry->needs_copy;
entry->wired_count = 0;
entry->user_wired_count = 0;
vm_map_copy_entry_unlink(copy, copy_entry);
vm_map_copy_entry_dispose(copy, copy_entry);
if (old_object != VM_OBJECT_NULL) {
vm_object_pmap_protect(
old_object,
old_offset,
size,
dst_map->pmap,
tmp_entry->vme_start,
VM_PROT_NONE);
vm_object_deallocate(old_object);
}
/*
* Try to aggressively enter physical mappings
* (but avoid uninstantiated objects)
*/
if (object != VM_OBJECT_NULL) {
vm_offset_t va = entry->vme_start;
while (va < entry->vme_end) {
register vm_page_t m;
vm_prot_t prot;
/*
* Look for the page in the top object
*/
prot = entry->protection;
vm_object_lock(object);
vm_object_paging_begin(object);
if ((m = vm_page_lookup(object,offset)) !=
VM_PAGE_NULL && !m->busy &&
!m->fictitious &&
(!m->unusual || (!m->error &&
!m->restart && !m->absent &&
(prot & m->page_lock) == 0))) {
m->busy = TRUE;
vm_object_unlock(object);
/*
* Honor COW obligations
*/
if (entry->needs_copy)
prot &= ~VM_PROT_WRITE;
PMAP_ENTER(dst_map->pmap, va, m,
prot, FALSE);
vm_object_lock(object);
vm_page_lock_queues();
if (!m->active && !m->inactive)
vm_page_activate(m);
vm_page_unlock_queues();
PAGE_WAKEUP_DONE(m);
}
vm_object_paging_end(object);
vm_object_unlock(object);
offset += PAGE_SIZE;
va += PAGE_SIZE;
} /* end while (va < entry->vme_end) */
} /* end if (object) */
/*
* Set up for the next iteration. The map
* has not been unlocked, so the next
* address should be at the end of this
* entry, and the next map entry should be
* the one following it.
*/
start = tmp_entry->vme_end;
tmp_entry = tmp_entry->vme_next;
} else {
vm_map_version_t version;
vm_object_t dst_object = entry->object.vm_object;
vm_offset_t dst_offset = entry->offset;
kern_return_t r;
/*
* Take an object reference, and record
* the map version information so that the
* map can be safely unlocked.
*/
vm_object_reference(dst_object);
version.main_timestamp = dst_map->timestamp;
vm_map_unlock(dst_map);
/*
* Copy as much as possible in one pass
*/
copy_size = size;
r = vm_fault_copy(
copy_entry->object.vm_object,
copy_entry->offset,
©_size,
dst_object,
dst_offset,
dst_map,
&version,
THREAD_UNINT );
/*
* Release the object reference
*/
vm_object_deallocate(dst_object);
/*
* If a hard error occurred, return it now
*/
if (r != KERN_SUCCESS)
return(r);
if (copy_size != 0) {
/*
* Dispose of the copied region
*/
vm_map_copy_clip_end(copy, copy_entry,
copy_entry->vme_start + copy_size);
vm_map_copy_entry_unlink(copy, copy_entry);
vm_object_deallocate(copy_entry->object.vm_object);
vm_map_copy_entry_dispose(copy, copy_entry);
}
/*
* Pick up in the destination map where we left off.
*
* Use the version information to avoid a lookup
* in the normal case.
*/
start += copy_size;
vm_map_lock(dst_map);
if ((version.main_timestamp + 1) == dst_map->timestamp) {
/* We can safely use saved tmp_entry value */
vm_map_clip_end(dst_map, tmp_entry, start);
tmp_entry = tmp_entry->vme_next;
} else {
/* Must do lookup of tmp_entry */
if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
vm_map_unlock(dst_map);
return(KERN_INVALID_ADDRESS);
}
vm_map_clip_start(dst_map, tmp_entry, start);
}
}
}/* while */
return(KERN_SUCCESS);
}/* vm_map_copy_overwrite_aligned */
#if DIPC
#include <dipc/dipc_counters.h>
dcntr_decl(unsigned int c_dipc_overwrite_opt = 0;)
dcntr_decl(unsigned int c_dipc_overwrite_nonopt = 0;)
dcntr_decl(unsigned int c_dipc_overwrite_recv_done = 0;)
#else /* DIPC */
#define dstat_decl(foo)
#define dstat(foo)
#define dcntr_decl(foo)
#define dcntr(foo)
#endif /* DIPC */
#if DIPC
vm_map_copy_t
vm_map_copy_overwrite_recv(
vm_map_t map,
vm_offset_t addr,
vm_size_t size)
{
vm_offset_t end_addr;
vm_map_copy_t copy = VM_MAP_COPY_NULL;
vm_map_entry_t entry, new_entry;
kern_return_t kr;
if (size == 0)
return VM_MAP_COPY_NULL;
if (!page_aligned(size) || !page_aligned(addr)) {
end_addr = round_page(addr + size);
} else {
end_addr = addr + size;
}
/* Do allocations before taking map lock. */
/* build an ENTRY_LIST vm_map_copy_t */
copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
vm_map_copy_first_entry(copy) =
vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
copy->type = VM_MAP_COPY_ENTRY_LIST;
copy->offset = addr;
copy->size = size;
copy->cpy_hdr.nentries = 0;
copy->cpy_hdr.entries_pageable = TRUE;
new_entry = vm_map_copy_entry_create(copy);
vm_map_lock(map);
if (!vm_map_lookup_entry(map, addr, &entry)) {
vm_map_unlock(map);
vm_map_copy_entry_dispose(copy, new_entry);
vm_map_copy_discard(copy);
return VM_MAP_COPY_NULL;
}
if (entry->is_sub_map) {
vm_map_unlock(map);
vm_map_copy_entry_dispose(copy, new_entry);
vm_map_copy_discard(copy);
return VM_MAP_COPY_NULL;
}
/*
* for now, only do this if the range is contained completely
* within a single entry. Just give up if entry is in_transition:
* it could be another thread wiring/unwiring, but it could be another
* part of this message that set it, in which case we'd deadlock if
* we waited for it to clear. If the entry is not writeable, give up
* and let the ipc_kmsg_copyout code handle the error later.
*/
if ((end_addr > entry->vme_end) ||
entry->in_transition ||
!(entry->protection & VM_PROT_WRITE)) {
vm_map_unlock(map);
dcntr(++c_dipc_overwrite_nonopt);
vm_map_copy_entry_dispose(copy, new_entry);
vm_map_copy_discard(copy);
return VM_MAP_COPY_NULL;
}
vm_map_clip_start(map, entry, trunc_page(addr));
/* handle COW obligations */
if (entry->needs_copy) {
vm_object_shadow(&entry->object.vm_object, &entry->offset,
(vm_size_t)(entry->vme_end - entry->vme_start));
entry->needs_copy = FALSE;
}
/* make sure there's a target object to put pages into */
if (entry->object.vm_object == VM_OBJECT_NULL) {
entry->object.vm_object = vm_object_allocate((vm_size_t)
(entry->vme_end - entry->vme_start));
entry->offset = 0;
}
/*
* set in_transition to keep the entry in the map from moving around
* while DIPC fills the object in with data. This must be cleared
* later on by calling vm_map_copy_overwrite_recv_done (below).
*/
entry->in_transition = TRUE;
/* make a copy of the entry */
vm_map_entry_copy(new_entry, entry);
vm_object_reference(new_entry->object.vm_object);
vm_map_unlock(map);
/* link in the new entry */
vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy), new_entry);
dcntr(++c_dipc_overwrite_opt);
return copy;
}
/*
* Find the map entry that corresponds to the copy entry, then clear
* in_transition on it. Discard the copy.
*/
kern_return_t
vm_map_copy_overwrite_recv_done(
vm_map_t map,
vm_map_copy_t copy)
{
vm_map_entry_t entry, map_entry;
boolean_t need_wakeup = FALSE;
assert(copy != VM_MAP_COPY_NULL);
assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
dcntr(++c_dipc_overwrite_recv_done);
entry = vm_map_copy_first_entry(copy);
assert(entry != VM_MAP_ENTRY_NULL);
vm_map_lock(map);
if (!vm_map_lookup_entry(map, entry->vme_start, &map_entry)) {
vm_map_unlock(map);
return KERN_INVALID_ADDRESS;
}
while ((map_entry != vm_map_to_entry(map)) &&
(map_entry->vme_end <= entry->vme_end)) {
assert(map_entry->in_transition);
map_entry->in_transition = FALSE;
if (map_entry->needs_wakeup) {
map_entry->needs_wakeup = FALSE;
need_wakeup = TRUE;
}
map_entry = map_entry->vme_next;
}
vm_map_unlock(map);
if (need_wakeup)
vm_map_entry_wakeup(map);
vm_map_copy_discard(copy);
return KERN_SUCCESS;
}
#endif /* DIPC */
/*
* Routine: vm_map_copyout_kernel_buffer
*
* Description:
* Copy out data from a kernel buffer into space in the
* destination map. The space may be otpionally dynamically
* allocated.
*
* If successful, consumes the copy object.
* Otherwise, the caller is responsible for it.
*/
kern_return_t
vm_map_copyout_kernel_buffer(
vm_map_t map,
vm_offset_t *addr, /* IN/OUT */
vm_map_copy_t copy,
boolean_t overwrite)
{
kern_return_t kr = KERN_SUCCESS;
thread_act_t thr_act = current_act();
if (!overwrite) {
/*
* Allocate space in the target map for the data
*/
*addr = 0;
kr = vm_map_enter(map,
addr,
round_page(copy->size),
(vm_offset_t) 0,
TRUE,
VM_OBJECT_NULL,
(vm_offset_t) 0,
FALSE,
VM_PROT_DEFAULT,
VM_PROT_ALL,
VM_INHERIT_DEFAULT);
if (kr != KERN_SUCCESS)
return(kr);
}
/*
* Copyout the data from the kernel buffer to the target map.
*/
if (thr_act->map == map) {
/*
* If the target map is the current map, just do
* the copy.
*/
if (copyout((char *)copy->cpy_kdata, (char *)*addr,
copy->size)) {
kr = KERN_INVALID_ADDRESS;
}
}
else {
vm_map_t oldmap;
/*
* If the target map is another map, assume the
* target's address space identity for the duration
* of the copy.
*/
vm_map_reference(map);
oldmap = vm_map_switch(map);
if (copyout((char *)copy->cpy_kdata, (char *)*addr,
copy->size)) {
kr = KERN_INVALID_ADDRESS;
}
(void) vm_map_switch(oldmap);
vm_map_deallocate(map);
}
kfree((vm_offset_t)copy, copy->cpy_kalloc_size);
return(kr);
}
/*
* Macro: vm_map_copy_insert
*
* Description:
* Link a copy chain ("copy") into a map at the
* specified location (after "where").
* Side effects:
* The copy chain is destroyed.
* Warning:
* The arguments are evaluated multiple times.
*/
#define vm_map_copy_insert(map, where, copy) \
MACRO_BEGIN \
vm_map_t VMCI_map; \
vm_map_entry_t VMCI_where; \
vm_map_copy_t VMCI_copy; \
VMCI_map = (map); \
VMCI_where = (where); \
VMCI_copy = (copy); \
((VMCI_where->vme_next)->vme_prev = vm_map_copy_last_entry(VMCI_copy))\
->vme_next = (VMCI_where->vme_next); \
((VMCI_where)->vme_next = vm_map_copy_first_entry(VMCI_copy)) \
->vme_prev = VMCI_where; \
VMCI_map->hdr.nentries += VMCI_copy->cpy_hdr.nentries; \
UPDATE_FIRST_FREE(VMCI_map, VMCI_map->first_free); \
zfree(vm_map_copy_zone, (vm_offset_t) VMCI_copy); \
MACRO_END
/*
* Routine: vm_map_copyout
*
* Description:
* Copy out a copy chain ("copy") into newly-allocated
* space in the destination map.
*
* If successful, consumes the copy object.
* Otherwise, the caller is responsible for it.
*/
kern_return_t
vm_map_copyout(
register vm_map_t dst_map,
vm_offset_t *dst_addr, /* OUT */
register vm_map_copy_t copy)
{
vm_size_t size;
vm_size_t adjustment;
vm_offset_t start;
vm_offset_t vm_copy_start;
vm_map_entry_t last;
register
vm_map_entry_t entry;
/*
* Check for null copy object.
*/
if (copy == VM_MAP_COPY_NULL) {
*dst_addr = 0;
return(KERN_SUCCESS);
}
/*
* Check for special copy object, created
* by vm_map_copyin_object.
*/
if (copy->type == VM_MAP_COPY_OBJECT) {
vm_object_t object = copy->cpy_object;
kern_return_t kr;
vm_size_t offset;
offset = trunc_page(copy->offset);
size = round_page(copy->size + copy->offset - offset);
*dst_addr = 0;
kr = vm_map_enter(dst_map, dst_addr, size,
(vm_offset_t) 0, TRUE,
object, offset, FALSE,
VM_PROT_DEFAULT, VM_PROT_ALL,
VM_INHERIT_DEFAULT);
if (kr != KERN_SUCCESS)
return(kr);
/* Account for non-pagealigned copy object */
*dst_addr += copy->offset - offset;
zfree(vm_map_copy_zone, (vm_offset_t) copy);
return(KERN_SUCCESS);
}
/*
* Check for special kernel buffer allocated
* by new_ipc_kmsg_copyin.
*/
if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
return(vm_map_copyout_kernel_buffer(dst_map, dst_addr,
copy, FALSE));
}
if (copy->type == VM_MAP_COPY_PAGE_LIST)
return(vm_map_copyout_page_list(dst_map, dst_addr, copy));
/*
* Find space for the data
*/
vm_copy_start = trunc_page(copy->offset);
size = round_page(copy->offset + copy->size) - vm_copy_start;
StartAgain: ;
vm_map_lock(dst_map);
assert(first_free_is_valid(dst_map));
start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
vm_map_min(dst_map) : last->vme_end;
while (TRUE) {
vm_map_entry_t next = last->vme_next;
vm_offset_t end = start + size;
if ((end > dst_map->max_offset) || (end < start)) {
if (dst_map->wait_for_space) {
if (size <= (dst_map->max_offset - dst_map->min_offset)) {
assert_wait((event_t) dst_map,
THREAD_INTERRUPTIBLE);
vm_map_unlock(dst_map);
thread_block((void (*)(void))0);
goto StartAgain;
}
}
vm_map_unlock(dst_map);
return(KERN_NO_SPACE);
}
if ((next == vm_map_to_entry(dst_map)) ||
(next->vme_start >= end))
break;
last = next;
start = last->vme_end;
}
/*
* Since we're going to just drop the map
* entries from the copy into the destination
* map, they must come from the same pool.
*/
if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
/*
* Mismatches occur when dealing with the default
* pager.
*/
zone_t old_zone;
vm_map_entry_t next, new;
/*
* Find the zone that the copies were allocated from
*/
old_zone = (copy->cpy_hdr.entries_pageable)
? vm_map_entry_zone
: vm_map_kentry_zone;
entry = vm_map_copy_first_entry(copy);
/*
* Reinitialize the copy so that vm_map_copy_entry_link
* will work.
*/
copy->cpy_hdr.nentries = 0;
copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
vm_map_copy_first_entry(copy) =
vm_map_copy_last_entry(copy) =
vm_map_copy_to_entry(copy);
/*
* Copy each entry.
*/
while (entry != vm_map_copy_to_entry(copy)) {
new = vm_map_copy_entry_create(copy);
vm_map_entry_copy_full(new, entry);
vm_map_copy_entry_link(copy,
vm_map_copy_last_entry(copy),
new);
next = entry->vme_next;
zfree(old_zone, (vm_offset_t) entry);
entry = next;
}
}
/*
* Adjust the addresses in the copy chain, and
* reset the region attributes.
*/
adjustment = start - vm_copy_start;
for (entry = vm_map_copy_first_entry(copy);
entry != vm_map_copy_to_entry(copy);
entry = entry->vme_next) {
entry->vme_start += adjustment;
entry->vme_end += adjustment;
entry->inheritance = VM_INHERIT_DEFAULT;
entry->protection = VM_PROT_DEFAULT;
entry->max_protection = VM_PROT_ALL;
entry->behavior = VM_BEHAVIOR_DEFAULT;
/*
* If the entry is now wired,
* map the pages into the destination map.
*/
if (entry->wired_count != 0) {
register vm_offset_t va;
vm_offset_t offset;
register vm_object_t object;
object = entry->object.vm_object;
offset = entry->offset;
va = entry->vme_start;
pmap_pageable(dst_map->pmap,
entry->vme_start,
entry->vme_end,
TRUE);
while (va < entry->vme_end) {
register vm_page_t m;
/*
* Look up the page in the object.
* Assert that the page will be found in the
* top object:
* either
* the object was newly created by
* vm_object_copy_slowly, and has
* copies of all of the pages from
* the source object
* or
* the object was moved from the old
* map entry; because the old map
* entry was wired, all of the pages
* were in the top-level object.
* (XXX not true if we wire pages for
* reading)
*/
vm_object_lock(object);
vm_object_paging_begin(object);
m = vm_page_lookup(object, offset);
if (m == VM_PAGE_NULL || m->wire_count == 0 ||
m->absent)
panic("vm_map_copyout: wiring 0x%x", m);
m->busy = TRUE;
vm_object_unlock(object);
PMAP_ENTER(dst_map->pmap, va, m,
entry->protection, TRUE);
vm_object_lock(object);
PAGE_WAKEUP_DONE(m);
/* the page is wired, so we don't have to activate */
vm_object_paging_end(object);
vm_object_unlock(object);
offset += PAGE_SIZE;
va += PAGE_SIZE;
}
}
else if (size <= vm_map_aggressive_enter_max) {
register vm_offset_t va;
vm_offset_t offset;
register vm_object_t object;
vm_prot_t prot;
object = entry->object.vm_object;
if (object != VM_OBJECT_NULL) {
offset = entry->offset;
va = entry->vme_start;
while (va < entry->vme_end) {
register vm_page_t m;
/*
* Look up the page in the object.
* Assert that the page will be found
* in the top object if at all...
*/
vm_object_lock(object);
vm_object_paging_begin(object);
if (((m = vm_page_lookup(object,
offset))
!= VM_PAGE_NULL) &&
!m->busy && !m->fictitious &&
!m->absent && !m->error) {
m->busy = TRUE;
vm_object_unlock(object);
/* honor cow obligations */
prot = entry->protection;
if (entry->needs_copy)
prot &= ~VM_PROT_WRITE;
PMAP_ENTER(dst_map->pmap, va,
m, prot, FALSE);
vm_object_lock(object);
vm_page_lock_queues();
if (!m->active && !m->inactive)
vm_page_activate(m);
vm_page_unlock_queues();
PAGE_WAKEUP_DONE(m);
}
vm_object_paging_end(object);
vm_object_unlock(object);
offset += PAGE_SIZE;
va += PAGE_SIZE;
}
}
}
}
/*
* Correct the page alignment for the result
*/
*dst_addr = start + (copy->offset - vm_copy_start);
/*
* Update the hints and the map size
*/
SAVE_HINT(dst_map, vm_map_copy_last_entry(copy));
dst_map->size += size;
/*
* Link in the copy
*/
vm_map_copy_insert(dst_map, last, copy);
vm_map_unlock(dst_map);
/*
* XXX If wiring_required, call vm_map_pageable
*/
return(KERN_SUCCESS);
}
boolean_t vm_map_aggressive_enter; /* not used yet */
/*
*
* vm_map_copyout_page_list:
*
* Version of vm_map_copyout() for page list vm map copies.
*
*/
kern_return_t
vm_map_copyout_page_list(
register vm_map_t dst_map,
vm_offset_t *dst_addr, /* OUT */
register vm_map_copy_t copy)
{
vm_size_t size;
vm_offset_t start;
vm_offset_t end;
vm_offset_t offset;
vm_map_entry_t last;
register
vm_object_t object;
vm_page_t *page_list, m;
vm_map_entry_t entry;
vm_offset_t old_last_offset;
boolean_t cont_invoked, needs_wakeup;
kern_return_t result = KERN_SUCCESS;
vm_map_copy_t orig_copy;
vm_offset_t dst_offset;
boolean_t must_wire;
boolean_t aggressive_enter;
/*
* Check for null copy object.
*/
if (copy == VM_MAP_COPY_NULL) {
*dst_addr = 0;
return(KERN_SUCCESS);
}
assert(copy->type == VM_MAP_COPY_PAGE_LIST);
/*
* Make sure the pages are stolen, because we are
* going to put them in a new object. Assume that
* all pages are identical to first in this regard.
*/
page_list = ©->cpy_page_list[0];
if (!copy->cpy_page_loose)
vm_map_copy_steal_pages(copy);
/*
* Find space for the data
*/
size = round_page(copy->offset + copy->size) -
trunc_page(copy->offset);
StartAgain:
vm_map_lock(dst_map);
must_wire = dst_map->wiring_required;
assert(first_free_is_valid(dst_map));
last = dst_map->first_free;
if (last == vm_map_to_entry(dst_map)) {
start = vm_map_min(dst_map);
} else {
start = last->vme_end;
}
while (TRUE) {
vm_map_entry_t next = last->vme_next;
end = start + size;
if ((end > dst_map->max_offset) || (end < start)) {
if (dst_map->wait_for_space) {
if (size <= (dst_map->max_offset -
dst_map->min_offset)) {
assert_wait((event_t) dst_map,
THREAD_INTERRUPTIBLE);
vm_map_unlock(dst_map);
thread_block((void (*)(void))0);
goto StartAgain;
}
}
vm_map_unlock(dst_map);
return(KERN_NO_SPACE);
}
if ((next == vm_map_to_entry(dst_map)) ||
(next->vme_start >= end)) {
break;
}
last = next;
start = last->vme_end;
}
/*
* See whether we can avoid creating a new entry (and object) by
* extending one of our neighbors. [So far, we only attempt to
* extend from below.]
*
* The code path below here is a bit twisted. If any of the
* extension checks fails, we branch to create_object. If
* it all works, we fall out the bottom and goto insert_pages.
*/
if (last == vm_map_to_entry(dst_map) ||
last->vme_end != start ||
last->is_shared != FALSE ||
last->is_sub_map != FALSE ||
last->inheritance != VM_INHERIT_DEFAULT ||
last->protection != VM_PROT_DEFAULT ||
last->max_protection != VM_PROT_ALL ||
last->behavior != VM_BEHAVIOR_DEFAULT ||
last->in_transition ||
(must_wire ? (last->wired_count != 1 ||
last->user_wired_count != 0) :
(last->wired_count != 0))) {
goto create_object;
}
/*
* If this entry needs an object, make one.
*/
if (last->object.vm_object == VM_OBJECT_NULL) {
object = vm_object_allocate(
(vm_size_t)(last->vme_end - last->vme_start + size));
last->object.vm_object = object;
last->offset = 0;
}
else {
vm_offset_t prev_offset = last->offset;
vm_size_t prev_size = start - last->vme_start;
vm_size_t new_size;
/*
* This is basically vm_object_coalesce.
*/
object = last->object.vm_object;
vm_object_lock(object);
/*
* Try to collapse the object first
*/
vm_object_collapse(object);
/*
* Can't coalesce if pages not mapped to
* last may be in use anyway:
* . more than one reference
* . paged out
* . shadows another object
* . has a copy elsewhere
* . paging references (pages might be in page-list)
*/
if ((object->ref_count > 1) ||
object->pager_created ||
(object->shadow != VM_OBJECT_NULL) ||
(object->copy != VM_OBJECT_NULL) ||
(object->paging_in_progress != 0)) {
vm_object_unlock(object);
goto create_object;
}
/*
* Extend the object if necessary. Don't have to call
* vm_object_page_remove because the pages aren't mapped,
* and vm_page_replace will free up any old ones it encounters.
*/
new_size = prev_offset + prev_size + size;
if (new_size > object->size) {
#if MACH_PAGEMAP
/*
* We cannot extend an object that has existence info,
* since the existence info might then fail to cover
* the entire object.
*
* This assertion must be true because the object
* has no pager, and we only create existence info
* for objects with pagers.
*/
assert(object->existence_map == VM_EXTERNAL_NULL);
#endif /* MACH_PAGEMAP */
object->size = new_size;
}
vm_object_unlock(object);
}
/*
* Coalesced the two objects - can extend
* the previous map entry to include the
* new range.
*/
dst_map->size += size;
last->vme_end = end;
UPDATE_FIRST_FREE(dst_map, dst_map->first_free);
SAVE_HINT(dst_map, last);
goto insert_pages;
create_object:
/*
* Create object
*/
object = vm_object_allocate(size);
/*
* Create entry
*/
last = vm_map_entry_insert(dst_map, last, start, start + size,
object, 0, FALSE, FALSE, TRUE,
VM_PROT_DEFAULT, VM_PROT_ALL,
VM_BEHAVIOR_DEFAULT,
VM_INHERIT_DEFAULT, (must_wire ? 1 : 0));
/*
* Transfer pages into new object.
* Scan page list in vm_map_copy.
*/
insert_pages:
dst_offset = copy->offset & PAGE_MASK;
cont_invoked = FALSE;
orig_copy = copy;
last->in_transition = TRUE;
old_last_offset = last->offset
+ (start - last->vme_start);
aggressive_enter = (size <= vm_map_aggressive_enter_max);
for (offset = 0; offset < size; offset += PAGE_SIZE) {
m = *page_list;
assert(m && !m->tabled);
/*
* Must clear busy bit in page before inserting it.
* Ok to skip wakeup logic because nobody else
* can possibly know about this page. Also set
* dirty bit on the assumption that the page is
* not a page of zeros.
*/
m->busy = FALSE;
m->dirty = TRUE;
vm_object_lock(object);
vm_page_lock_queues();
vm_page_replace(m, object, old_last_offset + offset);
if (must_wire) {
vm_page_wire(m);
} else if (aggressive_enter) {
vm_page_activate(m);
}
vm_page_unlock_queues();
vm_object_unlock(object);
if (aggressive_enter || must_wire) {
PMAP_ENTER(dst_map->pmap,
last->vme_start + m->offset - last->offset,
m, last->protection, must_wire);
}
*page_list++ = VM_PAGE_NULL;
assert(copy != VM_MAP_COPY_NULL);
assert(copy->type == VM_MAP_COPY_PAGE_LIST);
if (--(copy->cpy_npages) == 0 &&
vm_map_copy_has_cont(copy)) {
vm_map_copy_t new_copy;
/*
* Ok to unlock map because entry is
* marked in_transition.
*/
cont_invoked = TRUE;
vm_map_unlock(dst_map);
vm_map_copy_invoke_cont(copy, &new_copy, &result);
if (result == KERN_SUCCESS) {
/*
* If we got back a copy with real pages,
* steal them now. Either all of the
* pages in the list are tabled or none
* of them are; mixtures are not possible.
*
* Save original copy for consume on
* success logic at end of routine.
*/
if (copy != orig_copy)
vm_map_copy_discard(copy);
if ((copy = new_copy) != VM_MAP_COPY_NULL) {
page_list = ©->cpy_page_list[0];
if (!copy->cpy_page_loose)
vm_map_copy_steal_pages(copy);
}
}
else {
/*
* Continuation failed.
*/
vm_map_lock(dst_map);
goto error;
}
vm_map_lock(dst_map);
}
}
*dst_addr = start + dst_offset;
/*
* Clear the in transition bits. This is easy if we
* didn't have a continuation.
*/
error:
needs_wakeup = FALSE;
if (!cont_invoked) {
/*
* We didn't unlock the map, so nobody could
* be waiting.
*/
last->in_transition = FALSE;
assert(!last->needs_wakeup);
}
else {
if (!vm_map_lookup_entry(dst_map, start, &entry))
panic("vm_map_copyout_page_list: missing entry");
/*
* Clear transition bit for all constituent entries that
* were in the original entry. Also check for waiters.
*/
while ((entry != vm_map_to_entry(dst_map)) &&
(entry->vme_start < end)) {
assert(entry->in_transition);
entry->in_transition = FALSE;
if (entry->needs_wakeup) {
entry->needs_wakeup = FALSE;
needs_wakeup = TRUE;
}
entry = entry->vme_next;
}
}
if (result != KERN_SUCCESS)
(void) vm_map_delete(dst_map, start, end, VM_MAP_NO_FLAGS);
vm_map_unlock(dst_map);
if (needs_wakeup)
vm_map_entry_wakeup(dst_map);
/*
* Consume on success logic.
*/
if (copy != VM_MAP_COPY_NULL && copy != orig_copy) {
zfree(vm_map_copy_zone, (vm_offset_t) copy);
}
if (result == KERN_SUCCESS) {
assert(orig_copy != VM_MAP_COPY_NULL);
assert(orig_copy->type == VM_MAP_COPY_PAGE_LIST);
zfree(vm_map_copy_zone, (vm_offset_t) orig_copy);
}
return(result);
}
/*
* Routine: vm_map_copyin
*
* Description:
* Copy the specified region (src_addr, len) from the
* source address space (src_map), possibly removing
* the region from the source address space (src_destroy).
*
* Returns:
* A vm_map_copy_t object (copy_result), suitable for
* insertion into another address space (using vm_map_copyout),
* copying over another address space region (using
* vm_map_copy_overwrite). If the copy is unused, it
* should be destroyed (using vm_map_copy_discard).
*
* In/out conditions:
* The source map should not be locked on entry.
*/
#if DIPC
dstat_decl(unsigned int c_vmcc_volatile = 0;)
dstat_decl(unsigned int c_vmcc_null_entry = 0;)
dstat_decl(unsigned int c_vmcc_null_entry_continue = 0;)
dstat_decl(unsigned int c_vmcc_src_destroy_opt = 0;)
dstat_decl(unsigned int c_vmcc_wasnt_wired = 0;)
dstat_decl(unsigned int c_vmcc_vocq = 0;)
dstat_decl(unsigned int c_vmcc_vopp = 0;)
dstat_decl(unsigned int c_vmcc_voept = 0;)
dstat_decl(unsigned int c_vmcc_vocs = 0;)
dstat_decl(unsigned int c_vmcc_vocstrat = 0;)
dstat_decl(unsigned int c_vmcc_vmle = 0;)
dstat_decl(unsigned int c_vmcc_reloop = 0;)
dstat_decl(unsigned int c_vmcc_vmd = 0;)
#endif /* DIPC */
typedef struct submap_map {
vm_map_t parent_map;
vm_offset_t base_start;
vm_offset_t base_end;
struct submap_map *next;
} submap_map_t;
kern_return_t
vm_map_copyin_common(
vm_map_t src_map,
vm_offset_t src_addr,
vm_size_t len,
boolean_t src_destroy,
boolean_t src_volatile,
vm_map_copy_t *copy_result, /* OUT */
boolean_t use_maxprot)
{
vm_map_entry_t tmp_entry; /* Result of last map lookup --
* in multi-level lookup, this
* entry contains the actual
* vm_object/offset.
*/
register
vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
vm_offset_t src_start; /* Start of current entry --
* where copy is taking place now
*/
vm_offset_t src_end; /* End of entire region to be
* copied */
vm_offset_t base_start; /* submap fields to save offsets */
/* in original map */
vm_offset_t base_end;
vm_map_t base_map=src_map;
vm_map_entry_t base_entry;
boolean_t map_share=FALSE;
submap_map_t *parent_maps = NULL;
register
vm_map_copy_t copy; /* Resulting copy */
/*
* Check for copies of zero bytes.
*/
if (len == 0) {
*copy_result = VM_MAP_COPY_NULL;
return(KERN_SUCCESS);
}
/*
* Compute start and end of region
*/
src_start = trunc_page(src_addr);
src_end = round_page(src_addr + len);
#if DIPC
XPR(XPR_VM_MAP,
"vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d volatile %d\n",
(natural_t)src_map, src_addr, len, src_destroy, src_volatile);
#else /* DIPC */
XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n",
(natural_t)src_map, src_addr, len, src_destroy, 0);
#endif /* DIPC */
/*
* Check that the end address doesn't overflow
*/
if (src_end <= src_start)
if ((src_end < src_start) || (src_start != 0))
return(KERN_INVALID_ADDRESS);
/*
* Allocate a header element for the list.
*
* Use the start and end in the header to
* remember the endpoints prior to rounding.
*/
copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
vm_map_copy_first_entry(copy) =
vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
copy->type = VM_MAP_COPY_ENTRY_LIST;
copy->cpy_hdr.nentries = 0;
copy->cpy_hdr.entries_pageable = TRUE;
copy->offset = src_addr;
copy->size = len;
new_entry = vm_map_copy_entry_create(copy);
#define RETURN(x) \
MACRO_BEGIN \
vm_map_unlock(src_map); \
if (new_entry != VM_MAP_ENTRY_NULL) \
vm_map_copy_entry_dispose(copy,new_entry); \
vm_map_copy_discard(copy); \
{ \
submap_map_t *ptr; \
\
for(ptr = parent_maps; ptr != NULL; ptr = parent_maps) { \
parent_maps=parent_maps->next; \
kfree((vm_offset_t)ptr, sizeof(submap_map_t)); \
} \
} \
MACRO_RETURN(x); \
MACRO_END
/*
* Find the beginning of the region.
*/
vm_map_lock(src_map);
if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
RETURN(KERN_INVALID_ADDRESS);
vm_map_clip_start(src_map, tmp_entry, src_start);
/*
* Go through entries until we get to the end.
*/
while (TRUE) {
register
vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
vm_size_t src_size; /* Size of source
* map entry (in both
* maps)
*/
register
vm_object_t src_object; /* Object to copy */
vm_offset_t src_offset;
boolean_t src_needs_copy; /* Should source map
* be made read-only
* for copy-on-write?
*/
boolean_t new_entry_needs_copy; /* Will new entry be COW? */
boolean_t was_wired; /* Was source wired? */
vm_map_version_t version; /* Version before locks
* dropped to make copy
*/
kern_return_t result; /* Return value from
* copy_strategically.
*/
while(tmp_entry->is_sub_map) {
vm_size_t submap_len;
submap_map_t *ptr;
ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
ptr->next = parent_maps;
parent_maps = ptr;
ptr->parent_map = src_map;
ptr->base_start = src_start;
ptr->base_end = src_end;
submap_len = tmp_entry->vme_end - tmp_entry->vme_start;
if(submap_len > (src_end-src_start))
submap_len = src_end-src_start;
ptr->base_start += submap_len;
src_start -= tmp_entry->vme_start;
src_start += tmp_entry->offset;
src_end = src_start + submap_len;
src_map = tmp_entry->object.sub_map;
vm_map_lock(src_map);
vm_map_unlock(ptr->parent_map);
if (!vm_map_lookup_entry(
src_map, src_start, &tmp_entry))
RETURN(KERN_INVALID_ADDRESS);
map_share = TRUE;
vm_map_clip_start(src_map, tmp_entry, src_start);
src_entry = tmp_entry;
}
/*
* Create a new address map entry to hold the result.
* Fill in the fields from the appropriate source entries.
* We must unlock the source map to do this if we need
* to allocate a map entry.
*/
if (new_entry == VM_MAP_ENTRY_NULL) {
version.main_timestamp = src_map->timestamp;
vm_map_unlock(src_map);
dstat(++c_vmcc_null_entry);
new_entry = vm_map_copy_entry_create(copy);
vm_map_lock(src_map);
if ((version.main_timestamp + 1) != src_map->timestamp) {
if (!vm_map_lookup_entry(src_map, src_start,
&tmp_entry)) {
RETURN(KERN_INVALID_ADDRESS);
}
vm_map_clip_start(src_map, tmp_entry, src_start);
dstat(++c_vmcc_null_entry_continue);
continue; /* restart w/ new tmp_entry */
}
}
/*
* Verify that the region can be read.
*/
if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
!use_maxprot) ||
(src_entry->max_protection & VM_PROT_READ) == 0)
RETURN(KERN_PROTECTION_FAILURE);
/*
* Clip against the endpoints of the entire region.
*/
vm_map_clip_end(src_map, src_entry, src_end);
src_size = src_entry->vme_end - src_start;
src_object = src_entry->object.vm_object;
src_offset = src_entry->offset;
was_wired = (src_entry->wired_count != 0);
vm_map_entry_copy(new_entry, src_entry);
/*
* Attempt non-blocking copy-on-write optimizations.
*/
if (src_destroy &&
(src_object == VM_OBJECT_NULL ||
(src_object->internal && !src_object->true_share
&& !map_share))) {
/*
* If we are destroying the source, and the object
* is internal, we can move the object reference
* from the source to the copy. The copy is
* copy-on-write only if the source is.
* We make another reference to the object, because
* destroying the source entry will deallocate it.
*/
vm_object_reference(src_object);
/*
* Copy is always unwired. vm_map_copy_entry
* set its wired count to zero.
*/
dstat(++c_vmcc_src_destroy_opt);
goto CopySuccessful;
}
#if DIPC
/*
* If the caller promises not to modify the data,
* we don't have to apply copy-on-write processing
* to it. This works best in the distributed case.
* In the local case, we can wind up with fully
* shared data between sender and receiver -- a
* behavior that we aren't entirely sure we want
* at this point. So for now, we'll only do this
* on remote data transfers.
*/
if (src_volatile == TRUE) {
XPR(XPR_VM_MAP,
"vmcc src_obj 0x%x ent 0x%x obj 0x%x VOLATILE\n",
src_object, new_entry, new_entry->object.vm_object,
0, 0);
assert(src_destroy == FALSE);
dstat(++c_vmcc_volatile);
vm_object_reference(src_object);
goto CopySuccessful;
}
#endif /* DIPC */
RestartCopy:
XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
src_object, new_entry, new_entry->object.vm_object,
was_wired, 0);
dstat(!was_wired ? ++c_vmcc_wasnt_wired : 0);
if (!was_wired &&
vm_object_copy_quickly(
&new_entry->object.vm_object,
src_offset,
src_size,
&src_needs_copy,
&new_entry_needs_copy)) {
dstat(++c_vmcc_vocq);
new_entry->needs_copy = new_entry_needs_copy;
/*
* Handle copy-on-write obligations
*/
if (src_needs_copy && !tmp_entry->needs_copy) {
if (tmp_entry->is_shared ||
tmp_entry->object.vm_object->true_share ||
map_share) {
/* dec ref gained in copy_quickly */
vm_object_lock(src_object);
src_object->ref_count--;
vm_object_res_deallocate(src_object);
vm_object_unlock(src_object);
new_entry->object.vm_object =
vm_object_copy_delayed(
src_object,
src_offset,
src_size);
} else {
dstat(++c_vmcc_vopp);
vm_object_pmap_protect(
src_object,
src_offset,
src_size,
(src_entry->is_shared ?
PMAP_NULL
: src_map->pmap),
src_entry->vme_start,
src_entry->protection &
~VM_PROT_WRITE);
tmp_entry->needs_copy = TRUE;
}
}
/*
* The map has never been unlocked, so it's safe
* to move to the next entry rather than doing
* another lookup.
*/
goto CopySuccessful;
}
new_entry->needs_copy = FALSE;
/*
* Take an object reference, so that we may
* release the map lock(s).
*/
assert(src_object != VM_OBJECT_NULL);
vm_object_reference(src_object);
/*
* Record the timestamp for later verification.
* Unlock the map.
*/
version.main_timestamp = src_map->timestamp;
vm_map_unlock(src_map);
/*
* Perform the copy
*/
if (was_wired) {
dstat(++c_vmcc_vocs);
vm_object_lock(src_object);
result = vm_object_copy_slowly(
src_object,
src_offset,
src_size,
THREAD_UNINT,
&new_entry->object.vm_object);
new_entry->offset = 0;
new_entry->needs_copy = FALSE;
} else {
dstat(++c_vmcc_vocstrat);
result = vm_object_copy_strategically(src_object,
src_offset,
src_size,
&new_entry->object.vm_object,
&new_entry->offset,
&new_entry_needs_copy);
new_entry->needs_copy = new_entry_needs_copy;
}
if (result != KERN_SUCCESS &&
result != KERN_MEMORY_RESTART_COPY) {
vm_map_lock(src_map);
RETURN(result);
}
/*
* Throw away the extra reference
*/
vm_object_deallocate(src_object);
/*
* Verify that the map has not substantially
* changed while the copy was being made.
*/
vm_map_lock(src_map); /* Increments timestamp once! */
if ((version.main_timestamp + 1) == src_map->timestamp)
goto VerificationSuccessful;
/*
* Simple version comparison failed.
*
* Retry the lookup and verify that the
* same object/offset are still present.
*
* [Note: a memory manager that colludes with
* the calling task can detect that we have
* cheated. While the map was unlocked, the
* mapping could have been changed and restored.]
*/
dstat(++c_vmcc_vmle);
if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
RETURN(KERN_INVALID_ADDRESS);
}
src_entry = tmp_entry;
vm_map_clip_start(src_map, src_entry, src_start);
if ((src_entry->protection & VM_PROT_READ == VM_PROT_NONE &&
!use_maxprot) ||
src_entry->max_protection & VM_PROT_READ == 0)
goto VerificationFailed;
if (src_entry->vme_end < new_entry->vme_end)
src_size = (new_entry->vme_end = src_entry->vme_end) - src_start;
if ((src_entry->object.vm_object != src_object) ||
(src_entry->offset != src_offset) ) {
/*
* Verification failed.
*
* Start over with this top-level entry.
*/
VerificationFailed: ;
vm_object_deallocate(new_entry->object.vm_object);
tmp_entry = src_entry;
continue;
}
/*
* Verification succeeded.
*/
VerificationSuccessful: ;
if (result == KERN_MEMORY_RESTART_COPY)
goto RestartCopy;
/*
* Copy succeeded.
*/
CopySuccessful: ;
/*
* Link in the new copy entry.
*/
vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
new_entry);
/*
* Determine whether the entire region
* has been copied.
*/
src_start = new_entry->vme_end;
new_entry = VM_MAP_ENTRY_NULL;
while ((src_start >= src_end) && (src_end != 0)) {
if (src_map != base_map) {
submap_map_t *ptr;
ptr = parent_maps;
assert(ptr != NULL);
parent_maps = parent_maps->next;
vm_map_lock(ptr->parent_map);
vm_map_unlock(src_map);
src_map = ptr->parent_map;
src_start = ptr->base_start;
src_end = ptr->base_end;
if ((src_end > src_start) &&
!vm_map_lookup_entry(
src_map, src_start, &tmp_entry))
RETURN(KERN_INVALID_ADDRESS);
kfree((vm_offset_t)ptr, sizeof(submap_map_t));
if(parent_maps == NULL)
map_share = FALSE;
src_entry = tmp_entry->vme_prev;
} else
break;
}
if ((src_start >= src_end) && (src_end != 0))
break;
/*
* Verify that there are no gaps in the region
*/
tmp_entry = src_entry->vme_next;
dstat(++c_vmcc_reloop);
if (tmp_entry->vme_start != src_start)
RETURN(KERN_INVALID_ADDRESS);
}
/*
* If the source should be destroyed, do it now, since the
* copy was successful.
*/
if (src_destroy) {
dstat(++c_vmcc_vmd);
(void) vm_map_delete(src_map,
trunc_page(src_addr),
src_end,
(src_map == kernel_map) ?
VM_MAP_REMOVE_KUNWIRE :
VM_MAP_NO_FLAGS);
}
vm_map_unlock(src_map);
*copy_result = copy;
return(KERN_SUCCESS);
#undef RETURN
}
/*
* vm_map_copyin_object:
*
* Create a copy object from an object.
* Our caller donates an object reference.
*/
kern_return_t
vm_map_copyin_object(
vm_object_t object,
vm_offset_t offset, /* offset of region in object */
vm_size_t size, /* size of region in object */
vm_map_copy_t *copy_result) /* OUT */
{
vm_map_copy_t copy; /* Resulting copy */
/*
* We drop the object into a special copy object
* that contains the object directly.
*/
copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
copy->type = VM_MAP_COPY_OBJECT;
copy->cpy_object = object;
copy->cpy_index = 0;
copy->offset = offset;
copy->size = size;
*copy_result = copy;
return(KERN_SUCCESS);
}
/*
* vm_map_copyin_page_list_cont:
*
* Continuation routine for vm_map_copyin_page_list.
*
* If vm_map_copyin_page_list can't fit the entire vm range
* into a single page list object, it creates a continuation.
* When the target of the operation has used the pages in the
* initial page list, it invokes the continuation, which calls
* this routine. If an error happens, the continuation is aborted
* (abort arg to this routine is TRUE). To avoid deadlocks, the
* pages are discarded from the initial page list before invoking
* the continuation.
*
* NOTE: This is not the same sort of continuation used by
* the scheduler.
*/
kern_return_t
vm_map_copyin_page_list_cont(
vm_map_copyin_args_t cont_args,
vm_map_copy_t *copy_result) /* OUT */
{
kern_return_t result = KERN_SUCCESS;
register boolean_t abort, src_destroy, src_destroy_only;
/*
* Check for cases that only require memory destruction.
*/
abort = (copy_result == (vm_map_copy_t *) 0);
src_destroy = (cont_args->destroy_len != (vm_size_t) 0);
src_destroy_only = (cont_args->src_len == (vm_size_t) 0);
if (abort || src_destroy_only) {
if (src_destroy)
result = vm_map_remove(cont_args->map,
cont_args->destroy_addr,
cont_args->destroy_addr + cont_args->destroy_len,
VM_MAP_NO_FLAGS);
if (!abort)
*copy_result = VM_MAP_COPY_NULL;
}
else {
result = vm_map_copyin_page_list(cont_args->map,
cont_args->src_addr, cont_args->src_len,
cont_args->options, copy_result, TRUE);
if (src_destroy &&
(cont_args->options & VM_MAP_COPYIN_OPT_STEAL_PAGES) &&
vm_map_copy_has_cont(*copy_result)) {
vm_map_copyin_args_t new_args;
/*
* Transfer old destroy info.
*/
new_args = (vm_map_copyin_args_t)
(*copy_result)->cpy_cont_args;
new_args->destroy_addr = cont_args->destroy_addr;
new_args->destroy_len = cont_args->destroy_len;
}
}
vm_map_deallocate(cont_args->map);
kfree((vm_offset_t)cont_args, sizeof(vm_map_copyin_args_data_t));
return(result);
}
/*
* vm_map_copyin_page_list:
*
* This is a variant of vm_map_copyin that copies in a list of pages.
* If steal_pages is TRUE, the pages are only in the returned list.
* If steal_pages is FALSE, the pages are busy and still in their
* objects. A continuation may be returned if not all the pages fit:
* the recipient of this copy_result must be prepared to deal with it.
*/
kern_return_t
vm_map_copyin_page_list(
vm_map_t src_map,
vm_offset_t src_addr,
vm_size_t len,
int options,
vm_map_copy_t *copy_result, /* OUT */
boolean_t is_cont)
{
vm_map_entry_t src_entry;
vm_page_t m;
vm_offset_t src_start;
vm_offset_t src_end;
vm_size_t src_size;
register vm_object_t src_object;
register vm_offset_t src_offset;
vm_offset_t src_last_offset;
register vm_map_copy_t copy; /* Resulting copy */
kern_return_t result = KERN_SUCCESS;
boolean_t need_map_lookup;
vm_map_copyin_args_t cont_args;
kern_return_t error_code;
vm_prot_t prot;
boolean_t wired;
boolean_t no_zero_fill;
submap_map_t *parent_maps = NULL;
vm_map_t base_map = src_map;
prot = (options & VM_MAP_COPYIN_OPT_VM_PROT);
no_zero_fill = (options & VM_MAP_COPYIN_OPT_NO_ZERO_FILL);
/*
* If steal_pages is FALSE, this leaves busy pages in
* the object. A continuation must be used if src_destroy
* is true in this case (!steal_pages && src_destroy).
*
* XXX Still have a more general problem of what happens
* XXX if the same page occurs twice in a list. Deadlock
* XXX can happen if vm_fault_page was called. A
* XXX possible solution is to use a continuation if vm_fault_page
* XXX is called and we cross a map entry boundary.
*/
/*
* Check for copies of zero bytes.
*/
if (len == 0) {
*copy_result = VM_MAP_COPY_NULL;
return(KERN_SUCCESS);
}
/*
* Compute start and end of region
*/
src_start = trunc_page(src_addr);
src_end = round_page(src_addr + len);
/*
* If the region is not page aligned, override the no_zero_fill
* argument.
*/
if (options & VM_MAP_COPYIN_OPT_NO_ZERO_FILL) {
if (!page_aligned(src_addr) || !page_aligned(src_addr +len))
options &= ~VM_MAP_COPYIN_OPT_NO_ZERO_FILL;
}
/*
* Check that the end address doesn't overflow
*/
if (src_end <= src_start && (src_end < src_start || src_start != 0)) {
return KERN_INVALID_ADDRESS;
}
/*
* Allocate a header element for the page list.
*
* Record original offset and size, as caller may not
* be page-aligned.
*/
copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
copy->type = VM_MAP_COPY_PAGE_LIST;
copy->cpy_npages = 0;
copy->cpy_page_loose = FALSE;
copy->offset = src_addr;
copy->size = len;
copy->cpy_cont = VM_MAP_COPY_CONT_NULL;
copy->cpy_cont_args = VM_MAP_COPYIN_ARGS_NULL;
/*
* Find the beginning of the region.
*/
do_map_lookup:
vm_map_lock(src_map);
if (!vm_map_lookup_entry(src_map, src_start, &src_entry)) {
result = KERN_INVALID_ADDRESS;
goto error;
}
need_map_lookup = FALSE;
/*
* Go through entries until we get to the end.
*/
while (TRUE) {
if ((src_entry->protection & prot) != prot) {
result = KERN_PROTECTION_FAILURE;
goto error;
}
/* translate down through submaps to find the target entry */
while(src_entry->is_sub_map) {
vm_size_t submap_len;
submap_map_t *ptr;
ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
ptr->next = parent_maps;
parent_maps = ptr;
ptr->parent_map = src_map;
ptr->base_start = src_start;
ptr->base_end = src_end;
submap_len = src_entry->vme_end - src_entry->vme_start;
if(submap_len > (src_end-src_start))
submap_len = src_end-src_start;
ptr->base_start += submap_len;
src_start -= src_entry->vme_start;
src_start += src_entry->offset;
src_end = src_start + submap_len;
src_map = src_entry->object.sub_map;
vm_map_lock(src_map);
vm_map_unlock(ptr->parent_map);
if (!vm_map_lookup_entry(
src_map, src_start, &src_entry)) {
result = KERN_INVALID_ADDRESS;
goto error;
}
vm_map_clip_start(src_map, src_entry, src_start);
}
wired = (src_entry->wired_count != 0);
if (src_end > src_entry->vme_end)
src_size = src_entry->vme_end - src_start;
else
src_size = src_end - src_start;
src_object = src_entry->object.vm_object;
/*
* If src_object is NULL, allocate it now;
* we're going to fault on it shortly.
*/
if (src_object == VM_OBJECT_NULL) {
src_object = vm_object_allocate((vm_size_t)
src_entry->vme_end -
src_entry->vme_start);
src_entry->object.vm_object = src_object;
}
else if (src_entry->needs_copy && (prot & VM_PROT_WRITE)) {
vm_object_shadow(
&src_entry->object.vm_object,
&src_entry->offset,
(vm_size_t) (src_entry->vme_end -
src_entry->vme_start));
src_entry->needs_copy = FALSE;
/* reset src_object */
src_object = src_entry->object.vm_object;
}
/*
* calculate src_offset now, since vm_object_shadow
* may have changed src_entry->offset.
*/
src_offset = src_entry->offset + (src_start - src_entry->vme_start);
/*
* Iterate over pages. Fault in ones that aren't present.
*/
src_last_offset = src_offset + src_size;
for (; (src_offset < src_last_offset);
src_offset += PAGE_SIZE, src_start += PAGE_SIZE) {
if (copy->cpy_npages == VM_MAP_COPY_PAGE_LIST_MAX) {
vm_offset_t src_delta;
make_continuation:
/*
* At this point we have the max number of
* pages busy for this thread that we're
* willing to allow. Stop here and record
* arguments for the remainder. Note:
* this means that this routine isn't atomic,
* but that's the breaks. Note that only
* the first vm_map_copy_t that comes back
* from this routine has the right offset
* and size; those from continuations are
* page rounded, and short by the amount
* already done.
*
* Reset src_end so the src_destroy
* code at the bottom doesn't do
* something stupid.
*/
src_delta = src_end - src_start;
while (src_map != base_map) {
submap_map_t *ptr;
if(!need_map_lookup) {
vm_map_unlock(src_map);
}
ptr = parent_maps;
assert(ptr != NULL);
parent_maps = parent_maps->next;
src_map = ptr->parent_map;
src_start = ptr->base_start - src_delta;
src_delta = ptr->base_end - src_start;
kfree((vm_offset_t)ptr, sizeof(submap_map_t));
need_map_lookup = TRUE;
}
src_end = src_start;
cont_args = (vm_map_copyin_args_t)
kalloc(sizeof(vm_map_copyin_args_data_t));
cont_args->map = src_map;
vm_map_reference(src_map);
cont_args->src_addr = src_start;
cont_args->src_len = len - (src_start - src_addr);
if (options & VM_MAP_COPYIN_OPT_SRC_DESTROY) {
cont_args->destroy_addr = cont_args->src_addr;
cont_args->destroy_len = cont_args->src_len;
} else {
cont_args->destroy_addr = (vm_offset_t) 0;
cont_args->destroy_len = (vm_offset_t) 0;
}
cont_args->options = options;
copy->cpy_cont_args = cont_args;
copy->cpy_cont = vm_map_copyin_page_list_cont;
break;
}
/*
* Try to find the page of data. Have to
* fault it in if there's no page, or something
* going on with the page, or the object has
* a copy object.
*/
vm_object_lock(src_object);
vm_object_paging_begin(src_object);
if (((m = vm_page_lookup(src_object, src_offset)) !=
VM_PAGE_NULL) &&
!m->busy && !m->fictitious && !m->unusual &&
((prot & VM_PROT_WRITE) == 0 ||
(m->object->copy == VM_OBJECT_NULL))) {
if (!m->absent &&
!(options & VM_MAP_COPYIN_OPT_STEAL_PAGES)) {
/*
* The page is present and will not be
* replaced, prep it. Thus allowing
* mutiple access on this page
*/
kern_return_t kr;
kr = vm_page_prep(m);
assert(kr == KERN_SUCCESS);
kr = vm_page_pin(m);
assert(kr == KERN_SUCCESS);
} else {
/*
* This is the page. Mark it busy
* and keep the paging reference on
* the object whilst we do our thing.
*/
m->busy = TRUE;
}
} else {
vm_prot_t result_prot;
vm_page_t top_page;
kern_return_t kr;
boolean_t data_supply;
/*
* Have to fault the page in; must
* unlock the map to do so. While
* the map is unlocked, anything
* can happen, we must lookup the
* map entry before continuing.
*/
vm_map_unlock(src_map);
need_map_lookup = TRUE;
data_supply = src_object->silent_overwrite &&
(prot & VM_PROT_WRITE) &&
src_start >= src_addr &&
src_start + PAGE_SIZE <=
src_addr + len;
retry:
result_prot = prot;
XPR(XPR_VM_FAULT,
"vm_map_copyin_page_list -> vm_fault_page\n",
0,0,0,0,0);
kr = vm_fault_page(src_object, src_offset,
prot, FALSE, THREAD_UNINT,
src_entry->offset,
src_entry->offset +
(src_entry->vme_end -
src_entry->vme_start),
VM_BEHAVIOR_SEQUENTIAL,
&result_prot, &m, &top_page,
(int *)0,
&error_code,
options & VM_MAP_COPYIN_OPT_NO_ZERO_FILL,
data_supply);
/*
* Cope with what happened.
*/
switch (kr) {
case VM_FAULT_SUCCESS:
/*
* If we lost write access,
* try again.
*/
if ((prot & VM_PROT_WRITE) &&
!(result_prot & VM_PROT_WRITE)) {
vm_object_lock(src_object);
vm_object_paging_begin(src_object);
goto retry;
}
break;
case VM_FAULT_INTERRUPTED: /* ??? */
case VM_FAULT_RETRY:
vm_object_lock(src_object);
vm_object_paging_begin(src_object);
goto retry;
case VM_FAULT_MEMORY_SHORTAGE:
VM_PAGE_WAIT();
vm_object_lock(src_object);
vm_object_paging_begin(src_object);
goto retry;
case VM_FAULT_FICTITIOUS_SHORTAGE:
vm_page_more_fictitious();
vm_object_lock(src_object);
vm_object_paging_begin(src_object);
goto retry;
case VM_FAULT_MEMORY_ERROR:
/*
* Something broke. If this
* is a continuation, return
* a partial result if possible,
* else fail the whole thing.
* In the continuation case, the
* next continuation call will
* get this error if it persists.
*/
vm_map_lock(src_map);
if (is_cont &&
copy->cpy_npages != 0)
goto make_continuation;
result = error_code ? error_code : KERN_MEMORY_ERROR;
goto error;
}
if (top_page != VM_PAGE_NULL) {
vm_object_lock(src_object);
VM_PAGE_FREE(top_page);
vm_object_paging_end(src_object);
vm_object_unlock(src_object);
}
}
/*
* The page is busy, its object is locked, and
* we have a paging reference on it. Either
* the map is locked, or need_map_lookup is
* TRUE.
*/
/*
* Put the page in the page list.
*/
copy->cpy_page_list[copy->cpy_npages++] = m;
vm_object_unlock(m->object);
/*
* Pmap enter support. Only used for
* device I/O for colocated server.
*
* WARNING: This code assumes that this
* option is only used for well behaved
* memory. If the mapping has changed,
* the following code will make mistakes.
*
* XXXO probably ought to do pmap_extract first,
* XXXO to avoid needless pmap_enter, but this
* XXXO can't detect protection mismatch??
*/
if (options & VM_MAP_COPYIN_OPT_PMAP_ENTER) {
/*
* XXX Only used on kernel map.
* XXX Must not remove VM_PROT_WRITE on
* XXX an I/O only requiring VM_PROT_READ
* XXX as another I/O may be active on same page
* XXX assume that if mapping exists, it must
* XXX have the equivalent of at least VM_PROT_READ,
* XXX but don't assume it has VM_PROT_WRITE as the
* XXX pmap might not all the rights of the object
*/
assert(vm_map_pmap(src_map) == kernel_pmap);
if ((prot & VM_PROT_WRITE) ||
(pmap_extract(vm_map_pmap(src_map),
src_start) != m->phys_addr))
PMAP_ENTER(vm_map_pmap(src_map), src_start,
m, prot, wired);
}
if(need_map_lookup) {
need_map_lookup = FALSE;
vm_map_lock(src_map);
if (!vm_map_lookup_entry(src_map, src_start, &src_entry)) {
result = KERN_INVALID_ADDRESS;
goto error;
}
}
}
/*
* Verify that there are no gaps in the region
*/
src_start = src_entry->vme_end;
if (src_start < src_end) {
src_entry = src_entry->vme_next;
if (need_map_lookup) {
need_map_lookup = FALSE;
vm_map_lock(src_map);
if(!vm_map_lookup_entry(src_map,
src_start, &src_entry)) {
result = KERN_INVALID_ADDRESS;
goto error;
}
} else if (src_entry->vme_start != src_start) {
result = KERN_INVALID_ADDRESS;
goto error;
}
}
/*
* DETERMINE whether the entire region
* has been copied.
*/
while ((src_start >= src_end) && (src_end != 0)) {
if (src_map != base_map) {
submap_map_t *ptr;
ptr = parent_maps;
assert(ptr != NULL);
parent_maps = parent_maps->next;
src_start = ptr->base_start;
src_end = ptr->base_end;
if(need_map_lookup) {
need_map_lookup = FALSE;
}
else {
vm_map_unlock(src_map);
}
src_map = ptr->parent_map;
vm_map_lock(src_map);
if((src_start < src_end) &&
(!vm_map_lookup_entry(ptr->parent_map,
src_start, &src_entry))) {
result = KERN_INVALID_ADDRESS;
kfree((vm_offset_t)ptr, sizeof(submap_map_t));
goto error;
}
kfree((vm_offset_t)ptr, sizeof(submap_map_t));
} else
break;
}
if ((src_start >= src_end) && (src_end != 0)) {
if (need_map_lookup)
vm_map_lock(src_map);
break;
}
}
/*
* If steal_pages is true, make sure all
* pages in the copy are not in any object
* We try to remove them from the original
* object, but we may have to copy them.
*
* At this point every page in the list is busy
* and holds a paging reference to its object.
* When we're done stealing, every page is busy,
* and in no object (m->tabled == FALSE).
*/
src_start = trunc_page(src_addr);
if (options & VM_MAP_COPYIN_OPT_STEAL_PAGES) {
register int i;
vm_offset_t page_vaddr;
vm_offset_t unwire_end;
vm_offset_t map_entry_end;
boolean_t share_map = FALSE;
unwire_end = src_start;
map_entry_end = src_start;
for (i = 0; i < copy->cpy_npages; i++) {
/*
* Remove the page from its object if it
* can be stolen. It can be stolen if:
*
* (1) The source is being destroyed,
* the object is internal (hence
* temporary), and not shared.
* (2) The page is not precious.
*
* The not shared check consists of two
* parts: (a) there are no objects that
* shadow this object. (b) it is not the
* object in any shared map entries (i.e.,
* use_shared_copy is not set).
*
* The first check (a) means that we can't
* steal pages from objects that are not
* at the top of their shadow chains. This
* should not be a frequent occurrence.
*
* Stealing wired pages requires telling the
* pmap module to let go of them.
*
* NOTE: stealing clean pages from objects
* whose mappings survive requires a call to
* the pmap module. Maybe later.
*/
m = copy->cpy_page_list[i];
src_object = m->object;
vm_object_lock(src_object);
page_vaddr = src_start + (i * PAGE_SIZE);
if(page_vaddr > map_entry_end) {
if (!vm_map_lookup_entry(src_map, page_vaddr, &src_entry))
share_map = TRUE;
else if (src_entry->is_sub_map) {
map_entry_end = src_entry->vme_end;
share_map = TRUE;
} else {
map_entry_end = src_entry->vme_end;
share_map = FALSE;
}
}
if ((options & VM_MAP_COPYIN_OPT_SRC_DESTROY) &&
src_object->internal &&
!src_object->true_share &&
(!src_object->shadowed) &&
(src_object->copy_strategy ==
MEMORY_OBJECT_COPY_SYMMETRIC) &&
!m->precious &&
!share_map) {
if (m->wire_count > 0) {
assert(m->wire_count == 1);
/*
* In order to steal a wired
* page, we have to unwire it
* first. We do this inline
* here because we have the page.
*
* Step 1: Unwire the map entry.
* Also tell the pmap module
* that this piece of the
* pmap is pageable.
*/
vm_object_unlock(src_object);
if (page_vaddr >= unwire_end) {
if (!vm_map_lookup_entry(src_map,
page_vaddr, &src_entry))
panic("vm_map_copyin_page_list: missing wired map entry");
vm_map_clip_start(src_map, src_entry,
page_vaddr);
vm_map_clip_end(src_map, src_entry,
src_start + src_size);
/* revisit why this assert fails CDY
assert(src_entry->wired_count > 0);
*/
src_entry->wired_count = 0;
src_entry->user_wired_count = 0;
unwire_end = src_entry->vme_end;
pmap_pageable(vm_map_pmap(src_map),
page_vaddr, unwire_end, TRUE);
}
/*
* Step 2: Unwire the page.
* pmap_remove handles this for us.
*/
vm_object_lock(src_object);
}
/*
* Don't need to remove the mapping;
* vm_map_delete will handle it.
*
* Steal the page. Setting the wire count
* to zero is vm_page_unwire without
* activating the page.
*/
vm_page_lock_queues();
vm_page_remove(m);
if (m->wire_count > 0) {
m->wire_count = 0;
vm_page_wire_count--;
} else {
VM_PAGE_QUEUES_REMOVE(m);
}
vm_page_unlock_queues();
} else {
/*
* Have to copy this page. Have to
* unlock the map while copying,
* hence no further page stealing.
* Hence just copy all the pages.
* Unlock the map while copying;
* This means no further page stealing.
*/
vm_object_unlock(src_object);
vm_map_unlock(src_map);
vm_map_copy_steal_pages(copy);
vm_map_lock(src_map);
break;
}
vm_object_paging_end(src_object);
vm_object_unlock(src_object);
}
copy->cpy_page_loose = TRUE;
/*
* If the source should be destroyed, do it now, since the
* copy was successful.
*/
if (options & VM_MAP_COPYIN_OPT_SRC_DESTROY) {
(void) vm_map_delete(src_map, src_start,
src_end, VM_MAP_NO_FLAGS);
}
} else {
/*
* Not stealing pages leaves busy or prepped pages in the map.
* This will cause source destruction to hang. Use
* a continuation to prevent this.
*/
if ((options & VM_MAP_COPYIN_OPT_SRC_DESTROY) &&
!vm_map_copy_has_cont(copy)) {
cont_args = (vm_map_copyin_args_t)
kalloc(sizeof(vm_map_copyin_args_data_t));
vm_map_reference(src_map);
cont_args->map = src_map;
cont_args->src_addr = (vm_offset_t) 0;
cont_args->src_len = (vm_size_t) 0;
cont_args->destroy_addr = src_start;
cont_args->destroy_len = src_end - src_start;
cont_args->options = options;
copy->cpy_cont_args = cont_args;
copy->cpy_cont = vm_map_copyin_page_list_cont;
}
}
vm_map_unlock(src_map);
*copy_result = copy;
return(result);
error:
{
submap_map_t *ptr;
vm_map_unlock(src_map);
vm_map_copy_discard(copy);
for(ptr = parent_maps; ptr != NULL; ptr = parent_maps) {
parent_maps=parent_maps->next;
kfree((vm_offset_t)ptr, sizeof(submap_map_t));
}
return(result);
}
}
void
vm_map_fork_share(
vm_map_t old_map,
vm_map_entry_t old_entry,
vm_map_t new_map)
{
vm_object_t object;
vm_map_entry_t new_entry;
/*
* New sharing code. New map entry
* references original object. Internal
* objects use asynchronous copy algorithm for
* future copies. First make sure we have
* the right object. If we need a shadow,
* or someone else already has one, then
* make a new shadow and share it.
*/
object = old_entry->object.vm_object;
if (old_entry->is_sub_map) {
assert(old_entry->wired_count == 0);
} else if (object == VM_OBJECT_NULL) {
object = vm_object_allocate((vm_size_t)(old_entry->vme_end -
old_entry->vme_start));
old_entry->offset = 0;
old_entry->object.vm_object = object;
assert(!old_entry->needs_copy);
} else if (object->copy_strategy !=
MEMORY_OBJECT_COPY_SYMMETRIC) {
/*
* We are already using an asymmetric
* copy, and therefore we already have
* the right object.
*/
assert(! old_entry->needs_copy);
}
else if (old_entry->needs_copy || /* case 1 */
object->shadowed || /* case 2 */
(!object->true_share && /* case 3 */
!old_entry->is_shared &&
(object->size >
(vm_size_t)(old_entry->vme_end -
old_entry->vme_start)))) {
/*
* We need to create a shadow.
* There are three cases here.
* In the first case, we need to
* complete a deferred symmetrical
* copy that we participated in.
* In the second and third cases,
* we need to create the shadow so
* that changes that we make to the
* object do not interfere with
* any symmetrical copies which
* have occured (case 2) or which
* might occur (case 3).
*
* The first case is when we had
* deferred shadow object creation
* via the entry->needs_copy mechanism.
* This mechanism only works when
* only one entry points to the source
* object, and we are about to create
* a second entry pointing to the
* same object. The problem is that
* there is no way of mapping from
* an object to the entries pointing
* to it. (Deferred shadow creation
* works with one entry because occurs
* at fault time, and we walk from the
* entry to the object when handling
* the fault.)
*
* The second case is when the object
* to be shared has already been copied
* with a symmetric copy, but we point
* directly to the object without
* needs_copy set in our entry. (This
* can happen because different ranges
* of an object can be pointed to by
* different entries. In particular,
* a single entry pointing to an object
* can be split by a call to vm_inherit,
* which, combined with task_create, can
* result in the different entries
* having different needs_copy values.)
* The shadowed flag in the object allows
* us to detect this case. The problem
* with this case is that if this object
* has or will have shadows, then we
* must not perform an asymmetric copy
* of this object, since such a copy
* allows the object to be changed, which
* will break the previous symmetrical
* copies (which rely upon the object
* not changing). In a sense, the shadowed
* flag says "don't change this object".
* We fix this by creating a shadow
* object for this object, and sharing
* that. This works because we are free
* to change the shadow object (and thus
* to use an asymmetric copy strategy);
* this is also semantically correct,
* since this object is temporary, and
* therefore a copy of the object is
* as good as the object itself. (This
* is not true for permanent objects,
* since the pager needs to see changes,
* which won't happen if the changes
* are made to a copy.)
*
* The third case is when the object
* to be shared has parts sticking
* outside of the entry we're working
* with, and thus may in the future
* be subject to a symmetrical copy.
* (This is a preemptive version of
* case 2.)
*/
assert(!(object->shadowed && old_entry->is_shared));
vm_object_shadow(&old_entry->object.vm_object,
&old_entry->offset,
(vm_size_t) (old_entry->vme_end -
old_entry->vme_start));
/*
* If we're making a shadow for other than
* copy on write reasons, then we have
* to remove write permission.
*/
if (!old_entry->needs_copy &&
(old_entry->protection & VM_PROT_WRITE)) {
pmap_protect(vm_map_pmap(old_map),
old_entry->vme_start,
old_entry->vme_end,
old_entry->protection & ~VM_PROT_WRITE);
}
old_entry->needs_copy = FALSE;
object = old_entry->object.vm_object;
}
/*
* If object was using a symmetric copy strategy,
* change its copy strategy to the default
* asymmetric copy strategy, which is copy_delay
* in the non-norma case and copy_call in the
* norma case. Bump the reference count for the
* new entry.
*/
if(old_entry->is_sub_map) {
vm_map_lock(old_entry->object.sub_map);
vm_map_reference(old_entry->object.sub_map);
vm_map_unlock(old_entry->object.sub_map);
} else {
vm_object_lock(object);
object->ref_count++;
vm_object_res_reference(object);
if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
}
vm_object_unlock(object);
}
/*
* Clone the entry, using object ref from above.
* Mark both entries as shared.
*/
new_entry = vm_map_entry_create(new_map);
vm_map_entry_copy(new_entry, old_entry);
old_entry->is_shared = TRUE;
new_entry->is_shared = TRUE;
/*
* Insert the entry into the new map -- we
* know we're inserting at the end of the new
* map.
*/
vm_map_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
/*
* Update the physical map
*/
if (old_entry->is_sub_map) {
/* Bill Angell pmap support goes here */
} else {
pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
old_entry->vme_end - old_entry->vme_start,
old_entry->vme_start);
}
}
boolean_t
vm_map_fork_copy(
vm_map_t old_map,
vm_map_entry_t *old_entry_p,
vm_map_t new_map)
{
vm_map_entry_t old_entry = *old_entry_p;
vm_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
vm_offset_t start = old_entry->vme_start;
vm_map_copy_t copy;
vm_map_entry_t last = vm_map_last_entry(new_map);
vm_map_unlock(old_map);
/*
* Use maxprot version of copyin because we
* care about whether this memory can ever
* be accessed, not just whether it's accessible
* right now.
*/
if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, ©)
!= KERN_SUCCESS) {
/*
* The map might have changed while it
* was unlocked, check it again. Skip
* any blank space or permanently
* unreadable region.
*/
vm_map_lock(old_map);
if (!vm_map_lookup_entry(old_map, start, &last) ||
last->max_protection & VM_PROT_READ ==
VM_PROT_NONE) {
last = last->vme_next;
}
*old_entry_p = last;
/*
* XXX For some error returns, want to
* XXX skip to the next element. Note
* that INVALID_ADDRESS and
* PROTECTION_FAILURE are handled above.
*/
return FALSE;
}
/*
* Insert the copy into the new map
*/
vm_map_copy_insert(new_map, last, copy);
/*
* Pick up the traversal at the end of
* the copied region.
*/
vm_map_lock(old_map);
start += entry_size;
if (! vm_map_lookup_entry(old_map, start, &last)) {
last = last->vme_next;
} else {
vm_map_clip_start(old_map, last, start);
}
*old_entry_p = last;
return TRUE;
}
/*
* vm_map_fork:
*
* Create and return a new map based on the old
* map, according to the inheritance values on the
* regions in that map.
*
* The source map must not be locked.
*/
vm_map_t
vm_map_fork(
vm_map_t old_map)
{
pmap_t new_pmap = pmap_create((vm_size_t) 0);
vm_map_t new_map;
vm_map_entry_t old_entry;
vm_size_t new_size = 0, entry_size;
vm_map_entry_t new_entry;
boolean_t src_needs_copy;
boolean_t new_entry_needs_copy;
vm_map_reference_swap(old_map);
vm_map_lock(old_map);
new_map = vm_map_create(new_pmap,
old_map->min_offset,
old_map->max_offset,
old_map->hdr.entries_pageable);
for (
old_entry = vm_map_first_entry(old_map);
old_entry != vm_map_to_entry(old_map);
) {
entry_size = old_entry->vme_end - old_entry->vme_start;
switch (old_entry->inheritance) {
case VM_INHERIT_NONE:
break;
case VM_INHERIT_SHARE:
vm_map_fork_share(old_map, old_entry, new_map);
new_size += entry_size;
break;
case VM_INHERIT_COPY:
/*
* Inline the copy_quickly case;
* upon failure, fall back on call
* to vm_map_fork_copy.
*/
if(old_entry->is_sub_map)
break;
if (old_entry->wired_count != 0) {
goto slow_vm_map_fork_copy;
}
new_entry = vm_map_entry_create(new_map);
vm_map_entry_copy(new_entry, old_entry);
if (! vm_object_copy_quickly(
&new_entry->object.vm_object,
old_entry->offset,
(old_entry->vme_end -
old_entry->vme_start),
&src_needs_copy,
&new_entry_needs_copy)) {
vm_map_entry_dispose(new_map, new_entry);
goto slow_vm_map_fork_copy;
}
/*
* Handle copy-on-write obligations
*/
if (src_needs_copy && !old_entry->needs_copy) {
vm_object_pmap_protect(
old_entry->object.vm_object,
old_entry->offset,
(old_entry->vme_end -
old_entry->vme_start),
(old_entry->is_shared ? PMAP_NULL :
old_map->pmap),
old_entry->vme_start,
old_entry->protection & ~VM_PROT_WRITE);
old_entry->needs_copy = TRUE;
}
new_entry->needs_copy = new_entry_needs_copy;
/*
* Insert the entry at the end
* of the map.
*/
vm_map_entry_link(new_map, vm_map_last_entry(new_map),
new_entry);
new_size += entry_size;
break;
slow_vm_map_fork_copy:
if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
new_size += entry_size;
}
continue;
}
old_entry = old_entry->vme_next;
}
new_map->size = new_size;
vm_map_unlock(old_map);
vm_map_deallocate(old_map);
return(new_map);
}
/*
* vm_map_lookup_locked:
*
* Finds the VM object, offset, and
* protection for a given virtual address in the
* specified map, assuming a page fault of the
* type specified.
*
* Returns the (object, offset, protection) for
* this address, whether it is wired down, and whether
* this map has the only reference to the data in question.
* In order to later verify this lookup, a "version"
* is returned.
*
* The map MUST be locked by the caller and WILL be
* locked on exit. In order to guarantee the
* existence of the returned object, it is returned
* locked.
*
* If a lookup is requested with "write protection"
* specified, the map may be changed to perform virtual
* copying operations, although the data referenced will
* remain the same.
*/
kern_return_t
vm_map_lookup_locked(
vm_map_t *var_map, /* IN/OUT */
register vm_offset_t vaddr,
register vm_prot_t fault_type,
vm_map_version_t *out_version, /* OUT */
vm_object_t *object, /* OUT */
vm_offset_t *offset, /* OUT */
vm_prot_t *out_prot, /* OUT */
boolean_t *wired, /* OUT */
int *behavior, /* OUT */
vm_offset_t *lo_offset, /* OUT */
vm_offset_t *hi_offset) /* OUT */
{
vm_map_entry_t entry;
register vm_map_t map = *var_map;
vm_map_t old_map = *var_map;
vm_offset_t old_vaddr = vaddr;
vm_map_t cow_sub_map;
vm_offset_t old_start;
vm_offset_t old_end;
register vm_prot_t prot;
RetryLookup: ;
/*
* If the map has an interesting hint, try it before calling
* full blown lookup routine.
*/
mutex_lock(&map->s_lock);
entry = map->hint;
mutex_unlock(&map->s_lock);
if ((entry == vm_map_to_entry(map)) ||
(vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
vm_map_entry_t tmp_entry;
/*
* Entry was either not a valid hint, or the vaddr
* was not contained in the entry, so do a full lookup.
*/
if (!vm_map_lookup_entry(map, vaddr, &tmp_entry))
return KERN_INVALID_ADDRESS;
entry = tmp_entry;
}
if(map == old_map) {
old_start = entry->vme_start;
old_end = entry->vme_end;
}
/*
* Handle submaps. Drop lock on upper map, submap is
* returned locked.
*/
submap_recurse:
if (entry->is_sub_map) {
vm_map_entry_t our_entry;
vm_offset_t local_vaddr;
vm_offset_t end_delta;
vm_offset_t start_delta;
vm_map_entry_t submap_entry;
boolean_t mapped_needs_copy=FALSE;
local_vaddr = vaddr;
our_entry = entry;
vm_map_lock_read(entry->object.sub_map);
vm_map_unlock_read(map);
*var_map = map = entry->object.sub_map;
/* calculate the offset in the submap for vaddr */
local_vaddr = (local_vaddr - entry->vme_start) + entry->offset;
if(entry->needs_copy) {
if(!mapped_needs_copy) {
cow_sub_map = entry->object.sub_map;
mapped_needs_copy = TRUE;
}
}
if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
return KERN_INVALID_ADDRESS;
}
/* find the attenuated shadow of the underlying object */
/* on our target map */
/* in english the submap object may extend beyond the */
/* region mapped by the entry or, may only fill a portion */
/* of it. For our purposes, we only care if the object */
/* doesn't fill. In this case the area which will */
/* ultimately be clipped in the top map will only need */
/* to be as big as the portion of the underlying entry */
/* which is mapped */
start_delta = submap_entry->vme_start > entry->offset ?
submap_entry->vme_start - entry->offset : 0;
end_delta =
(entry->offset + start_delta + (old_end - old_start)) <=
submap_entry->vme_end ?
0 : (entry->offset +
start_delta + (old_end - old_start))
- submap_entry->vme_end;
old_start += start_delta;
old_end -= end_delta;
vaddr = local_vaddr;
entry = submap_entry;
if(submap_entry->is_sub_map) {
goto submap_recurse;
}
if((fault_type & VM_PROT_WRITE) &&
!(submap_entry->protection & VM_PROT_WRITE)) {
if(mapped_needs_copy) {
vm_object_t copy_object;
vm_offset_t object_offset;
vm_offset_t local_start;
vm_offset_t local_end;
object_offset = submap_entry->offset;
/* This is the COW case, lets connect */
/* an entry in our space to the underlying */
/* object in the submap, bypassing the */
/* submap. */
/* set up shadow object */
copy_object =
vm_object_copy_delayed(
submap_entry->object.vm_object,
submap_entry->offset,
old_end-old_start);
/* This case works opposite of the */
/* normal submap case. We go back */
/* to the top of the map tree and */
/* clip out the target portion of */
/* the sub_map, substituting the */
/* new copy object, we do this for */
/* all maps in the tree, down to */
/* the COW one. */
vm_map_unlock_read(map);
vm_map_lock(old_map);
local_start = old_start;
local_end = old_end;
local_vaddr = vaddr = old_vaddr;
map = old_map;
while(TRUE) {
vm_map_t new_map;
if(!vm_map_lookup_entry(map,
local_vaddr, &entry)) {
vm_object_deallocate(
copy_object);
vm_map_unlock(map);
vm_map_lock_read(*var_map);
return KERN_INVALID_ADDRESS;
}
/* clip out the portion of space */
/* mapped by the sub map which */
/* corresponds to the underlying */
/* object */
vm_map_clip_start(map, entry,
local_start);
vm_map_clip_end(map, entry,
local_end);
new_map = entry->object.sub_map;
/* update local start and end */
/* in possible underlying submap */
local_start = entry->offset;
local_end = (entry->vme_end -
entry->vme_start) +
entry->offset;
local_vaddr -= entry->vme_start;
local_vaddr += entry->offset;
/* substitute copy object for */
/* shared map entry */
vm_map_deallocate(
entry->object.sub_map);
entry->is_sub_map = FALSE;
vm_object_reference(copy_object);
entry->object.vm_object = copy_object;
entry->protection |= VM_PROT_WRITE;
entry->max_protection |= VM_PROT_WRITE;
entry->needs_copy = FALSE;
if(entry->inheritance
== VM_INHERIT_SHARE)
entry->inheritance = VM_INHERIT_COPY;
entry->offset = object_offset;
if (map != old_map)
entry->is_shared = TRUE;
vm_map_unlock(map);
if(new_map == cow_sub_map)
break;
map = new_map;
vm_map_lock(map);
}
map = old_map;
vm_map_lock_read(map);
vaddr = old_vaddr;
/* get rid of extra reference */
vm_object_deallocate(copy_object);
if(!vm_map_lookup_entry(map, vaddr, &entry)) {
return KERN_INVALID_ADDRESS;
}
} else {
/* write fault! */
return KERN_INVALID_ADDRESS;
}
}
vm_map_lock_read(old_map);
vm_map_unlock_read(map);
*var_map = map = old_map;
}
/*
* Check whether this task is allowed to have
* this page.
*/
prot = entry->protection;
if ((fault_type & (prot)) != fault_type)
return KERN_PROTECTION_FAILURE;
/*
* If this page is not pageable, we have to get
* it for all possible accesses.
*/
if (*wired = (entry->wired_count != 0))
prot = fault_type = entry->protection;
/*
* If the entry was copy-on-write, we either ...
*/
if (entry->needs_copy) {
/*
* If we want to write the page, we may as well
* handle that now since we've got the map locked.
*
* If we don't need to write the page, we just
* demote the permissions allowed.
*/
if (fault_type & VM_PROT_WRITE || *wired) {
/*
* Make a new object, and place it in the
* object chain. Note that no new references
* have appeared -- one just moved from the
* map to the new object.
*/
if (vm_map_lock_read_to_write(map)) {
vm_map_lock_read(map);
goto RetryLookup;
}
vm_object_shadow(&entry->object.vm_object,
&entry->offset,
(vm_size_t) (entry->vme_end -
entry->vme_start));
entry->needs_copy = FALSE;
vm_map_lock_write_to_read(map);
}
else {
/*
* We're attempting to read a copy-on-write
* page -- don't allow writes.
*/
prot &= (~VM_PROT_WRITE);
}
}
/*
* Create an object if necessary.
*/
if (entry->object.vm_object == VM_OBJECT_NULL) {
if (vm_map_lock_read_to_write(map)) {
vm_map_lock_read(map);
goto RetryLookup;
}
entry->object.vm_object = vm_object_allocate(
(vm_size_t)(entry->vme_end - entry->vme_start));
entry->offset = 0;
vm_map_lock_write_to_read(map);
}
/*
* Return the object/offset from this entry. If the entry
* was copy-on-write or empty, it has been fixed up. Also
* return the protection.
*/
*offset = (vaddr - entry->vme_start) + entry->offset;
*object = entry->object.vm_object;
*out_prot = prot;
*behavior = entry->behavior;
*lo_offset = entry->offset;
*hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
/*
* Lock the object to prevent it from disappearing
*/
vm_object_lock(*object);
/*
* Save the version number
*/
out_version->main_timestamp = map->timestamp;
return KERN_SUCCESS;
}
/*
* vm_map_verify:
*
* Verifies that the map in question has not changed
* since the given version. If successful, the map
* will not change until vm_map_verify_done() is called.
*/
boolean_t
vm_map_verify(
register vm_map_t map,
register vm_map_version_t *version) /* REF */
{
boolean_t result;
vm_map_lock_read(map);
result = (map->timestamp == version->main_timestamp);
if (!result)
vm_map_unlock_read(map);
return(result);
}
/*
* vm_map_verify_done:
*
* Releases locks acquired by a vm_map_verify.
*
* This is now a macro in vm/vm_map.h. It does a
* vm_map_unlock_read on the map.
*/
/*
* vm_region:
*
* User call to obtain information about a region in
* a task's address map. Currently, only one flavor is
* supported.
*
* XXX The reserved and behavior fields cannot be filled
* in until the vm merge from the IK is completed, and
* vm_reserve is implemented.
*
* XXX Dependency: syscall_vm_region() also supports only one flavor.
*/
kern_return_t
vm_region(
vm_map_t map,
vm_offset_t *address, /* IN/OUT */
vm_size_t *size, /* OUT */
vm_region_flavor_t flavor, /* IN */
vm_region_info_t info, /* OUT */
mach_msg_type_number_t *count, /* IN/OUT */
ipc_port_t *object_name) /* OUT */
{
vm_map_entry_t tmp_entry;
register
vm_map_entry_t entry;
register
vm_offset_t start;
vm_region_basic_info_t basic;
vm_region_extended_info_t extended;
vm_region_top_info_t top;
if (map == VM_MAP_NULL)
return(KERN_INVALID_ARGUMENT);
switch (flavor) {
case VM_REGION_BASIC_INFO:
{
if (*count < VM_REGION_BASIC_INFO_COUNT)
return(KERN_INVALID_ARGUMENT);
basic = (vm_region_basic_info_t) info;
*count = VM_REGION_BASIC_INFO_COUNT;
vm_map_lock_read(map);
start = *address;
if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
vm_map_unlock_read(map);
return(KERN_INVALID_ADDRESS);
}
} else {
entry = tmp_entry;
}
start = entry->vme_start;
basic->offset = entry->offset;
basic->protection = entry->protection;
basic->inheritance = entry->inheritance;
basic->max_protection = entry->max_protection;
basic->behavior = entry->behavior;
basic->user_wired_count = entry->user_wired_count;
basic->reserved = FALSE; /* XXX when vm_reserve fini */
*address = start;
*size = (entry->vme_end - start);
if (object_name) *object_name = IP_NULL;
if (entry->is_sub_map) {
basic->shared = FALSE;
} else {
basic->shared = entry->is_shared;
}
vm_map_unlock_read(map);
return(KERN_SUCCESS);
}
case VM_REGION_EXTENDED_INFO:
{ void vm_region_walk();
if (*count < VM_REGION_EXTENDED_INFO_COUNT)
return(KERN_INVALID_ARGUMENT);
extended = (vm_region_extended_info_t) info;
*count = VM_REGION_EXTENDED_INFO_COUNT;
vm_map_lock_read(map);
start = *address;
if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
vm_map_unlock_read(map);
return(KERN_INVALID_ADDRESS);
}
} else {
entry = tmp_entry;
}
start = entry->vme_start;
extended->protection = entry->protection;
extended->user_tag = entry->alias;
extended->pages_resident = 0;
extended->pages_swapped_out = 0;
extended->pages_shared_now_private = 0;
extended->pages_referenced = 0;
extended->external_pager = 0;
extended->shadow_depth = 0;
vm_region_walk(entry, extended, entry->offset, entry->vme_end - start, map, start);
if (object_name)
*object_name = IP_NULL;
*address = start;
*size = (entry->vme_end - start);
vm_map_unlock_read(map);
return(KERN_SUCCESS);
}
case VM_REGION_TOP_INFO:
{ void vm_region_top_walk();
if (*count < VM_REGION_TOP_INFO_COUNT)
return(KERN_INVALID_ARGUMENT);
top = (vm_region_top_info_t) info;
*count = VM_REGION_TOP_INFO_COUNT;
vm_map_lock_read(map);
start = *address;
if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
vm_map_unlock_read(map);
return(KERN_INVALID_ADDRESS);
}
} else {
entry = tmp_entry;
}
start = entry->vme_start;
top->private_pages_resident = 0;
top->shared_pages_resident = 0;
vm_region_top_walk(entry, top);
if (object_name)
*object_name = IP_NULL;
*address = start;
*size = (entry->vme_end - start);
vm_map_unlock_read(map);
return(KERN_SUCCESS);
}
default:
return(KERN_INVALID_ARGUMENT);
}
}
void
vm_region_top_walk(
vm_map_entry_t entry,
vm_region_top_info_t top)
{
register struct vm_object *obj, *tmp_obj;
if (entry->object.vm_object == 0) {
top->share_mode = SM_EMPTY;
top->ref_count = 0;
top->obj_id = 0;
return;
}
if (entry->is_sub_map)
vm_region_top_walk((vm_map_entry_t)entry->object.sub_map, top);
else {
obj = entry->object.vm_object;
vm_object_lock(obj);
if (obj->shadow) {
if (obj->ref_count == 1)
top->private_pages_resident = obj->resident_page_count;
else
top->shared_pages_resident = obj->resident_page_count;
top->ref_count = obj->ref_count;
top->share_mode = SM_COW;
while (tmp_obj = obj->shadow) {
vm_object_lock(tmp_obj);
vm_object_unlock(obj);
obj = tmp_obj;
top->shared_pages_resident += obj->resident_page_count;
top->ref_count += obj->resident_page_count - 1;
}
} else {
if (entry->needs_copy) {
top->share_mode = SM_COW;
top->shared_pages_resident = obj->resident_page_count;
} else {
if (obj->ref_count == 1) {
top->share_mode = SM_PRIVATE;
top->private_pages_resident = obj->resident_page_count;
} else {
top->share_mode = SM_SHARED;
top->shared_pages_resident = obj->resident_page_count;
}
}
top->ref_count = obj->ref_count;
}
top->obj_id = (int)obj;
vm_object_unlock(obj);
}
}
void
vm_region_walk(
vm_map_entry_t entry,
vm_region_extended_info_t extended,
vm_offset_t offset,
vm_offset_t range,
vm_map_t map,
vm_offset_t va)
{
register struct vm_object *obj, *tmp_obj;
register vm_offset_t last_offset;
register int i;
void vm_region_look_for_page();
if (entry->object.vm_object == 0) {
extended->share_mode = SM_EMPTY;
extended->ref_count = 0;
return;
}
if (entry->is_sub_map)
vm_region_walk((vm_map_entry_t)entry->object.sub_map, extended, offset + entry->offset,
range, map, va);
else {
obj = entry->object.vm_object;
vm_object_lock(obj);
for (last_offset = offset + range; offset < last_offset; offset += PAGE_SIZE, va += PAGE_SIZE)
vm_region_look_for_page(obj, extended, offset, obj->ref_count, 0, map, va);
if (extended->shadow_depth || entry->needs_copy)
extended->share_mode = SM_COW;
else {
if (obj->ref_count == 1)
extended->share_mode = SM_PRIVATE;
else {
if (obj->true_share)
extended->share_mode = SM_TRUESHARED;
else
extended->share_mode = SM_SHARED;
}
}
extended->ref_count = obj->ref_count - extended->shadow_depth;
for (i = 0; i < extended->shadow_depth; i++) {
if ((tmp_obj = obj->shadow) == 0)
break;
vm_object_lock(tmp_obj);
vm_object_unlock(obj);
extended->ref_count += tmp_obj->ref_count;
obj = tmp_obj;
}
vm_object_unlock(obj);
if (extended->share_mode == SM_SHARED) {
register vm_map_entry_t cur;
register vm_map_entry_t last;
int my_refs;
obj = entry->object.vm_object;
last = vm_map_to_entry(map);
my_refs = 0;
for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
my_refs += vm_region_count_obj_refs(cur, obj);
if (my_refs == obj->ref_count)
extended->share_mode = SM_PRIVATE_ALIASED;
else if (my_refs > 1)
extended->share_mode = SM_SHARED_ALIASED;
}
}
}
void
vm_region_look_for_page(
vm_object_t object,
vm_region_extended_info_t extended,
vm_offset_t offset,
int max_refcnt,
int depth,
vm_map_t map,
vm_offset_t va)
{
register vm_page_t p;
register vm_object_t shadow;
shadow = object->shadow;
queue_iterate(&object->memq, p, vm_page_t, listq) {
if (p->offset == offset) {
if (shadow && (max_refcnt == 1))
extended->pages_shared_now_private++;
if (pmap_extract(vm_map_pmap(map), va))
extended->pages_referenced++;
extended->pages_resident++;
return;
}
}
if (object->existence_map) {
if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
if (shadow && (max_refcnt == 1))
extended->pages_shared_now_private++;
extended->pages_swapped_out++;
return;
}
}
if (shadow) {
vm_object_lock(shadow);
if (++depth > extended->shadow_depth)
extended->shadow_depth = depth;
if (shadow->ref_count > max_refcnt)
max_refcnt = shadow->ref_count;
vm_region_look_for_page(shadow, extended, offset + object->shadow_offset,
max_refcnt, depth, map, va);
vm_object_unlock(shadow);
return;
}
if ( !(object->pager_trusted) && !(object->internal))
extended->external_pager = 1;
}
vm_region_count_obj_refs(
vm_map_entry_t entry,
vm_object_t object)
{
register int ref_count;
register vm_object_t chk_obj;
register vm_object_t tmp_obj;
if (entry->object.vm_object == 0)
return(0);
if (entry->is_sub_map)
ref_count = vm_region_count_obj_refs((vm_map_entry_t)entry->object.sub_map, object);
else {
ref_count = 0;
chk_obj = entry->object.vm_object;
vm_object_lock(chk_obj);
while (chk_obj) {
if (chk_obj == object)
ref_count++;
if (tmp_obj = chk_obj->shadow)
vm_object_lock(tmp_obj);
vm_object_unlock(chk_obj);
chk_obj = tmp_obj;
}
}
return(ref_count);
}
/*
* Routine: vm_map_simplify
*
* Description:
* Attempt to simplify the map representation in
* the vicinity of the given starting address.
* Note:
* This routine is intended primarily to keep the
* kernel maps more compact -- they generally don't
* benefit from the "expand a map entry" technology
* at allocation time because the adjacent entry
* is often wired down.
*/
void
vm_map_simplify(
vm_map_t map,
vm_offset_t start)
{
vm_map_entry_t this_entry;
vm_map_entry_t prev_entry;
vm_map_entry_t next_entry;
vm_map_lock(map);
if (
(vm_map_lookup_entry(map, start, &this_entry)) &&
((prev_entry = this_entry->vme_prev) != vm_map_to_entry(map)) &&
(prev_entry->vme_end == this_entry->vme_start) &&
(prev_entry->is_shared == FALSE) &&
(prev_entry->is_sub_map == FALSE) &&
(this_entry->is_shared == FALSE) &&
(this_entry->is_sub_map == FALSE) &&
(prev_entry->inheritance == this_entry->inheritance) &&
(prev_entry->protection == this_entry->protection) &&
(prev_entry->max_protection == this_entry->max_protection) &&
(prev_entry->behavior == this_entry->behavior) &&
(prev_entry->wired_count == this_entry->wired_count) &&
(prev_entry->user_wired_count == this_entry->user_wired_count)&&
(prev_entry->in_transition == FALSE) &&
(this_entry->in_transition == FALSE) &&
(prev_entry->needs_copy == this_entry->needs_copy) &&
(prev_entry->object.vm_object == this_entry->object.vm_object)&&
((prev_entry->offset +
(prev_entry->vme_end - prev_entry->vme_start))
== this_entry->offset)
) {
SAVE_HINT(map, prev_entry);
vm_map_entry_unlink(map, this_entry);
prev_entry->vme_end = this_entry->vme_end;
UPDATE_FIRST_FREE(map, map->first_free);
vm_object_deallocate(this_entry->object.vm_object);
vm_map_entry_dispose(map, this_entry);
counter(c_vm_map_simplified_lower++);
}
if (
(vm_map_lookup_entry(map, start, &this_entry)) &&
((next_entry = this_entry->vme_next) != vm_map_to_entry(map)) &&
(next_entry->vme_start == this_entry->vme_end) &&
(next_entry->is_shared == FALSE) &&
(next_entry->is_sub_map == FALSE) &&
(next_entry->is_shared == FALSE) &&
(next_entry->is_sub_map == FALSE) &&
(next_entry->inheritance == this_entry->inheritance) &&
(next_entry->protection == this_entry->protection) &&
(next_entry->max_protection == this_entry->max_protection) &&
(next_entry->behavior == this_entry->behavior) &&
(next_entry->wired_count == this_entry->wired_count) &&
(next_entry->user_wired_count == this_entry->user_wired_count)&&
(this_entry->in_transition == FALSE) &&
(next_entry->in_transition == FALSE) &&
(next_entry->needs_copy == this_entry->needs_copy) &&
(next_entry->object.vm_object == this_entry->object.vm_object)&&
((this_entry->offset +
(this_entry->vme_end - this_entry->vme_start))
== next_entry->offset)
) {
vm_map_entry_unlink(map, next_entry);
this_entry->vme_end = next_entry->vme_end;
UPDATE_FIRST_FREE(map, map->first_free);
vm_object_deallocate(next_entry->object.vm_object);
vm_map_entry_dispose(map, next_entry);
counter(c_vm_map_simplified_upper++);
}
counter(c_vm_map_simplify_called++);
vm_map_unlock(map);
}
/*
* Routine: vm_map_machine_attribute
* Purpose:
* Provide machine-specific attributes to mappings,
* such as cachability etc. for machines that provide
* them. NUMA architectures and machines with big/strange
* caches will use this.
* Note:
* Responsibilities for locking and checking are handled here,
* everything else in the pmap module. If any non-volatile
* information must be kept, the pmap module should handle
* it itself. [This assumes that attributes do not
* need to be inherited, which seems ok to me]
*/
kern_return_t
vm_map_machine_attribute(
vm_map_t map,
vm_offset_t address,
vm_size_t size,
vm_machine_attribute_t attribute,
vm_machine_attribute_val_t* value) /* IN/OUT */
{
kern_return_t ret;
if (address < vm_map_min(map) ||
(address + size) > vm_map_max(map))
return KERN_INVALID_ADDRESS;
vm_map_lock(map);
ret = pmap_attribute(map->pmap, address, size, attribute, value);
vm_map_unlock(map);
return ret;
}
/*
* vm_map_behavior_set:
*
* Sets the paging reference behavior of the specified address
* range in the target map. Paging reference behavior affects
* how pagein operations resulting from faults on the map will be
* clustered.
*/
kern_return_t
vm_map_behavior_set(
vm_map_t map,
vm_offset_t start,
vm_offset_t end,
vm_behavior_t new_behavior)
{
register vm_map_entry_t entry;
vm_map_entry_t temp_entry;
XPR(XPR_VM_MAP,
"vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
(integer_t)map, start, end, new_behavior, 0);
switch (new_behavior) {
case VM_BEHAVIOR_DEFAULT:
case VM_BEHAVIOR_RANDOM:
case VM_BEHAVIOR_SEQUENTIAL:
case VM_BEHAVIOR_RSEQNTL:
break;
default:
return(KERN_INVALID_ARGUMENT);
}
vm_map_lock(map);
/*
* The entire address range must be valid for the map.
* Note that vm_map_range_check() does a
* vm_map_lookup_entry() internally and returns the
* entry containing the start of the address range if
* the entire range is valid.
*/
if (vm_map_range_check(map, start, end, &temp_entry)) {
entry = temp_entry;
vm_map_clip_start(map, entry, start);
}
else {
vm_map_unlock(map);
return(KERN_INVALID_ADDRESS);
}
while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
vm_map_clip_end(map, entry, end);
entry->behavior = new_behavior;
entry = entry->vme_next;
}
vm_map_unlock(map);
return(KERN_SUCCESS);
}
#if DIPC
/*
* This should one day be eliminated;
* we should always construct the right flavor of copy object
* the first time. Troublesome areas include vm_read, where vm_map_copyin
* is called without knowing whom the copy object is for.
* There are also situations where we do want a lazy data structure
* even if we are sending to a remote port...
*/
extern kern_return_t vm_map_object_to_page_list(
vm_map_copy_t *caller_copy);
/*
* Convert a copy to a page list. The copy argument is in/out
* because we probably have to allocate a new vm_map_copy structure.
* We take responsibility for discarding the old structure and
* use a continuation to do so. Postponing this discard ensures
* that the objects containing the pages we've marked busy will stick
* around.
*
* N.B. For the entry list case, be warned that this routine steals
* the pages from the entry list's objects!
*/
kern_return_t
vm_map_convert_to_page_list(
vm_map_copy_t *caller_copy)
{
vm_map_entry_t entry;
vm_offset_t va;
vm_offset_t offset;
vm_object_t object;
vm_map_copy_t copy, new_copy;
vm_size_t vm_copy_size;
vm_prot_t result_prot;
vm_page_t m, top_page;
kern_return_t result;
kern_return_t kr;
copy = *caller_copy;
/*
* We may not have to do anything,
* or may not be able to do anything.
*/
if (copy == VM_MAP_COPY_NULL || copy->type == VM_MAP_COPY_PAGE_LIST) {
return KERN_SUCCESS;
}
if (copy->type == VM_MAP_COPY_OBJECT) {
return vm_map_object_to_page_list(caller_copy);
}
assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
/*
* Check size. If this is really big, copy it out
* to the kernel map, and copy in using src_destroy
*/
vm_copy_size = round_page(copy->offset + copy->size) -
trunc_page(copy->offset);
if (vm_copy_size > (VM_MAP_COPY_PAGE_LIST_MAX * PAGE_SIZE)) {
vm_size_t copy_size;
offset = copy->offset;
copy_size = copy->size;
result = vm_map_copyout(kernel_map, &va, copy);
assert(result == KERN_SUCCESS);
va += offset - trunc_page(offset);
result = vm_map_copyin_page_list(kernel_map, va, copy_size,
(VM_MAP_COPYIN_OPT_SRC_DESTROY|
VM_MAP_COPYIN_OPT_STEAL_PAGES|
VM_PROT_READ),
caller_copy, FALSE);
return(result);
}
/*
* Allocate the new copy. Set its continuation to
* discard the old one.
*/
new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
new_copy->type = VM_MAP_COPY_PAGE_LIST;
new_copy->cpy_npages = 0;
new_copy->cpy_page_loose = FALSE;
new_copy->offset = copy->offset;
new_copy->size = copy->size;
new_copy->cpy_cont = vm_map_copy_discard_cont;
new_copy->cpy_cont_args = (vm_map_copyin_args_t) copy;
/*
* Iterate over entries.
*/
for (entry = vm_map_copy_first_entry(copy);
entry != vm_map_copy_to_entry(copy);
entry = entry->vme_next) {
object = entry->object.vm_object;
offset = entry->offset;
/*
* Iterate over pages.
*/
for (va = entry->vme_start;
va < entry->vme_end;
va += PAGE_SIZE, offset += PAGE_SIZE) {
assert(new_copy->cpy_npages !=
VM_MAP_COPY_PAGE_LIST_MAX);
/*
* If the object is null, this is
* zero fill data.
*/
if (object == VM_OBJECT_NULL) {
while ((m = vm_page_grab()) == VM_PAGE_NULL) {
VM_PAGE_WAIT();
}
vm_page_zero_fill(m);
vm_page_gobble(m);
new_copy->cpy_page_list[
new_copy->cpy_npages++] = m;
continue;
}
/*
* Try to find the page of data.
*/
vm_object_lock(object);
if (((m = vm_page_lookup(object, offset)) !=
VM_PAGE_NULL) && !m->busy && !m->fictitious &&
!m->unusual) {
/*
* This is the page. Mark it busy.
* Remove it from its old object.
*/
m->busy = TRUE;
vm_page_lock_queues();
VM_PAGE_QUEUES_REMOVE(m);
vm_page_remove(m);
vm_page_unlock_queues();
vm_object_unlock(object);
new_copy->cpy_page_list[new_copy->cpy_npages++]
= m;
continue;
}
retry:
result_prot = VM_PROT_READ;
vm_object_paging_begin(object);
XPR(XPR_VM_FAULT,
"vm_map_convert_to_page_list -> vm_fault_page\n",
0,0,0,0,0);
kr = vm_fault_page(object, offset,
VM_PROT_READ, FALSE, THREAD_UNINT,
entry->offset,
entry->offset +
(entry->vme_end -
entry->vme_start),
VM_BEHAVIOR_SEQUENTIAL,
&result_prot, &m, &top_page,
(int *)0,
FALSE, FALSE, FALSE);
if (kr == VM_FAULT_MEMORY_SHORTAGE) {
VM_PAGE_WAIT();
vm_object_lock(object);
goto retry;
}
if (kr == VM_FAULT_FICTITIOUS_SHORTAGE) {
vm_page_more_fictitious();
vm_object_lock(object);
goto retry;
}
if (kr != VM_FAULT_SUCCESS) {
/* XXX what about data_error? */
vm_object_lock(object);
goto retry;
}
assert(m != VM_PAGE_NULL);
m->busy = TRUE;
vm_page_lock_queues();
VM_PAGE_QUEUES_REMOVE(m);
vm_page_remove(m);
vm_page_unlock_queues();
vm_object_paging_end(object);
vm_object_unlock(object);
new_copy->cpy_page_list[new_copy->cpy_npages++] = m;
if (top_page != VM_PAGE_NULL) {
assert(top_page->object == object);
vm_object_lock(object);
VM_PAGE_FREE(top_page);
vm_object_paging_end(object);
vm_object_unlock(object);
}
}
}
*caller_copy = new_copy;
return KERN_SUCCESS;
}
/*
* Continue converting pages from an object-flavor
* copy object into a page-list flavor copy object.
* The parameters in the copy object were updated
* by the last invocation of vm_map_object_to_page_list
* so there's no work to be done other than calling
* back into that routine. Note that the contents
* of the underlying object itself never change
* during the conversion process.
*/
kern_return_t
vm_map_object_to_page_list_cont(
vm_map_copy_t cont_args,
vm_map_copy_t *copy_result) /* OUT */
{
boolean_t abort;
abort = (copy_result == (vm_map_copy_t *) 0);
if (abort) {
printf("vm_map_object_to_page_list_cont: abort\n");
vm_map_copy_discard(cont_args);
}
assert(cont_args->type == VM_MAP_COPY_OBJECT);
*copy_result = cont_args;
return vm_map_object_to_page_list(copy_result);
}
/*
* Convert a copy object from object flavor to
* page list flavor. For objects containing more
* than the allowable maximum number of pages, we
* perform the conversion a piece at a time.
*
* The object flavor copy object contains a vm_object_t
* that has all the pages; the type, offset and size
* fields always present in a vm_map_copy_t; and an
* index field unique to the object flavor. This index
* field MUST be zero when first calling this routine.
* It is used to record progress converting pages from
* the object to the page list.
*
* Assumptions:
* - This routine always returns KERN_SUCCESS.
*/
kern_return_t
vm_map_object_to_page_list(
vm_map_copy_t *caller_copy)
{
vm_object_t object;
vm_offset_t offset;
vm_map_copy_t copy, new_copy;
vm_size_t index;
vm_size_t copy_size;
vm_page_t m;
vm_prot_t result_prot;
vm_page_t top_page;
kern_return_t kr;
assert(caller_copy != (vm_map_copy_t *) 0);
assert(*caller_copy != VM_MAP_COPY_NULL);
assert((*caller_copy)->type == VM_MAP_COPY_OBJECT);
copy = *caller_copy;
object = copy->cpy_object;
/*
* Allocate the new copy. Set its continuation to
* discard the old one.
*/
new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
new_copy->type = VM_MAP_COPY_PAGE_LIST;
new_copy->cpy_npages = 0;
new_copy->cpy_page_loose = FALSE;
new_copy->offset = copy->offset - trunc_page(copy->offset);
new_copy->size = copy->size;
assert((long)new_copy->size >= 0);
if (new_copy->size == 0)
printf("vm_object_to_page_list: zero size\n");
new_copy->cpy_cont = vm_map_copy_discard_cont;
new_copy->cpy_cont_args = (vm_map_copyin_args_t) copy;
/*
* Compute range of object to extract.
*/
index = (vm_size_t)copy->cpy_index; /* XXX!XXX! */
assert(copy->offset < page_size);
copy_size = round_page(copy->offset + copy->size);
assert(page_aligned(copy_size));
/*
* Detect an object larger than the maximum permitted
* for an individual page list block. Save the rest
* for later.
*/
if (copy_size > VM_MAP_COPY_PAGE_LIST_MAX_SIZE) {
/*
* Only the first copy object in the chain
* has the unaligned offset and total size.
* Each succeeding copy object has an offset
* of zero and a size decreased by the amount
* contained in the previous copy object.
* These values will be used when creating the
* next page list copy object.
*
* Reset the continuation because there are
* more pages left to extract.
*/
assert(trunc_page(copy->offset) == 0);
copy->size = copy->size -
(VM_MAP_COPY_PAGE_LIST_MAX_SIZE - copy->offset);
copy->offset = 0;
copy_size = VM_MAP_COPY_PAGE_LIST_MAX_SIZE;
new_copy->cpy_cont = (vm_map_copy_cont_t)
vm_map_object_to_page_list_cont;
assert(vm_map_copy_cont_is_valid(new_copy));
}
/*
* Fault in all pages that aren't present.
* Modelled on vm_map_copyin_page_list.
*/
for (offset = index; offset < index + copy_size; offset += PAGE_SIZE) {
vm_object_lock(object);
vm_object_paging_begin(object);
m = vm_page_lookup(object, offset);
if ((m != VM_PAGE_NULL) && !m->busy && !m->fictitious &&
!m->unusual) {
m->busy = TRUE;
} else {
retry:
result_prot = VM_PROT_READ;
XPR(XPR_VM_FAULT,
"vm_object_to_page_list -> vm_fault_page\n",
0,0,0,0,0);
kr = vm_fault_page(object, offset,
VM_PROT_READ, FALSE, THREAD_UNINT,
offset, index + copy_size,/* XXX ? */
VM_BEHAVIOR_SEQUENTIAL,
&result_prot, &m, &top_page,
(int *)0,
0, FALSE, FALSE);
switch (kr) {
case VM_FAULT_SUCCESS:
break;
case VM_FAULT_MEMORY_SHORTAGE:
VM_PAGE_WAIT();
vm_object_lock(object);
vm_object_paging_begin(object);
goto retry;
case VM_FAULT_FICTITIOUS_SHORTAGE:
vm_page_more_fictitious();
/* fall through... */
case VM_FAULT_INTERRUPTED:
case VM_FAULT_RETRY:
vm_object_lock(object);
vm_object_paging_begin(object);
goto retry;
case VM_FAULT_MEMORY_ERROR:
panic("vm_map_object_to_page_list");
break;
}
if (top_page != VM_PAGE_NULL) {
VM_PAGE_FREE(top_page);
vm_object_paging_end(object);
}
}
assert(m);
assert(m->busy);
/* assert(m->wire_count == 0); XXX */
/*
* Got the page. Save it in the page list
* and rip it away from the object.
*/
new_copy->cpy_page_list[new_copy->cpy_npages++] = m;
vm_page_lock_queues();
VM_PAGE_QUEUES_REMOVE(m);
vm_page_remove(m);
vm_page_unlock_queues();
vm_object_paging_end(object);
vm_object_unlock(object);
}
/*
* Update index for next pass through the object.
*/
copy->cpy_index += copy_size;
*caller_copy = new_copy;
assert(vm_map_copy_cont_is_valid(new_copy));
return (KERN_SUCCESS);
}
/*
* When allocating a new entry, vm_map_entry_list_from_object must
* use the pageable v. non-pageable entry zone based on the
* attribute of the map into which the entry will be pasted.
* This information must be supplied by the caller, as there is
* no way to obtain it from the object.
*
* N.B. Caller donates a reference.
*/
vm_map_copy_t
vm_map_entry_list_from_object(
vm_object_t object,
vm_offset_t offset,
vm_size_t size,
boolean_t pageable)
{
vm_map_entry_t new_entry;
vm_map_copy_t copy;
assert(object != VM_OBJECT_NULL);
assert(size != 0);
copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
assert(copy != VM_MAP_COPY_NULL);
copy->type = VM_MAP_COPY_ENTRY_LIST;
vm_map_copy_first_entry(copy) =
vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
copy->cpy_hdr.nentries = 0;
copy->cpy_hdr.entries_pageable = pageable;
copy->offset = offset;
copy->size = size;
/*
* Allocate and initialize an entry for the object.
*/
new_entry = vm_map_copy_entry_create(copy);
new_entry->vme_start = trunc_page(copy->offset);
new_entry->vme_end = round_page(copy->offset + copy->size);
new_entry->object.vm_object = object;
new_entry->offset = offset;
new_entry->is_shared = FALSE;
new_entry->is_sub_map = FALSE;
new_entry->needs_copy = FALSE;
new_entry->protection = VM_PROT_DEFAULT;
new_entry->max_protection = VM_PROT_ALL;
new_entry->inheritance = VM_INHERIT_DEFAULT;
new_entry->wired_count = 0;
new_entry->user_wired_count = 0;
new_entry->in_transition = FALSE;
/*
* Insert entry into copy object, and return.
*/
vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy), new_entry);
assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
return (copy);
}
/*
* Convert a page list copy object to an entry list
* flavor copy object.
*/
kern_return_t
vm_map_convert_to_entry_list(
vm_map_copy_t copy,
boolean_t pageable)
{
vm_object_t object;
int i;
vm_map_entry_t new_entry;
vm_page_t *page_list;
vm_map_copy_t cur_copy, new_copy;
vm_offset_t offset;
kern_return_t result;
/*
* Check type of copy object.
*/
if (copy->type != VM_MAP_COPY_PAGE_LIST) {
panic("vm_map_convert_to_entry_list 0x%x %d", copy, copy->type);
}
/*
* Insert all the pages into a new object.
*/
object = vm_object_allocate(round_page(copy->offset + copy->size) -
trunc_page(copy->offset));
offset = (vm_offset_t) 0;
cur_copy = copy;
while (cur_copy) {
/*
* Make sure the pages are loose. This may be
* a "Can't Happen", but just to be safe ...
*/
page_list = &cur_copy->cpy_page_list[0];
if (!copy->cpy_page_loose)
vm_map_copy_steal_pages(cur_copy);
/*
* Stuff this set of pages into the object, removing
* them from the page list.
*/
vm_object_lock(object);
vm_page_lock_queues();
for (i = 0; i < cur_copy->cpy_npages;
i++, offset += PAGE_SIZE) {
register vm_page_t m = *page_list;
vm_page_insert(m, object, offset);
m->busy = FALSE;
m->dirty = TRUE;
vm_page_activate(m);
*page_list++ = VM_PAGE_NULL;
}
vm_page_unlock_queues();
vm_object_unlock(object);
/*
* Invoke continuation if present.
*/
if (vm_map_copy_has_cont(cur_copy)) {
vm_map_copy_invoke_cont(cur_copy, &new_copy, &result);
if (result != KERN_SUCCESS)
panic("%s: %s",
"vm_map_convert_to_entry_list",
"continuation failure");
} else {
new_copy = VM_MAP_COPY_NULL;
}
if (cur_copy != copy)
vm_map_copy_discard(cur_copy);
cur_copy = new_copy;
}
/*
* Change type of copy object
*/
vm_map_copy_first_entry(copy) =
vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
copy->type = VM_MAP_COPY_ENTRY_LIST;
copy->cpy_hdr.nentries = 0;
copy->cpy_hdr.entries_pageable = pageable;
/*
* Allocate and initialize an entry for object
*/
new_entry = vm_map_copy_entry_create(copy);
new_entry->vme_start = trunc_page(copy->offset);
new_entry->vme_end = round_page(copy->offset + copy->size);
new_entry->object.vm_object = object;
new_entry->offset = 0;
new_entry->is_shared = FALSE;
new_entry->is_sub_map = FALSE;
new_entry->needs_copy = FALSE;
new_entry->protection = VM_PROT_DEFAULT;
new_entry->max_protection = VM_PROT_ALL;
new_entry->behavior = VM_BEHAVIOR_DEFAULT;
new_entry->inheritance = VM_INHERIT_DEFAULT;
new_entry->wired_count = 0;
new_entry->user_wired_count = 0;
/*
* Insert entry into copy object, and return.
*/
vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy), new_entry);
return(KERN_SUCCESS);
}
#endif /* DIPC */
int
vm_map_copy_cont_is_valid(
vm_map_copy_t copy)
{
vm_map_copy_cont_t cont;
assert(copy->type == VM_MAP_COPY_PAGE_LIST);
cont = copy->cpy_cont;
if (
cont != vm_map_copy_discard_cont &&
cont != vm_map_copyin_page_list_cont ) {
printf("vm_map_copy_cont_is_valid: bogus cont 0x%x\n", cont);
assert((integer_t) cont == 0xdeadbeef);
}
return 1;
}
#include <mach_kdb.h>
#if MACH_KDB
#include <ddb/db_output.h>
#include <vm/vm_print.h>
#define printf db_printf
/*
* Forward declarations for internal functions.
*/
extern void vm_map_links_print(
struct vm_map_links *links);
extern void vm_map_header_print(
struct vm_map_header *header);
extern void vm_map_entry_print(
vm_map_entry_t entry);
extern void vm_follow_entry(
vm_map_entry_t entry);
extern void vm_follow_map(
vm_map_t map);
/*
* vm_map_links_print: [ debug ]
*/
void
vm_map_links_print(
struct vm_map_links *links)
{
iprintf("prev=0x%x, next=0x%x, start=0x%x, end=0x%x\n",
links->prev,
links->next,
links->start,
links->end);
}
/*
* vm_map_header_print: [ debug ]
*/
void
vm_map_header_print(
struct vm_map_header *header)
{
vm_map_links_print(&header->links);
iprintf("nentries=0x%x, %sentries_pageable\n",
header->nentries,
(header->entries_pageable ? "" : "!"));
}
/*
* vm_follow_entry: [ debug ]
*/
void
vm_follow_entry(
vm_map_entry_t entry)
{
extern int db_indent;
int shadows;
iprintf("map entry 0x%x:\n", entry);
db_indent += 2;
shadows = vm_follow_object(entry->object.vm_object);
iprintf("Total objects : %d\n",shadows);
db_indent -= 2;
}
/*
* vm_map_entry_print: [ debug ]
*/
void
vm_map_entry_print(
register vm_map_entry_t entry)
{
extern int db_indent;
static char *inheritance_name[4] = { "share", "copy", "none", "?"};
static char *behavior_name[4] = { "dflt", "rand", "seqtl", "rseqntl" };
iprintf("map entry 0x%x:\n", entry);
db_indent += 2;
vm_map_links_print(&entry->links);
iprintf("start=0x%x, end=0x%x, prot=%x/%x/%s\n",
entry->vme_start,
entry->vme_end,
entry->protection,
entry->max_protection,
inheritance_name[(entry->inheritance & 0x3)]);
iprintf("behavior=%s, wired_count=%d, user_wired_count=%d\n",
behavior_name[(entry->behavior & 0x3)],
entry->wired_count,
entry->user_wired_count);
iprintf("%sin_transition, %sneeds_wakeup\n",
(entry->in_transition ? "" : "!"),
(entry->needs_wakeup ? "" : "!"));
if (entry->is_sub_map) {
iprintf("submap=0x%x, offset=0x%x\n",
entry->object.sub_map,
entry->offset);
} else {
iprintf("object=0x%x, offset=0x%x, ",
entry->object.vm_object,
entry->offset);
printf("%sis_shared, %sneeds_copy\n",
(entry->is_shared ? "" : "!"),
(entry->needs_copy ? "" : "!"));
}
db_indent -= 2;
}
/*
* vm_follow_map: [ debug ]
*/
void
vm_follow_map(
vm_map_t map)
{
register vm_map_entry_t entry;
extern int db_indent;
iprintf("task map 0x%x:\n", map);
db_indent += 2;
for (entry = vm_map_first_entry(map);
entry && entry != vm_map_to_entry(map);
entry = entry->vme_next) {
vm_follow_entry(entry);
}
db_indent -= 2;
}
/*
* vm_map_print: [ debug ]
*/
void
vm_map_print(
register vm_map_t map)
{
register vm_map_entry_t entry;
extern int db_indent;
char *swstate;
iprintf("task map 0x%x:\n", map);
db_indent += 2;
vm_map_header_print(&map->hdr);
iprintf("pmap=0x%x, size=%d, ref=%d, hint=0x%x, first_free=0x%x\n",
map->pmap,
map->size,
map->ref_count,
map->hint,
map->first_free);
iprintf("%swait_for_space, %swiring_required, timestamp=%d\n",
(map->wait_for_space ? "" : "!"),
(map->wiring_required ? "" : "!"),
map->timestamp);
#if TASK_SWAPPER
switch (map->sw_state) {
case MAP_SW_IN:
swstate = "SW_IN";
break;
case MAP_SW_OUT:
swstate = "SW_OUT";
break;
default:
swstate = "????";
break;
}
iprintf("res=%d, sw_state=%s\n", map->res_count, swstate);
#endif /* TASK_SWAPPER */
for (entry = vm_map_first_entry(map);
entry && entry != vm_map_to_entry(map);
entry = entry->vme_next) {
vm_map_entry_print(entry);
}
db_indent -= 2;
}
/*
* Routine: vm_map_copy_print
* Purpose:
* Pretty-print a copy object for ddb.
*/
void
vm_map_copy_print(
vm_map_copy_t copy)
{
extern int db_indent;
int i, npages;
vm_map_entry_t entry;
printf("copy object 0x%x\n", copy);
db_indent += 2;
iprintf("type=%d", copy->type);
switch (copy->type) {
case VM_MAP_COPY_ENTRY_LIST:
printf("[entry_list]");
break;
case VM_MAP_COPY_OBJECT:
printf("[object]");
break;
case VM_MAP_COPY_PAGE_LIST:
printf("[page_list]");
break;
case VM_MAP_COPY_KERNEL_BUFFER:
printf("[kernel_buffer]");
break;
default:
printf("[bad type]");
break;
}
printf(", offset=0x%x", copy->offset);
printf(", size=0x%x\n", copy->size);
switch (copy->type) {
case VM_MAP_COPY_ENTRY_LIST:
vm_map_header_print(©->cpy_hdr);
for (entry = vm_map_copy_first_entry(copy);
entry && entry != vm_map_copy_to_entry(copy);
entry = entry->vme_next) {
vm_map_entry_print(entry);
}
break;
case VM_MAP_COPY_OBJECT:
iprintf("object=0x%x\n", copy->cpy_object);
break;
case VM_MAP_COPY_KERNEL_BUFFER:
iprintf("kernel buffer=0x%x", copy->cpy_kdata);
printf(", kalloc_size=0x%x\n", copy->cpy_kalloc_size);
break;
case VM_MAP_COPY_PAGE_LIST:
iprintf("npages=%d", copy->cpy_npages);
printf(", cont=%x", copy->cpy_cont);
printf(", cont_args=%x\n", copy->cpy_cont_args);
if (copy->cpy_npages < 0) {
npages = 0;
} else if (copy->cpy_npages > VM_MAP_COPY_PAGE_LIST_MAX) {
npages = VM_MAP_COPY_PAGE_LIST_MAX;
} else {
npages = copy->cpy_npages;
}
iprintf("copy->cpy_page_list[0..%d] = {", npages);
for (i = 0; i < npages - 1; i++) {
printf("0x%x, ", copy->cpy_page_list[i]);
}
if (npages > 0) {
printf("0x%x", copy->cpy_page_list[npages - 1]);
}
printf("}\n");
break;
}
db_indent -=2;
}
/*
* db_vm_map_total_size(map) [ debug ]
*
* return the total virtual size (in bytes) of the map
*/
vm_size_t
db_vm_map_total_size(
vm_map_t map)
{
vm_map_entry_t entry;
vm_size_t total;
total = 0;
for (entry = vm_map_first_entry(map);
entry != vm_map_to_entry(map);
entry = entry->vme_next) {
total += entry->vme_end - entry->vme_start;
}
return total;
}
#endif /* MACH_KDB */
/*
* Routine: vm_map_entry_insert
*
* Descritpion: This routine inserts a new vm_entry in a locked map.
*/
vm_map_entry_t
vm_map_entry_insert(
vm_map_t map,
vm_map_entry_t insp_entry,
vm_offset_t start,
vm_offset_t end,
vm_object_t object,
vm_offset_t offset,
boolean_t needs_copy,
boolean_t is_shared,
boolean_t in_transition,
vm_prot_t cur_protection,
vm_prot_t max_protection,
vm_behavior_t behavior,
vm_inherit_t inheritance,
unsigned wired_count)
{
vm_map_entry_t new_entry;
assert(insp_entry != (vm_map_entry_t)0);
new_entry = vm_map_entry_create(map);
new_entry->vme_start = start;
new_entry->vme_end = end;
assert(page_aligned(new_entry->vme_start));
assert(page_aligned(new_entry->vme_end));
new_entry->object.vm_object = object;
new_entry->offset = offset;
new_entry->is_shared = is_shared;
new_entry->is_sub_map = FALSE;
new_entry->needs_copy = needs_copy;
new_entry->in_transition = in_transition;
new_entry->needs_wakeup = FALSE;
new_entry->inheritance = inheritance;
new_entry->protection = cur_protection;
new_entry->max_protection = max_protection;
new_entry->behavior = behavior;
new_entry->wired_count = wired_count;
new_entry->user_wired_count = 0;
/*
* Insert the new entry into the list.
*/
vm_map_entry_link(map, insp_entry, new_entry);
map->size += end - start;
/*
* Update the free space hint and the lookup hint.
*/
SAVE_HINT(map, new_entry);
return new_entry;
}
/*
* Routine: vm_remap_extract
*
* Descritpion: This routine returns a vm_entry list from a map.
*/
kern_return_t
vm_remap_extract(
vm_map_t map,
vm_offset_t addr,
vm_size_t size,
boolean_t copy,
struct vm_map_header *map_header,
vm_prot_t *cur_protection,
vm_prot_t *max_protection,
/* What, no behavior? */
vm_inherit_t inheritance,
boolean_t pageable)
{
kern_return_t result;
vm_size_t mapped_size;
vm_size_t tmp_size;
vm_map_entry_t src_entry; /* result of last map lookup */
vm_map_entry_t new_entry;
vm_offset_t offset;
vm_offset_t map_address;
vm_offset_t src_start; /* start of entry to map */
vm_offset_t src_end; /* end of region to be mapped */
vm_object_t object;
vm_map_version_t version;
boolean_t src_needs_copy;
boolean_t new_entry_needs_copy;
assert(map != VM_MAP_NULL);
assert(size != 0 && size == round_page(size));
assert(inheritance == VM_INHERIT_NONE ||
inheritance == VM_INHERIT_COPY ||
inheritance == VM_INHERIT_SHARE);
/*
* Compute start and end of region.
*/
src_start = trunc_page(addr);
src_end = round_page(src_start + size);
/*
* Initialize map_header.
*/
map_header->links.next = (struct vm_map_entry *)&map_header->links;
map_header->links.prev = (struct vm_map_entry *)&map_header->links;
map_header->nentries = 0;
map_header->entries_pageable = pageable;
*cur_protection = VM_PROT_ALL;
*max_protection = VM_PROT_ALL;
map_address = 0;
mapped_size = 0;
result = KERN_SUCCESS;
/*
* The specified source virtual space might correspond to
* multiple map entries, need to loop on them.
*/
vm_map_lock(map);
while (mapped_size != size) {
vm_size_t entry_size;
/*
* Find the beginning of the region.
*/
if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
result = KERN_INVALID_ADDRESS;
break;
}
if (src_start < src_entry->vme_start ||
(mapped_size && src_start != src_entry->vme_start)) {
result = KERN_INVALID_ADDRESS;
break;
}
if(src_entry->is_sub_map) {
result = KERN_INVALID_ADDRESS;
break;
}
tmp_size = size - mapped_size;
if (src_end > src_entry->vme_end)
tmp_size -= (src_end - src_entry->vme_end);
entry_size = (vm_size_t)(src_entry->vme_end -
src_entry->vme_start);
if(src_entry->is_sub_map) {
vm_map_reference(src_entry->object.sub_map);
} else {
object = src_entry->object.vm_object;
if (object == VM_OBJECT_NULL) {
object = vm_object_allocate(entry_size);
src_entry->offset = 0;
src_entry->object.vm_object = object;
} else if (object->copy_strategy !=
MEMORY_OBJECT_COPY_SYMMETRIC) {
/*
* We are already using an asymmetric
* copy, and therefore we already have
* the right object.
*/
assert(!src_entry->needs_copy);
} else if (src_entry->needs_copy || object->shadowed ||
(object->internal && !object->true_share &&
!src_entry->is_shared &&
object->size > entry_size)) {
vm_object_shadow(&src_entry->object.vm_object,
&src_entry->offset,
entry_size);
if (!src_entry->needs_copy &&
(src_entry->protection & VM_PROT_WRITE)) {
pmap_protect(vm_map_pmap(map),
src_entry->vme_start,
src_entry->vme_end,
src_entry->protection &
~VM_PROT_WRITE);
}
object = src_entry->object.vm_object;
src_entry->needs_copy = FALSE;
}
vm_object_lock(object);
object->ref_count++; /* object ref. for new entry */
VM_OBJ_RES_INCR(object);
if (object->copy_strategy ==
MEMORY_OBJECT_COPY_SYMMETRIC) {
object->copy_strategy =
MEMORY_OBJECT_COPY_DELAY;
}
vm_object_unlock(object);
}
offset = src_entry->offset + (src_start - src_entry->vme_start);
new_entry = _vm_map_entry_create(map_header);
vm_map_entry_copy(new_entry, src_entry);
new_entry->vme_start = map_address;
new_entry->vme_end = map_address + tmp_size;
new_entry->inheritance = inheritance;
new_entry->offset = offset;
/*
* The new region has to be copied now if required.
*/
RestartCopy:
if (!copy) {
src_entry->is_shared = TRUE;
new_entry->is_shared = TRUE;
if (!(new_entry->is_sub_map))
new_entry->needs_copy = FALSE;
} else if (src_entry->is_sub_map) {
/* make this a COW sub_map if not already */
new_entry->needs_copy = TRUE;
} else if (src_entry->wired_count == 0 &&
vm_object_copy_quickly(&new_entry->object.vm_object,
new_entry->offset,
(new_entry->vme_end -
new_entry->vme_start),
&src_needs_copy,
&new_entry_needs_copy)) {
new_entry->needs_copy = new_entry_needs_copy;
new_entry->is_shared = FALSE;
/*
* Handle copy_on_write semantics.
*/
if (src_needs_copy && !src_entry->needs_copy) {
vm_object_pmap_protect(object,
offset,
entry_size,
(src_entry->is_shared ?
PMAP_NULL : map->pmap),
src_entry->vme_start,
src_entry->protection &
~VM_PROT_WRITE);
src_entry->needs_copy = TRUE;
}
/*
* Throw away the old object reference of the new entry.
*/
vm_object_deallocate(object);
} else {
new_entry->is_shared = FALSE;
/*
* The map can be safely unlocked since we
* already hold a reference on the object.
*
* Record the timestamp of the map for later
* verification, and unlock the map.
*/
version.main_timestamp = map->timestamp;
vm_map_unlock(map);
/*
* Perform the copy.
*/
if (src_entry->wired_count > 0) {
vm_object_lock(object);
result = vm_object_copy_slowly(
object,
offset,
entry_size,
THREAD_UNINT,
&new_entry->object.vm_object);
new_entry->offset = 0;
new_entry->needs_copy = FALSE;
} else {
result = vm_object_copy_strategically(
object,
offset,
entry_size,
&new_entry->object.vm_object,
&new_entry->offset,
&new_entry_needs_copy);
new_entry->needs_copy = new_entry_needs_copy;
}
/*
* Throw away the old object reference of the new entry.
*/
vm_object_deallocate(object);
if (result != KERN_SUCCESS &&
result != KERN_MEMORY_RESTART_COPY) {
_vm_map_entry_dispose(map_header, new_entry);
break;
}
/*
* Verify that the map has not substantially
* changed while the copy was being made.
*/
vm_map_lock(map); /* Increments timestamp once! */
if (version.main_timestamp + 1 != map->timestamp) {
/*
* Simple version comparison failed.
*
* Retry the lookup and verify that the
* same object/offset are still present.
*/
vm_object_deallocate(new_entry->
object.vm_object);
_vm_map_entry_dispose(map_header, new_entry);
if (result == KERN_MEMORY_RESTART_COPY)
result = KERN_SUCCESS;
continue;
}
if (result == KERN_MEMORY_RESTART_COPY) {
vm_object_reference(object);
goto RestartCopy;
}
}
_vm_map_entry_link(map_header,
map_header->links.prev, new_entry);
*cur_protection &= src_entry->protection;
*max_protection &= src_entry->max_protection;
map_address += tmp_size;
mapped_size += tmp_size;
src_start += tmp_size;
} /* end while */
vm_map_unlock(map);
if (result != KERN_SUCCESS) {
/*
* Free all allocated elements.
*/
for (src_entry = map_header->links.next;
src_entry != (struct vm_map_entry *)&map_header->links;
src_entry = new_entry) {
new_entry = src_entry->vme_next;
_vm_map_entry_unlink(map_header, src_entry);
vm_object_deallocate(src_entry->object.vm_object);
_vm_map_entry_dispose(map_header, src_entry);
}
}
return result;
}
/*
* Routine: vm_remap
*
* Map portion of a task's address space.
* Mapped region must not overlap more than
* one vm memory object. Protections and
* inheritance attributes remain the same
* as in the original task and are out parameters.
* Source and Target task can be identical
* Other attributes are identical as for vm_map()
*/
kern_return_t
vm_remap(
vm_map_t target_map,
vm_offset_t *address,
vm_size_t size,
vm_offset_t mask,
boolean_t anywhere,
vm_map_t src_map,
vm_offset_t memory_address,
boolean_t copy,
vm_prot_t *cur_protection,
vm_prot_t *max_protection,
vm_inherit_t inheritance)
{
kern_return_t result;
vm_map_entry_t entry;
vm_map_entry_t insp_entry;
vm_map_entry_t new_entry;
struct vm_map_header map_header;
if (target_map == VM_MAP_NULL)
return KERN_INVALID_ARGUMENT;
switch (inheritance) {
case VM_INHERIT_NONE:
case VM_INHERIT_COPY:
case VM_INHERIT_SHARE:
if (size != 0 && src_map != VM_MAP_NULL)
break;
/*FALL THRU*/
default:
return KERN_INVALID_ARGUMENT;
}
size = round_page(size);
result = vm_remap_extract(src_map, memory_address,
size, copy, &map_header,
cur_protection,
max_protection,
inheritance,
target_map->hdr.
entries_pageable);
vm_map_deallocate(src_map);
if (result != KERN_SUCCESS) {
return result;
}
/*
* Allocate/check a range of free virtual address
* space for the target
*/
*address = trunc_page(*address);
vm_map_lock(target_map);
result = vm_remap_range_allocate(target_map, address, size,
mask, anywhere, &insp_entry);
for (entry = map_header.links.next;
entry != (struct vm_map_entry *)&map_header.links;
entry = new_entry) {
new_entry = entry->vme_next;
_vm_map_entry_unlink(&map_header, entry);
if (result == KERN_SUCCESS) {
entry->vme_start += *address;
entry->vme_end += *address;
vm_map_entry_link(target_map, insp_entry, entry);
insp_entry = entry;
} else {
if (!entry->is_sub_map) {
vm_object_deallocate(entry->object.vm_object);
} else {
vm_map_deallocate(entry->object.sub_map);
}
_vm_map_entry_dispose(&map_header, entry);
}
}
if (result == KERN_SUCCESS) {
target_map->size += size;
SAVE_HINT(target_map, insp_entry);
}
vm_map_unlock(target_map);
if (result == KERN_SUCCESS && target_map->wiring_required)
result = vm_map_wire(target_map, *address,
*address + size, *cur_protection, TRUE);
return result;
}
/*
* Routine: vm_remap_range_allocate
*
* Description:
* Allocate a range in the specified virtual address map.
* returns the address and the map entry just before the allocated
* range
*
* Map must be locked.
*/
kern_return_t
vm_remap_range_allocate(
vm_map_t map,
vm_offset_t *address, /* IN/OUT */
vm_size_t size,
vm_offset_t mask,
boolean_t anywhere,
vm_map_entry_t *map_entry) /* OUT */
{
register vm_map_entry_t entry;
register vm_offset_t start;
register vm_offset_t end;
kern_return_t result = KERN_SUCCESS;
StartAgain: ;
start = *address;
if (anywhere)
{
/*
* Calculate the first possible address.
*/
if (start < map->min_offset)
start = map->min_offset;
if (start > map->max_offset)
return(KERN_NO_SPACE);
/*
* Look for the first possible address;
* if there's already something at this
* address, we have to start after it.
*/
assert(first_free_is_valid(map));
if (start == map->min_offset) {
if ((entry = map->first_free) != vm_map_to_entry(map))
start = entry->vme_end;
} else {
vm_map_entry_t tmp_entry;
if (vm_map_lookup_entry(map, start, &tmp_entry))
start = tmp_entry->vme_end;
entry = tmp_entry;
}
/*
* In any case, the "entry" always precedes
* the proposed new region throughout the
* loop:
*/
while (TRUE) {
register vm_map_entry_t next;
/*
* Find the end of the proposed new region.
* Be sure we didn't go beyond the end, or
* wrap around the address.
*/
end = ((start + mask) & ~mask);
if (end < start)
return(KERN_NO_SPACE);
start = end;
end += size;
if ((end > map->max_offset) || (end < start)) {
if (map->wait_for_space) {
if (size <= (map->max_offset -
map->min_offset)) {
assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
vm_map_unlock(map);
thread_block((void (*)(void))0);
vm_map_lock(map);
goto StartAgain;
}
}
return(KERN_NO_SPACE);
}
/*
* If there are no more entries, we must win.
*/
next = entry->vme_next;
if (next == vm_map_to_entry(map))
break;
/*
* If there is another entry, it must be
* after the end of the potential new region.
*/
if (next->vme_start >= end)
break;
/*
* Didn't fit -- move to the next entry.
*/
entry = next;
start = entry->vme_end;
}
*address = start;
} else {
vm_map_entry_t temp_entry;
/*
* Verify that:
* the address doesn't itself violate
* the mask requirement.
*/
if ((start & mask) != 0)
return(KERN_NO_SPACE);
/*
* ... the address is within bounds
*/
end = start + size;
if ((start < map->min_offset) ||
(end > map->max_offset) ||
(start >= end)) {
return(KERN_INVALID_ADDRESS);
}
/*
* ... the starting address isn't allocated
*/
if (vm_map_lookup_entry(map, start, &temp_entry))
return(KERN_NO_SPACE);
entry = temp_entry;
/*
* ... the next region doesn't overlap the
* end point.
*/
if ((entry->vme_next != vm_map_to_entry(map)) &&
(entry->vme_next->vme_start < end))
return(KERN_NO_SPACE);
}
*map_entry = entry;
return(KERN_SUCCESS);
}
/*
* vm_map_switch:
*
* Set the address map for the current thr_act to the specified map
*/
vm_map_t
vm_map_switch(
vm_map_t map)
{
int mycpu;
thread_act_t thr_act = current_act();
vm_map_t oldmap = thr_act->map;
mp_disable_preemption();
mycpu = cpu_number();
/*
* Deactivate the current map and activate the requested map
*/
PMAP_SWITCH_USER(thr_act, map, mycpu);
mp_enable_preemption();
return(oldmap);
}
/*
* Routine: vm_map_write_user
*
* Description:
* Copy out data from a kernel space into space in the
* destination map. The space must already exist in the
* destination map.
* NOTE: This routine should only be called by threads
* which can block on a page fault. i.e. kernel mode user
* threads.
*
*/
kern_return_t
vm_map_write_user(
vm_map_t map,
vm_offset_t src_addr,
vm_offset_t dst_addr,
vm_size_t size)
{
thread_act_t thr_act = current_act();
kern_return_t kr = KERN_SUCCESS;
if(thr_act->map == map) {
if (copyout((char *)src_addr, (char *)dst_addr, size)) {
kr = KERN_INVALID_ADDRESS;
}
} else {
vm_map_t oldmap;
/* take on the identity of the target map while doing */
/* the transfer */
vm_map_reference(map);
oldmap = vm_map_switch(map);
if (copyout((char *)src_addr, (char *)dst_addr, size)) {
kr = KERN_INVALID_ADDRESS;
}
vm_map_switch(oldmap);
vm_map_deallocate(map);
}
return kr;
}
/*
* Routine: vm_map_read_user
*
* Description:
* Copy in data from a user space source map into the
* kernel map. The space must already exist in the
* kernel map.
* NOTE: This routine should only be called by threads
* which can block on a page fault. i.e. kernel mode user
* threads.
*
*/
kern_return_t
vm_map_read_user(
vm_map_t map,
vm_offset_t src_addr,
vm_offset_t dst_addr,
vm_size_t size)
{
thread_act_t thr_act = current_act();
kern_return_t kr = KERN_SUCCESS;
if(thr_act->map == map) {
if (copyin((char *)src_addr, (char *)dst_addr, size)) {
kr = KERN_INVALID_ADDRESS;
}
} else {
vm_map_t oldmap;
/* take on the identity of the target map while doing */
/* the transfer */
vm_map_reference(map);
oldmap = vm_map_switch(map);
if (copyin((char *)src_addr, (char *)dst_addr, size)) {
kr = KERN_INVALID_ADDRESS;
}
vm_map_switch(oldmap);
vm_map_deallocate(map);
}
return kr;
}
/*
* Export routines to other components for the things we access locally through
* macros.
*/
#undef current_map
vm_map_t
current_map(void)
{
return (current_map_fast());
}