Source to kern/mapfs.c
/*
* Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
* "Portions Copyright (c) 1999 Apple Computer, Inc. All Rights
* Reserved. This file contains Original Code and/or Modifications of
* Original Code as defined in and that are subject to the Apple Public
* Source License Version 1.0 (the 'License'). You may not use this file
* except in compliance with the License. Please obtain a copy of the
* License at http://www.apple.com/publicsource and read it before using
* this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
* License for the specific language governing rights and limitations
* under the License."
*
* @APPLE_LICENSE_HEADER_END@
*/
/*
* Mach Operating System
* Copyright (c) 1987 Carnegie-Mellon University
* All rights reserved. The CMU software License Agreement specifies
* the terms and conditions for use and redistribution.
*/
/*
* File: mapfs.c
* Author: Avadis Tevanian, Jr.
*
* Copyright (C) 1987, Avadis Tevanian, Jr.
*
* Support for mapped file system implementation.
*
* HISTORY
* 2-Jun-1998 Umesh Vaishampayan
* Changed error handling to check for all errors.
*
* 6-Dec-1997 A.Ramesh at Apple
* Made the changes for Rhapsody; renamed mfs to mapfs to avoid confusion
* with the memory-based filesystem.
*
* 18-Nov-92 Phillip Dibner at NeXT
* Made the i/o throttle global. This is a hack on top of a hack and
* should be fixed properly, probably in the vm system.
*
* 3-Sep-92 Joe Murdock at NeXT
* Added an i/o throttle to mfs_io as a cheap work-around for an i/o buffer
* resource conflict with user-space system bottle-necks (nfs servers, etc.)
*
* 7-Feb-92 Jim Hays
* There are still bugs in this code dealing with vmp_pushing wired
* pages. We need to modify the sound drivers locks to be breakable
* except during the actual playing.
*
* 3-Aug-90 Doug Mitchell at NeXT
* Added primitives for loadable file system support.
*
* 7-Mar-90 Brian Pinkerton (bpinker) at NeXT
* Changed mfs_trunc to return an indication of change.
*
* 9-Mar-88 John Seamons (jks) at NeXT
* SUN_VFS: allocate vm_info structures from a zone.
*
* 29-Jan-88 David Golub (dbg) at Carnegie-Mellon University
* Corrected calls to inode_pager_setup and kmem_alloc.
*
* 15-Sep-87 Michael Young (mwyoung) at Carnegie-Mellon University
* De-linted.
*
* 18-Jun-87 Michael Young (mwyoung) at Carnegie-Mellon University
* Make most of this file dependent on MACH_NBC.
*
* 30-Apr-87 Avadis Tevanian (avie) at Carnegie-Mellon University
* Created.
*/
#import <mach_nbc.h>
#import <kern/lock.h>
#import <kern/mapfs.h>
#import <kern/sched_prim.h>
#import <kern/assert.h>
#import <sys/param.h>
#import <sys/systm.h>
#import <sys/mount.h>
#import <sys/proc.h>
#import <sys/user.h>
#import <sys/vnode.h>
#import <sys/uio.h>
/* Needed for VOP_DEBLOCKSIZE, ip usage */
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#import <sys/dir.h>
#import <vm/vm_kern.h>
#import <vm/vm_pager.h>
#import <mach/vm_param.h>
#import <mach/machine.h>
#import <vm/vnode_pager.h>
#import <vm/pmap.h>
#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsnode.h>
#define PERFMODS 1
struct zone *vm_info_zone;
/*
* Private variables and macros.
*/
queue_head_t vm_info_queue; /* lru list of structures */
decl_simple_lock_data(, vm_info_lock_data) /* lock for lru list */
int vm_info_version = 0; /* version number */
#define vm_info_lock() simple_lock(&vm_info_lock_data)
#define vm_info_unlock() simple_unlock(&vm_info_lock_data)
#if MACH_NBC
lock_data_t mfsbuf_lock; /* lock for active_mfsbufs */
lock_data_t mfs_alloc_lock_data;
boolean_t mfs_alloc_wanted;
long mfs_alloc_blocks = 0;
#define mfs_alloc_lock() lock_write(&mfs_alloc_lock_data)
#define mfs_alloc_unlock() lock_write_done(&mfs_alloc_lock_data)
vm_map_t mfs_map;
/*
* mfs_map_size is the number of bytes of VM to use for file mapping.
* It should be set by machine dependent code (before the call to
* mapfs_init) if the default is inappropriate.
*
* mfs_max_window is the largest window size that will be given to
* a file mapping. A default value is computed in mapfs_init based on
* mfs_map_size. This too may be set by machine dependent code
* if the default is not appropriate.
*
* mfs_files_max is the maximum number of files that we will
* simultaneously leave mapped. Note that memory for unmapped
* files will not necessarily leave the memory cache, but by
* unmapping these files the file system can throw away any
* file system related info (like vnodes). Again, this value
* can be set by machine dependent code if the default is not
* appropriate.
*/
#ifdef ppc
vm_size_t mfs_map_size = 64*1024*1024; /* size in bytes */
#else
vm_size_t mfs_map_size = 8*1024*1024; /* size in bytes */
#endif
vm_size_t mfs_max_window = 0; /* largest window to use */
#ifdef ppc
int mfs_files_max = 400; /* maximum # of files mapped */
#else
int mfs_files_max = 100; /* maximum # of files mapped */
#endif
int mfs_files_mapped = 0; /* current # mapped */
#define CHUNK_SIZE (128 * 1024)
#endif /* MACH_NBC */
#ifdef ppc
#define MFS_MAP_SIZE_MAX (64 * 1024 * 1024)
#else
#define MFS_MAP_SIZE_MAX (16 * 1024 * 1024)
#endif
/* MFS_MAP_SIZE_PER_UNIT is used in remap as well as in init */
#define MFS_MAP_SIZE_PER_UNIT (1024 * 1024)
#define MFS_MEMORY_UNIT (1024 * 1024)
#define MFS_FILES_PER_UNIT 12
void vm_info_enqueue __P((struct vm_info *));
void vm_info_dequeue __P((struct vm_info *));
void mapfs_put __P((struct vnode *));
int mapfs_get __P((struct vnode *,vm_offset_t, vm_size_t));
int remap_vnode __P((struct vnode *,vm_offset_t, vm_size_t));
void vmp_put __P((struct vm_info *));
void vmp_get __P((struct vm_info *));
void mapfs_cache_trim __P((void));
void mapfs_memfree __P((struct vm_info *, boolean_t));
int mapfs_map_remove __P((struct vm_info *, vm_offset_t, vm_size_t, boolean_t));
void vno_flush __P((struct vnode *, vm_offset_t, vm_size_t));
void vmp_invalidate __P((struct vm_info *));
int vmp_push __P((struct vm_info *));
int vmp_push_range __P((struct vm_info *,vm_offset_t, vm_size_t));
void vmp_push_all __P((struct vm_info *));
/* Missing from headers, so the prototypes are provided here */
void vm_object_deactivate_pages __P((vm_object_t));
void vm_object_deactivate_pages_first __P((vm_object_t));
void vm_page_deactivate __P((vm_page_t));
void vm_page_activate __P((vm_page_t));
kern_return_t vm_allocate_with_pager __P((vm_map_t, vm_offset_t *, vm_size_t, boolean_t, vm_pager_t,vm_offset_t));
#if PERFMODS
int mapfs_map_cleanup __P((struct vm_info *,vm_offset_t,vm_size_t,boolean_t));
#endif
/*
* mapfs_init:
*
* Initialize the mapped FS module.
*/
int
mapfs_init()
{
int i;
#if MACH_NBC
int min, max;
#endif /* MACH_NBC */
queue_init(&vm_info_queue);
simple_lock_init(&vm_info_lock_data);
#if MACH_NBC
lock_init(&mfs_alloc_lock_data, TRUE);
mfs_alloc_wanted = FALSE;
mfs_map = kmem_suballoc(kernel_map, &min, &max, mfs_map_size, TRUE);
mfs_map_size = (int) ((long long) MFS_MAP_SIZE_PER_UNIT /
(long long) MFS_MEMORY_UNIT *
(long long) machine_info.memory_size);
if (mfs_map_size > MFS_MAP_SIZE_MAX)
mfs_map_size = MFS_MAP_SIZE_MAX;
#if notdef
mfs_files_max = (int)((long long) MFS_FILES_PER_UNIT *
(long long) machine_info.memory_size /
(long long) MFS_MEMORY_UNIT);
#endif /* notdef */
/* Get at least a meg, and instead of 5% choose 6.25% */
if (mfs_max_window == 0)
mfs_max_window = mfs_map_size / 16;
if (mfs_max_window < MFS_MEMORY_UNIT)
mfs_max_window = MFS_MEMORY_UNIT;
#endif /* MACH_NBC */
i = (vm_size_t) sizeof (struct vm_info);
vm_info_zone = zinit (i, 10000*i, 8192, FALSE, "vm_info zone");
return(0);
}
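/*
 * Illustrative sketch (not part of the original source): the window
 * sizing arithmetic used in mapfs_init above, isolated so the scaling
 * rule is easier to see. One MFS_MAP_SIZE_PER_UNIT of map VM is
 * granted per MFS_MEMORY_UNIT of physical memory, clamped to
 * MFS_MAP_SIZE_MAX, and the per-file window defaults to 1/16 of the
 * map but never less than a megabyte. The helper name and parameters
 * are hypothetical.
 */
#if 0 /* example sketch only */
static void
sketch_window_sizing(long long memory_size, vm_size_t *map_size_out,
    vm_size_t *max_window_out)
{
    vm_size_t map_size, max_window;

    /* scale the map with physical memory, then clamp */
    map_size = (vm_size_t)((long long) MFS_MAP_SIZE_PER_UNIT /
        (long long) MFS_MEMORY_UNIT * memory_size);
    if (map_size > MFS_MAP_SIZE_MAX)
        map_size = MFS_MAP_SIZE_MAX;

    /* largest single window: 6.25% of the map, but at least a meg */
    max_window = map_size / 16;
    if (max_window < MFS_MEMORY_UNIT)
        max_window = MFS_MEMORY_UNIT;

    *map_size_out = map_size;
    *max_window_out = max_window;
}
#endif /* example sketch only */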
/*
* vm_info_init:
*
* Initialize a vm_info structure for a vnode.
*/
int
vm_info_init(vp)
struct vnode *vp;
{
register struct vm_info *vmp;
vmp = vp->v_vm_info;
if (vmp == VM_INFO_NULL)
vmp = (struct vm_info *) zalloc(vm_info_zone);
vmp->pager = vm_pager_null;
vmp->map_count = 0;
vmp->use_count = 0;
vmp->va = 0;
vmp->size = 0;
vmp->offset = 0;
#if PERFMODS
vmp->dirtysize = 0;
vmp->dirtyoffset = 0;
#endif
vmp->cred = (struct ucred *) NULL;
vmp->error = 0;
vmp->queued = FALSE;
vmp->dirty = FALSE;
vmp->nfsdirty = FALSE;
vmp->close_flush = TRUE; /* for safety, reconsider later */
vmp->invalidate = FALSE;
vmp->busy = FALSE;
vmp->delayed_fsync = FALSE;
vmp->filesize = FALSE;
vmp->mapped = FALSE;
vmp->dying = FALSE;
vmp->vnode_size = 0;
vmp->vnode = vp;
lock_init(&vmp->lock, TRUE); /* sleep lock */
vmp->object = VM_OBJECT_NULL;
vp->v_vm_info = vmp;
return(0);
}
/*
* Loadable file system support to avoid exporting struct vm_info.
*/
void vm_info_free(struct vnode *vp)
{
register struct vm_info *vmp = vp->v_vm_info;
if (vmp == VM_INFO_NULL)
return;
/*
* If vmp->dying is set then we have reentered.
* Uninterruptible wait for the other thread to finish and return.
*/
if (vmp->dying == TRUE) {
(void)tsleep(vmp, 0, "vminfofree", 0);
return;
}
/* Prevent other threads from racing in */
vmp->dying = TRUE;
#if MACH_NBC
mapfs_uncache(vp); /* could block here */
#endif
vp->v_vm_info = VM_INFO_NULL;
wakeup(vmp); /* wakeup other threads blocked on vmp */
zfree(vm_info_zone, (vm_offset_t)vmp); /* could block here */
}
#if MACH_NBC /* [ */
void
vm_info_enqueue(vmp)
struct vm_info *vmp;
{
mfs_assert(!vmp->queued);
mfs_assert(vmp->mapped);
#if 0
mfs_assert(vmp->size);
if ((vmp->size == 0) || !vmp->mapped)
panic("VMP SIZE IS ZERO\n");
#endif
queue_enter(&vm_info_queue, vmp, struct vm_info *, lru_links);
vmp->queued = TRUE;
mfs_files_mapped++;
vm_info_version++;
}
void
vm_info_dequeue(vmp)
struct vm_info *vmp;
{
mfs_assert(vmp->queued);
queue_remove(&vm_info_queue, vmp, struct vm_info *, lru_links);
vmp->queued = FALSE;
mfs_files_mapped--;
vm_info_version++;
}
/*
* map_vnode:
*
* Indicate that the specified vnode should be mapped into VM.
* A reference count is maintained for each mapped file.
*/
void
map_vnode(vp,p)
register struct vnode *vp;
register struct proc *p;
{
register struct vm_info *vmp;
vm_pager_t pager;
extern lock_data_t vm_alloc_lock;
struct vattr vattr;
#if 1
/* Needed as in some cases exec/namei return a vp
* with no vm_info attached -- XXX (verify this)
*/
if (vp->v_vm_info == (struct vm_info *)0)
vm_info_init(vp);
#endif
vmp = vp->v_vm_info;
if (vmp->map_count++ > 0)
return; /* file already mapped */
if (vmp->mapped)
return; /* file was still cached */
vmp_get(vmp);
pager = vmp->pager = (vm_pager_t) vnode_pager_setup(vp, FALSE, TRUE);
/* not a TEXT file, can cache */
/*
* Lookup what object is actually holding this file's
* pages so we can flush them when necessary. This
* would be done differently in an out-of-kernel implementation.
*
* Note that the lookup keeps a reference to the object which
* we must release elsewhere.
*/
lock_write(&vm_alloc_lock);
vmp->object = vm_object_lookup(pager);
vm_stat.lookups++;
if (vmp->object == VM_OBJECT_NULL) {
vmp->object = vm_object_allocate(0);
vm_object_enter(vmp->object, pager);
vm_object_setpager(vmp->object, pager, (vm_offset_t) 0, FALSE);
}
else {
vm_stat.hits++;
}
lock_write_done(&vm_alloc_lock);
vmp->error = 0;
VOP_GETATTR(vp, &vattr, p->p_ucred ,p);
vmp->vnode_size = vattr.va_size; /* must be before setting
mapped below to prevent
mapfs_fsync from recursive
locking */
vmp->va = 0;
vmp->size = 0;
vmp->offset = 0;
vmp->mapped = TRUE;
vmp_put(vmp); /* put will queue on LRU list */
}
int close_flush = 1;
/*
* unmap_vnode:
*
* Called when a vnode is closed.
*/
void
unmap_vnode(vp, p)
register struct vnode *vp;
register struct proc *p;
{
register struct vm_info *vmp;
register struct vm_object *object;
int links;
register struct pcred *pcred = p->p_cred;
register struct ucred *cred = pcred->pc_ucred;
struct vattr vattr;
vmp = vp->v_vm_info;
if (!vmp->mapped)
return; /* not a mapped file */
/*
* If a file that was previously mapped and closed is reopened with
* O_NO_MFS, the map_count will be zero when close
* is called. So, if it is already zero, there is nothing to
* be done here. (Otherwise 2269452 and 2269437)
*/
if (vmp->map_count == 0)
return;
if (--vmp->map_count > 0) {
return;
}
/*
* If there are no links left to the file then release
* the resources held. If there are links left, then keep
* the file mapped under the assumption that someone else
* will soon map the same file. However, the pages in
* the object are deactivated to put them near the list
* of pages to be reused by the VM system (this would
* be done differently out of the kernel, of course; then
* again, the primitives for this don't exist out of the
* kernel yet).
*/
vmp->map_count++;
VOP_GETATTR(vp, &vattr, cred, p);
links = vattr.va_nlink; /* may uncache, see below */
vmp->map_count--;
if (links == 0) {
mapfs_memfree(vmp, FALSE);
} else {
/*
* pushing the pages may cause an uncache
* operation (thanks NFS), so gain an extra
* reference to guarantee that the object
* does not go away. (Note that such an
* uncache actually takes place since we have
* already released the map_count above).
*/
object = vmp->object;
if (close_flush || vmp->close_flush) {
vmp->map_count++; /* prevent uncache race */
vmp_get(vmp);
#if PERFMODS
if (vmp->dirty)
(void)vmp_push_range(vmp, vmp->dirtyoffset, vmp->dirtysize);
#else
(void)vmp_push(vmp); /* Ignore errors! XXX */
#endif
}
vm_object_lock(object);
vm_object_deactivate_pages(object);
vm_object_unlock(object);
if (close_flush || vmp->close_flush) {
vmp_put(vmp);
vmp->map_count--;
}
}
}
/*
* remap_vnode:
*
* Remap the specified vnode (due to extension of the file perhaps).
* Upon return, it should be possible to access data in the file
* starting at the "start" address for "size" bytes.
*/
int
remap_vnode(vp, start, size)
register struct vnode *vp;
vm_offset_t start;
register vm_size_t size;
{
register struct vm_info *vmp;
vm_offset_t addr, offset;
kern_return_t ret;
int error=0;
vmp = vp->v_vm_info;
/*
* Remove old mapping (making its space available).
*/
if (vmp->size > 0) {
#if PERFMODS
if (vmp->dirty)
(void)vmp_push_range(vmp, vmp->dirtyoffset, vmp->dirtysize);
error = mapfs_map_remove(vmp, vmp->va, vmp->va + vmp->size, FALSE);
#else
error = mapfs_map_remove(vmp, vmp->va, vmp->va + vmp->size, TRUE);
#endif /* PERFMODS */
if (error)
goto out;
}
offset = trunc_page(start);
size = round_page(start + size) - offset;
if (size < CHUNK_SIZE)
size = CHUNK_SIZE;
do {
addr = vm_map_min(mfs_map);
mfs_alloc_lock();
ret = vm_allocate_with_pager(mfs_map, &addr, size, TRUE,
vmp->pager, offset);
/*
* If there was no space, see if we can free up mappings
* on the LRU list. If not, just wait for someone else
* to free their memory.
*/
if (ret == KERN_NO_SPACE) {
register struct vm_info *vmp1;
vm_info_lock();
vmp1 = VM_INFO_NULL;
if (!queue_empty(&vm_info_queue)) {
vmp1 = (struct vm_info *)
queue_first(&vm_info_queue);
vm_info_dequeue(vmp1);
}
vm_info_unlock();
/*
* If we found someone, free up its memory.
*/
if (vmp1 != VM_INFO_NULL) {
mfs_alloc_unlock();
mapfs_memfree(vmp1, TRUE);
mfs_alloc_lock();
}
else {
mfs_alloc_wanted = TRUE;
assert_wait(&mfs_map, FALSE);
mfs_alloc_blocks++; /* statistic only */
mfs_alloc_unlock();
thread_block();
mfs_alloc_lock();
}
}
else if (ret != KERN_SUCCESS) {
printf("Unexpected error on file map, ret = %d.\n",
ret);
panic("remap_vnode");
}
mfs_alloc_unlock();
} while (ret != KERN_SUCCESS);
/*
* Fill in variables corresponding to new mapping.
*/
vmp->va = addr;
vmp->size = size;
vmp->offset = offset;
out:
return(error);
}
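/*
 * Illustrative sketch (not part of the original source): how
 * remap_vnode derives the page-aligned window it asks the pager to
 * map. The requested byte range [start, start + size) is widened to
 * page boundaries and then raised to at least CHUNK_SIZE so a run of
 * small sequential requests does not cause a remap per page. The
 * helper name is hypothetical.
 */
#if 0 /* example sketch only */
static void
sketch_window_bounds(vm_offset_t start, vm_size_t size,
    vm_offset_t *offset_out, vm_size_t *size_out)
{
    vm_offset_t offset;

    offset = trunc_page(start);               /* window starts on a page */
    size = round_page(start + size) - offset; /* ... and ends on one */
    if (size < CHUNK_SIZE)                    /* never map tiny windows */
        size = CHUNK_SIZE;

    *offset_out = offset;
    *size_out = size;
}
#endif /* example sketch only */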
/*
* mapfs_trunc:
*
* The specified vnode is truncated to the specified size.
* Returns 0 if successful, an error otherwise.
*/
int
mapfs_trunc(vp, length)
register struct vnode *vp;
register vm_offset_t length;
{
register struct vm_info *vmp;
register vm_size_t size, rsize;
int error = 0;
vmp = vp->v_vm_info;
if ((vp->v_type != VREG) || (vmp == (struct vm_info *)0))
return (0);
if (!vmp->mapped) { /* file not mapped, just update size */
vmp->vnode_size = length;
return (0);
}
vmp_get(vmp);
vmp->nfsdirty = TRUE;
/*
* Unmap everything past the new end page.
* Also flush any pages that may be left in the object using
* vno_flush (is this necessary?).
* rsize is the size relative to the mapped offset.
*/
NFSTRACE4(NFSTRC_MTR, vp, length, vmp->size, vmp->offset);
size = round_page(length);
if (size >= vmp->offset) {
rsize = size - vmp->offset;
} else {
rsize = 0;
}
if (rsize < vmp->size) {
error = mapfs_map_remove(vmp, vmp->va + rsize,
vmp->va + vmp->size, FALSE);
NFSTRACE4(NFSTRC_MTR_MREM, vp, vmp->va, vmp->size, rsize);
if (error) {
#if DIAGNOSTIC
kprintf("mapfs_trunc: mapfs_map_remove %d\n", error);
#endif /* DIAGNOSTIC */
goto out;
}
if ((vmp->size = rsize) == 0) /* mapped size */
vmp->offset = 0;
}
if (vmp->vnode_size > size)
vno_flush(vp, size, vmp->vnode_size - size);
vmp->vnode_size = length; /* file size */
/*
* If the new length isn't page aligned, zero the extra
* bytes in the last page.
*/
if (length != size) {
vm_size_t n;
n = size - length;
/*
* Make sure the bytes to be zeroed are mapped.
*/
if ((length < vmp->offset) ||
((length + n - 1) >= (vmp->offset + vmp->size))) {
NFSTRACE4(NFSTRC_MTR_RMAP, vp, vmp->offset, vmp->size, n);
error = remap_vnode(vp, length, n);
if (error) {
#if DIAGNOSTIC
kprintf("mapfs_trunc: remap_vnode %d\n", error);
#endif /* DIAGNOSTIC */
goto out;
}
}
NFSTRACE(NFSTRC_MTR_DIRT, vmp->va);
vmp->nfsdirty = TRUE;
error = safe_bzero((void *)(vmp->va + length - vmp->offset), n);
if (error) {
NFSTRACE4(NFSTRC_MTR_BZER, vp, vmp->va, vmp->offset, n);
#if DIAGNOSTIC
kprintf("mapfs_trunc: safe_bzero %d\n", error);
kprintf("mapfs_trunc: va %x vp %x n %x length %x offset %x size %x\n", vmp->va, (unsigned)vp, n, length, vmp->offset, vmp->size);
#endif /* DIAGNOSTIC */
goto out;
}
/*
* Do NOT set dirty flag... the cached memory copy
* is zeroed, but this change doesn't need to be
* flushed to disk (the vnode already has the right
* size). Besides, if we set this bit, we would need
* to clean it immediately to prevent a later sync
* operation from incorrectly cleaning a cached-only
* copy of this vmp (which causes problems with NFS
* due to the fact that we have changed the mod time
* by truncating and will need to do an mapfs_uncache).
* NFS is a pain. Note that this means that there
* will be a dirty page left in the vmp. If this
* turns out to be a problem we'll have to set the dirty
* flag and immediately do a flush.
*
* UPDATE: 4/4/13. We need to really flush this.
* Use the map_count hack to prevent a race with
* uncaching.
*/
vmp->dirty = TRUE;
}
vmp->map_count++; /* prevent uncache race */
error = vmp_push(vmp);
#if DIAGNOSTIC
if (error)
kprintf("mapfs_trunc: vmp_push %d\n", error);
#endif /* DIAGNOSTIC */
vmp->map_count--;
out:
vmp_put(vmp);
return (error);
}
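/*
 * Illustrative sketch (not part of the original source): the
 * partial-page zeroing arithmetic used in mapfs_trunc above when the
 * new length is not page aligned. The bytes between the new EOF and
 * the end of its page are cleared through the file's mapped window so
 * a later read of that page does not see stale data. The helper name
 * is hypothetical and the remap step is omitted.
 */
#if 0 /* example sketch only */
static int
sketch_zero_tail(struct vm_info *vmp, vm_offset_t length)
{
    vm_size_t size = round_page(length);
    vm_size_t n = size - length;    /* bytes left in the last page */

    if (n == 0)
        return (0);                 /* already page aligned */
    /* assumes [length, length + n) lies inside the current window */
    return (safe_bzero((void *)(vmp->va + length - vmp->offset), n));
}
#endif /* example sketch only */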
/*
* mapfs_get:
*
* Get locked access to the specified file. The start and size describe
* the address range that will be accessed in the near future and
* serves as a hint of where to map the file if it is not already
* mapped. Upon return, it is guaranteed that there is enough VM
* available for remapping operations within that range (each window
* no larger than the chunk size).
*/
int
mapfs_get(vp, start, size)
register struct vnode *vp;
vm_offset_t start;
register vm_size_t size;
{
register struct vm_info *vmp;
int error=0;
vmp = vp->v_vm_info;
vmp_get(vmp);
/*
* If the requested size is larger than the size we have
* mapped, be sure we can get enough VM now. This size
* is bounded by the maximum window size.
*/
if (size > mfs_max_window)
size = mfs_max_window;
if (size > vmp->size) {
error = remap_vnode(vp, start, size);
}
return(error);
}
/*
* mapfs_put:
*
* Indicate that locked access is no longer desired of a file.
*/
void
mapfs_put(vp)
register struct vnode *vp;
{
vmp_put(vp->v_vm_info);
}
/*
* vmp_get:
*
* Get exclusive access to the specified vm_info structure.
* NeXT: Note mapfs_fsync_invalidate inlines part of this.
*/
void
vmp_get(vmp)
struct vm_info *vmp;
{
/*
* Remove from LRU list (if it's there).
*/
vm_info_lock();
if (vmp->queued) {
vm_info_dequeue(vmp);
}
vmp->use_count++; /* to protect requeueing in vmp_put */
vm_info_unlock();
/*
* Lock out others using this file.
*/
lock_write(&vmp->lock);
lock_set_recursive(&vmp->lock);
}
/*
* vmp_put:
*
* Release exclusive access gained in vmp_get.
*/
void
vmp_put(vmp)
register struct vm_info *vmp;
{
/*
* Place back on LRU list if no one else is using it.
*/
vm_info_lock();
if (--vmp->use_count == 0) {
vm_info_enqueue(vmp);
}
vm_info_unlock();
/*
* Let others at file.
*/
lock_clear_recursive(&vmp->lock);
lock_write_done(&vmp->lock);
if (mfs_files_mapped > mfs_files_max)
mapfs_cache_trim();
if (vmp->invalidate) {
vmp->invalidate = FALSE;
vmp_invalidate(vmp);
}
}
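/*
 * Illustrative sketch (not part of the original source): the typical
 * vmp_get/vmp_put bracket used by callers that need exclusive access
 * to a mapped file, for example to push its dirty pages (mapfs_fsync
 * below follows this same pattern). The helper name is hypothetical.
 */
#if 0 /* example sketch only */
static int
sketch_flush_mapped_vnode(struct vnode *vp)
{
    struct vm_info *vmp = vp->v_vm_info;
    int error;

    vmp_get(vmp);           /* dequeue from LRU, take the sleep lock */
    error = vmp_push(vmp);  /* write back any dirty pages */
    vmp_put(vmp);           /* requeue on LRU, may trim the cache */
    return (error);
}
#endif /* example sketch only */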
/*
* mapfs_uncache:
*
* Make sure there are no cached mappings for the specified vnode.
*/
void
mapfs_uncache(vp)
register struct vnode *vp;
{
register struct vm_info *vmp;
vmp = vp->v_vm_info;
/*
* If the file is mapped but no one is actively using
* it, then remove its mappings.
*/
if (vmp->mapped && vmp->map_count == 0) {
mapfs_memfree(vmp, FALSE);
}
}
void
mapfs_memfree(vmp, flush)
register struct vm_info *vmp;
boolean_t flush;
{
struct ucred *cred;
vm_object_t object;
int error = 0;
vm_info_lock();
if (vmp->queued) {
vm_info_dequeue(vmp);
}
vm_info_unlock();
lock_write(&vmp->lock);
lock_set_recursive(&vmp->lock);
if (vmp->map_count == 0) { /* cached only */
vmp->mapped = FALSE; /* prevent recursive flushes */
}
error = mapfs_map_remove(vmp, vmp->va, vmp->va + vmp->size, flush);
if (error)
panic("mapfs_memfree: mapfs_map_remove failed %d", error); /* XXX */
vmp->size = 0;
vmp->va = 0;
object = VM_OBJECT_NULL;
if (vmp->map_count == 0) { /* cached only */
/*
* lookup (in map_vnode) gained a reference, so need to
* lose it.
*/
object = vmp->object;
vmp->object = VM_OBJECT_NULL;
cred = vmp->cred;
if (cred != NOCRED) {
vmp->cred = NOCRED;
crfree(cred);
}
}
lock_clear_recursive(&vmp->lock);
lock_write_done(&vmp->lock);
if (object != VM_OBJECT_NULL)
vm_object_deallocate(object);
}
/*
* mapfs_cache_trim:
*
* trim the number of files in the cache to be less than the max
* we want.
*/
void
mapfs_cache_trim()
{
register struct vm_info *vmp;
while (TRUE) {
vm_info_lock();
if (mfs_files_mapped <= mfs_files_max) {
vm_info_unlock();
return;
}
/*
* grab file at head of lru list.
*/
vmp = (struct vm_info *) queue_first(&vm_info_queue);
vm_info_dequeue(vmp);
vm_info_unlock();
/*
* Free up its memory.
*/
mapfs_memfree(vmp, TRUE);
}
}
/*
* mapfs_cache_clear:
*
* Clear the mapped file cache. Note that the map_count is implicitly
* locked by the Unix file system code that calls this routine.
*/
int
mapfs_cache_clear()
{
register struct vm_info *vmp;
int last_version;
vm_info_lock();
last_version = vm_info_version;
vmp = (struct vm_info *) queue_first(&vm_info_queue);
while (!queue_end(&vm_info_queue, (queue_entry_t) vmp)) {
if (vmp->map_count == 0) {
vm_info_unlock();
mapfs_memfree(vmp, TRUE);
vm_info_lock();
/*
* mapfs_memfree increments version number, causing
* restart below.
*/
}
/*
* If the version didn't change, just keep scanning
* down the queue. If the version did change, we
* need to restart from the beginning.
*/
if (last_version == vm_info_version) {
vmp = (struct vm_info *) queue_next(&vmp->lru_links);
}
else {
vmp = (struct vm_info *) queue_first(&vm_info_queue);
last_version = vm_info_version;
}
}
vm_info_unlock();
return(0);
}
/*
* mapfs_map_remove:
*
* Remove specified address range from the mfs map and wake up anyone
* waiting for map space. Be sure pages are flushed back to vnode.
*/
int
mapfs_map_remove(vmp, start, end, flush)
struct vm_info *vmp;
vm_offset_t start;
vm_size_t end;
boolean_t flush;
{
vm_object_t object;
int error = 0;
/*
* Note: If we do need to flush, the vmp is already
* locked at this point.
*/
if (flush) {
/* vmp->map_count++; *//* prevent recursive flushes */
error = vmp_push(vmp);
/* vmp->map_count--;*/
if (error)
goto out;
}
/*
* Free the address space.
*/
mfs_alloc_lock();
vm_map_remove(mfs_map, start, end);
if (mfs_alloc_wanted) {
mfs_alloc_wanted = FALSE;
thread_wakeup(&mfs_map);
}
mfs_alloc_unlock();
/*
* Deactivate the pages.
*/
object = vmp->object;
if (object != VM_OBJECT_NULL) {
vm_object_lock(object);
vm_object_deactivate_pages_first(object);
vm_object_unlock(object);
}
out:
return(error);
}
#if PERFMODS
/*
* mapfs_map_cleanup:
*
* Remove specified address range from the mfs map and wake up anyone
* waiting for map space. Unlike mapfs_map_remove, pages are not flushed here.
*/
int
mapfs_map_cleanup(vmp, start, end, flush)
struct vm_info *vmp;
vm_offset_t start;
vm_size_t end;
boolean_t flush;
{
/*
* Free the address space.
*/
mfs_alloc_lock();
vm_map_remove(mfs_map, start, end);
if (mfs_alloc_wanted) {
mfs_alloc_wanted = FALSE;
thread_wakeup(&mfs_map);
}
mfs_alloc_unlock();
return(0);
}
#endif
#ifdef notdef
vnode_size(vp)
struct vnode *vp;
{
struct vattr vattr;
VOP_GETATTR(vp, &vattr, u.u_cred,p);
return(vattr.va_size);
}
#endif /* notdef */
int active_mfsbufs = 0; /* global record of buf count in use by mfs */
extern int nbuf;
extern int nmfsbuf; /* global limit to mfs buffer allocation */
int
mapfs_io(vp, uio, rw, ioflag, cred)
register struct vnode *vp;
register struct uio *uio;
enum uio_rw rw;
int ioflag;
struct ucred *cred;
{
register vm_offset_t va;
register struct vm_info *vmp;
register int n, diff, bsize;
int error=0;
#if PERFMODS
vm_offset_t newoffset;
vm_size_t newsize;
vm_size_t mapfsio_size;
#endif
struct ucred *cr;
struct proc *p;
if (uio->uio_resid == 0) {
return (0);
}
if ((int) uio->uio_offset < 0 ||
(int) ((int)uio->uio_offset + uio->uio_resid) < 0) {
return (EINVAL);
}
mfs_assert(vp->v_type==VREG || vp->v_type==VLNK);
p = uio->uio_procp;
if (p && (vp->v_type == VREG) &&
uio->uio_offset + uio->uio_resid >
p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
psignal(p, SIGXFSZ);
return (EFBIG);
}
/*
* The following code is adapted from code in nfs_bio{read,write}.
* The point of having it here is to keep us as synchronized with the
* server as we would have been had the nfs file not been mapped. Also
* helping in that synchronization goal are the mapfs_memfree calls in
* nfs_{get,load}attrcache.
*/
if (vp->v_tag == VT_NFS) {
struct nfsnode *np = VTONFS(vp);
struct proc *p = uio->uio_procp;
struct vattr vattr;
if (rw == UIO_WRITE) {
NFSTRACE4(NFSTRC_MIO_WRT, vp,
uio->uio_offset, uio->uio_resid,
(ioflag & IO_APPEND ? 0x0010 : 0) |
(ioflag & IO_SYNC ? 0x0020 : 0) |
(np->n_flag & NMODIFIED ? 0x0001 : 0) |
(vp->v_vm_info->nfsdirty ? 0x0002 : 0));
if (ioflag & (IO_APPEND | IO_SYNC)) {
if (np->n_flag & NMODIFIED || vp->v_vm_info->nfsdirty) {
np->n_attrstamp = 0;
if ((error = nfs_vinvalbuf(vp, V_SAVE,
cred, p, 1)))
return (error);
}
if (ioflag & IO_APPEND) {
np->n_attrstamp = 0;
if ((error = VOP_GETATTR(vp, &vattr,
cred, p)))
return (error);
}
}
} else { /* UIO_READ we presume */
NFSTRACE4(NFSTRC_MIO_READ, vp,
uio->uio_offset, uio->uio_resid,
(np->n_flag & NMODIFIED ? 0x0001 : 0) |
(vp->v_vm_info->nfsdirty ? 0x0002 : 0));
if (np->n_flag & NMODIFIED || vp->v_vm_info->nfsdirty) {
np->n_attrstamp = 0;
if ((error = VOP_GETATTR(vp, &vattr, cred, p)))
return (error);
np->n_mtime = vattr.va_mtime.tv_sec;
} else {
if ((error = VOP_GETATTR(vp, &vattr, cred, p)))
return (error);
else if (np->n_mtime != vattr.va_mtime.tv_sec) {
NFSTRACE(NFSTRC_MIO_RINV, vp);
if ((error = nfs_vinvalbuf(vp, V_SAVE,
cred, p, 1)))
return (error);
np->n_mtime = vattr.va_mtime.tv_sec;
}
}
}
}
error = mapfs_get(vp, (vm_offset_t)uio->uio_offset, uio->uio_resid);
if (error)
goto out;
vmp = vp->v_vm_info;
if ((rw == UIO_WRITE) && (ioflag & IO_APPEND)) {
uio->uio_offset = vmp->vnode_size;
}
#if PERFMODS
bsize = PAGE_SIZE;
#else
bsize = vp->v_mount->mnt_stat.f_bsize;
#define MAPFS_DEFAULT_BLOCKSIZE 4096
/* In some cases f_bsize is not set; then force it to the
* default. Probably should consider changing to f_iosize,
* but not sure whether that would be any more accurate either.
* We need this anyway.
*/
if (bsize == 0)
bsize = MAPFS_DEFAULT_BLOCKSIZE;
#endif
/*
* Set credentials.
*/
if (rw == UIO_WRITE || (rw == UIO_READ && vmp->cred == NULL)) {
cred = crdup(cred);
cr = vmp->cred;
if (cr != NOCRED) {
vmp->cred = NOCRED;
crfree(cr);
}
vmp->cred = cred;
}
/* Clear errors before we start */
vmp->error = 0;
#if PERFMODS
if (rw == UIO_WRITE) {
/*
* set up range for this I/O
*/
newoffset = uio->uio_offset;
newsize = uio->uio_resid;
if (vmp->dirtysize) {
/*
* If a dirty range already exists, coalesce it with the new range, but
* don't update the vmp fields yet: if there was no intersection
* between the old range and the range that encompasses the new I/O,
* we may want to push the old range and skip the coalesce if the new
* coalesced size exceeds CHUNK_SIZE.
*/
if (newoffset > vmp->dirtyoffset)
newoffset = vmp->dirtyoffset;
if ((uio->uio_offset + uio->uio_resid) > (vmp->dirtyoffset + vmp->dirtysize))
newsize = (uio->uio_offset + uio->uio_resid) - newoffset;
else
newsize = (vmp->dirtyoffset + vmp->dirtysize) - newoffset;
if (newsize > CHUNK_SIZE && ((uio->uio_offset > (vmp->dirtyoffset + vmp->dirtysize)) ||
(uio->uio_offset + uio->uio_resid) < vmp->dirtyoffset)) {
/*
* the new coalesced size exceeded CHUNK_SIZE, and there was no intersection
* with the current dirty range, so push the current dirty range....
* the new dirty range will be set to the range encompassing this I/O request
*/
vmp_push_range(vmp, vmp->dirtyoffset, vmp->dirtysize);
newoffset = uio->uio_offset;
newsize = uio->uio_resid;
}
}
/*
* now make sure that the proposed dirty range is fully encompassed by the
* current vm mapping of the file... if not, we'll clip at either end.
* If there is no intersection at all with the current mapping, then
* we'll set the dirty size to 0... note that any previous dirty pages would
* have been pushed above since they must have fit in the current mapping, and
* if the new range doesn't intersect with the current mapping, then we couldn't
* have coalesced with the old range... in this case, we'll be going through the
* remap path before issuing any I/O... that path will set the dirty range accordingly
*/
if (newoffset < vmp->offset) {
if ((vmp->offset - newoffset) < newsize)
newsize -= vmp->offset - newoffset;
else
newsize = 0;
newoffset = vmp->offset;
}
if ((newoffset + newsize) > (vmp->offset + vmp->size))
newsize = (vmp->offset + vmp->size) - newoffset;
vmp->dirtyoffset = newoffset;
vmp->dirtysize = newsize;
}
#endif /* PERFMODS */
do {
n = MIN((unsigned)bsize, uio->uio_resid);
if (rw == UIO_READ) {
/*
* only read up to the end of the file
*/
if ((diff = (int)(vmp->vnode_size - uio->uio_offset)) <= 0) {
mapfs_put(vp);
return (0);
}
if (diff < n)
n = diff;
} else if (((vm_size_t)uio->uio_offset) + n > vmp->vnode_size)
vmp->vnode_size = (vm_size_t)uio->uio_offset + n;
/*
* Check to be sure we have a valid window
* for the mapped file.
*/
if (((vm_offset_t)uio->uio_offset < vmp->offset) ||
(((vm_offset_t)uio->uio_offset + n) > (vmp->offset + vmp->size))) {
if ((mapfsio_size = (vmp->size << 1)) > mfs_max_window)
mapfsio_size = mfs_max_window;
error = remap_vnode(vp, (vm_offset_t)uio->uio_offset, mapfsio_size);
/*
* remap_vnode does a push of the dirty pages and then
* sets vmp->dirtyoffset and vmp->dirtysize to 0
*/
if (error)
goto out;
/*
* new dirty range encompasses the remaining I/O of this request
*/
vmp->dirtyoffset = uio->uio_offset;
vmp->dirtysize = uio->uio_resid;
/*
* make sure the new dirty range doesn't extend beyond the end of the map
*/
if ((vmp->dirtyoffset + vmp->dirtysize) > (vmp->offset + vmp->size))
vmp->dirtysize = (vmp->offset + vmp->size) - vmp->dirtyoffset;
}
va = vmp->va + (vm_offset_t)uio->uio_offset - vmp->offset;
vmp->busy = TRUE;
if (rw == UIO_WRITE)
vmp->nfsdirty = TRUE;
error = uiomove((caddr_t)va, (int)n, uio);
vmp->busy = FALSE;
if (error)
goto out;
if (vmp->delayed_fsync) {
vmp->delayed_fsync = FALSE;
if (rw == UIO_WRITE)
vmp->dirtysize = uio->uio_offset - vmp->dirtyoffset;
error = vmp_push_range(vmp, vmp->dirtyoffset, vmp->dirtysize);
if (error)
goto out;
if (rw == UIO_WRITE) {
/*
* new dirty range encompasses the remaining I/O of this request
*/
vmp->dirtyoffset = uio->uio_offset;
vmp->dirtysize = uio->uio_resid;
/*
* make sure the new dirty range doesn't extend beyond
* the end of the map
*/
if ((vmp->dirtyoffset + vmp->dirtysize) > (vmp->offset + vmp->size))
vmp->dirtysize = (vmp->offset + vmp->size) - vmp->dirtyoffset;
}
} else if (rw == UIO_WRITE)
/*
* Set dirty bit each time through loop just in
* case remap above caused it to be cleared.
*/
vmp->dirty = TRUE;
/*
* Check for errors left by the pager. Report the
* error only once.
*/
if (vmp->error) {
error = vmp->error;
vmp->error = 0;
/*
* The error might have been a permission
* error based on the credential. We release it
* so that the next person who tries a read doesn't
* get stuck with it.
*/
cr = vmp->cred;
if (cr != NOCRED) {
vmp->cred = NOCRED;
crfree(cr);
}
}
/*
* Test to prevent mfs from swamping the buffer cache,
* locking out higher-priority transfers, like
* pageins, and causing system hangs.
*/
} while (error == 0 && uio->uio_resid > 0);
#if PERFMODS
if (error == 0 && rw == UIO_WRITE) {
/*
* Since the window may be as much as 4 Mbytes, write it out
* when we reach or exceed CHUNK_SIZE to avoid flooding the
* underlying disks with a huge stream of writes all at once
*/
if ((ioflag & IO_SYNC) || vmp->dirtysize >= CHUNK_SIZE) {
error = vmp_push_range(vmp, vmp->dirtyoffset, vmp->dirtysize);
if (error == 0 && (ioflag & IO_SYNC)) {
error = VOP_FSYNC(vp, cred, MNT_WAIT, (struct proc *)0);
if (error)
goto out;
}
/* This looks like redundant info, but I am keeping it
* since it worked in at least one reported case
*/
if (vmp->error) {
error = vmp->error;
vmp->error = 0;
}
}
}
#else
if (
(error == 0) &&
(rw == UIO_WRITE) &&
(ioflag & IO_SYNC)) {
error = vmp_push(vmp); /* initiate all i/o */
if (!error) {
error = VOP_FSYNC(vp, cred, MNT_WAIT, (struct proc *)0);
if (error)
goto out;
}
/* This looks like redundant info, but I am keeping it
* since it worked in at least one reported case
*/
if (vmp->error) {
error = vmp->error;
vmp->error = 0;
}
}
#endif /* PERFMODS */
out:
mapfs_put(vp);
return(error);
}
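/*
 * Illustrative sketch (not part of the original source): the dirty
 * range coalescing decision made on the write path of mapfs_io above.
 * The new I/O range is merged with the existing dirty range unless the
 * union would exceed CHUNK_SIZE and the two ranges do not touch, in
 * which case the old range is pushed first and tracking restarts with
 * just the new I/O. Clipping to the mapped window is omitted; the
 * helper name and parameters are hypothetical.
 */
#if 0 /* example sketch only */
static void
sketch_coalesce_dirty(struct vm_info *vmp, vm_offset_t io_off, vm_size_t io_len)
{
    vm_offset_t newoffset = io_off;
    vm_size_t newsize = io_len;

    if (vmp->dirtysize) {
        /* union of the old dirty range and the new I/O range */
        if (newoffset > vmp->dirtyoffset)
            newoffset = vmp->dirtyoffset;
        if ((io_off + io_len) > (vmp->dirtyoffset + vmp->dirtysize))
            newsize = (io_off + io_len) - newoffset;
        else
            newsize = (vmp->dirtyoffset + vmp->dirtysize) - newoffset;

        /* too big and disjoint: push the old range, start over */
        if (newsize > CHUNK_SIZE &&
            ((io_off > (vmp->dirtyoffset + vmp->dirtysize)) ||
            (io_off + io_len) < vmp->dirtyoffset)) {
            (void)vmp_push_range(vmp, vmp->dirtyoffset, vmp->dirtysize);
            newoffset = io_off;
            newsize = io_len;
        }
    }
    vmp->dirtyoffset = newoffset;
    vmp->dirtysize = newsize;
}
#endif /* example sketch only */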
/*
* mapfs_sync:
*
* Sync the mfs cache (called by sync()).
*/
int
mapfs_sync()
{
register struct vm_info *vmp, *next;
int last_version;
int error = 0;
vm_info_lock();
last_version = vm_info_version;
vmp = (struct vm_info *) queue_first(&vm_info_queue);
while (!queue_end(&vm_info_queue, (queue_entry_t) vmp)) {
next = (struct vm_info *) queue_next(&vmp->lru_links);
if (vmp->dirty) {
vm_info_unlock();
vmp_get(vmp);
error = vmp_push(vmp);
vmp_put(vmp);
if (error)
goto out;
vm_info_lock();
/*
* Since we unlocked, the get and put
* operations would increment version by
* two, so add two to our version.
* If anything else happened in the meantime,
* version numbers will not match and we
* will restart.
*/
last_version += 2;
}
/*
* If the version didn't change, just keep scanning
* down the queue. If the version did change, we
* need to restart from the beginning.
*/
if (last_version == vm_info_version) {
vmp = next;
}
else {
vmp = (struct vm_info *) queue_first(&vm_info_queue);
last_version = vm_info_version;
}
}
vm_info_unlock();
out:
return(error);
}
/*
* Sync pages in specified vnode.
*/
int
mapfs_fsync(vp)
struct vnode *vp;
{
struct vm_info *vmp;
int error=0;
vmp = vp->v_vm_info;
if (vp->v_type == VREG && vmp != VM_INFO_NULL && vmp->mapped) {
vmp_get(vmp);
error = vmp_push(vmp);
vmp_put(vmp);
return(error);
}
return(0);
}
#if 0 /* dead code elimination */
/*
* Sync pages in specified vnode, and invalidate clean pages.
* The vm_info lock protects the vm_info from modification
* or removal. XXX Must protect against sync/invalidate race
*/
int
mapfs_fsync_invalidate(vp, flag)
struct vnode *vp;
{
struct vm_info *vmp;
vmp = vp->v_vm_info;
if (vp->v_type == VREG && vmp != VM_INFO_NULL && vmp->mapped) {
/* Part of vmp_get(vmp), we don't actually
* need the write lock if we hold a ref as
* we are not changing the vm_info data
*
* Remove from LRU list (if it's there).
*/
vm_info_lock();
if (vmp->queued) {
vm_info_dequeue(vmp);
}
vmp->use_count++; /* to protect requeueing in vmp_put */
vm_info_unlock();
if (!(flag & MFS_NOFLUSH))
vmp_push_all(vmp);
/* This is not under a lock, nor is it in vm_put XXX */
/* But it is below */
if (!(flag & MFS_NOINVALID)){
vmp->invalidate = FALSE;
vmp_invalidate(vmp);
}
/*
* Place back on LRU list if no one else is using it.
*/
vm_info_lock();
if (--vmp->use_count == 0) {
vm_info_enqueue(vmp);
}
vm_info_unlock();
return(vmp->error);
}
return(0);
}
#endif
/*
* Invalidate pages in specified vnode.
*/
int
mapfs_invalidate(vp)
struct vnode *vp;
{
struct vm_info *vmp;
vmp = vp->v_vm_info;
if (vp->v_type == VREG && vmp != VM_INFO_NULL && vmp->mapped) {
if (vmp->use_count > 0)
vmp->invalidate = TRUE;
else {
vmp_get(vmp);
vmp_invalidate(vmp);
vmp_put(vmp);
}
}
return(vmp ? vmp->error : 0);
}
#import <vm/vm_page.h>
#import <vm/vm_object.h>
/*
* Search for and flush pages in the specified range. For now, it is
* unnecessary to flush to disk since I do that synchronously.
*/
void vno_flush(vp, start, size)
struct vnode *vp;
register vm_offset_t start;
vm_size_t size;
{
register vm_offset_t end;
register vm_object_t object;
register vm_page_t m;
object = vp->v_vm_info->object;
if (object == VM_OBJECT_NULL)
return;
#if SCRUBVM3
/* Isn't this the wrong order to acquire the lock? */
#endif
vm_page_lock_queues();
vm_object_lock(object); /* mfs code holds reference */
end = round_page(size + start); /* must be first */
start = trunc_page(start);
while (start < end) {
m = vm_page_lookup(object, start);
if (m != VM_PAGE_NULL) {
if (m->busy) {
#if SCRUBVM3
/* THIS SHOULD NOT HAPPEN IF ONLY ASYNC
* on SWAP */
/* hint; if we miss it it's ok */
if (m->dry_vp){
/* object and page queues locked, note
* page might not be clean wrt backing
* store */
(void) vm_page_completeio(m, TRUE);
} else {
#endif
PAGE_ASSERT_WAIT(m, FALSE);
vm_object_unlock(object);
vm_page_unlock_queues();
thread_block();
vm_page_lock_queues();
vm_object_lock(object);
continue; /* try again */
#if SCRUBVM3
}
#endif
}
vm_page_free(m);
}
start += PAGE_SIZE;
}
vm_object_unlock(object);
vm_page_unlock_queues();
}
int mfs_mdirty;
int mfs_mclean;
/*
* Search for and free pages in the specified vmp.
*/
void
vmp_invalidate(struct vm_info *vmp)
{
register vm_object_t object;
register vm_page_t m;
NFSTRACE(NFSTRC_VMP_INV, vmp);
object = vmp->object;
if (object == VM_OBJECT_NULL)
return;
vm_page_lock_queues();
vm_object_lock(object); /* mfs code holds reference */
/* Sanity. Different code calls this with and without the vminfo
* lock. The locking needs to be fixed for MP. XXX
*/
if (vmp->object != object) {
vm_object_unlock(object);
vm_page_unlock_queues();
return;
}
retry:
m = (vm_page_t) queue_first(&object->memq);
while (!queue_end(&object->memq, (queue_entry_t) m)) {
vm_page_t next = (vm_page_t) queue_next(&m->listq);
/* If NFS is paging us in we are not really valid yet. XXX
* Re-address this. Without this check we can block forever
* waiting on the busy bit that we set. */
if (m->nfspagereq == TRUE){
m = next;
continue;
}
if (m->busy) {
#if SCRUBVM3
/* THIS SHOULD NOT HAPPEN IF ONLY ASYNC
* on SWAP */
/* hint; if we miss it it's ok */
if (m->dry_vp){
/* object and page queues locked, note
* page might not be clean wrt backing
* store */
(void) vm_page_completeio(m, TRUE);
} else {
#endif
PAGE_ASSERT_WAIT(m, FALSE);
vm_object_unlock(object);
vm_page_unlock_queues();
thread_block();
vm_page_lock_queues();
vm_object_lock(object);
goto retry;
#if SCRUBVM3
}
#endif
}
/* Kill off the translation as well.
* mapfs_map_remove removes them as well, but as
* we have seen not everyone calls that.
*
* If there is a ref to this file and we are being called
* and the page is wired we will skip this page. If there
* are no more refs to this file and we are being called
* the wire count should always be zero. In the future we
* may want to block on the wired count. XXX joh
*/
if (m->wire_count == 0){
pmap_remove_all(VM_PAGE_TO_PHYS(m));
/* In the case of mfs only one guy can be in here at a
* time. In the case of mmap they can be dirtying
* pages in parallel. So after our sync and invalidate
* above we need to check again. If someone has re-
* written them again, then they get to keep the page.
* NFS does not give any assurances for multiple
* writers on different nodes.
*/
if ((m->clean == FALSE) ||
pmap_is_modified(VM_PAGE_TO_PHYS(m))){
mfs_mdirty++;
} else {
mfs_mclean++;
vm_page_free(m);
}
}
m = next;
}
vm_object_unlock(object);
vm_page_unlock_queues();
}
/*
* Search for and push (to disk) pages in the specified range.
* We need some better interactions with the VM system to simplify
* the code. Force tries to push the object regardless of whether
* the MFS thinks it is dirty (mmap could have written it). Some day
* vmp_push could support ranges vmp_push(vmp,start,size).
*/
/* Something must be done to handle dirty wired pages. XXX joh */
int
vmp_push(vmp)
struct vm_info *vmp;
{
register vm_offset_t start;
vm_size_t size;
int error=0;
if (!vmp->dirty)
return(0);
start = vmp->offset;
size = vmp->size;
/* vmp->dirty is set FALSE in vmp_push_range */
error = vmp_push_range(vmp, start, size);
return(error);
}
int
vmp_push_range(vmp, start, size)
struct vm_info *vmp;
register vm_offset_t start;
vm_size_t size;
{
register vm_offset_t end;
register vm_object_t object;
register vm_page_t m;
struct vattr vattr;
int error=0;
NFSTRACE4(NFSTRC_VPR, vmp->vnode, start, size, vmp->busy);
if (!vmp->dirty)
return(0);
if (vmp->busy) {
vmp->delayed_fsync = TRUE;
return(0);
}
vmp->dirty = FALSE;
vmp->dirtysize = 0;
vmp->dirtyoffset = 0;
object = vmp->object;
/* We are trying to catch a BSD error; no need to bother
* about these errors for now
*/
if (object == VM_OBJECT_NULL)
return(0);
vm_page_lock_queues();
vm_object_lock(object); /* mfs code holds reference */
end = round_page(size + start); /* must be first */
start = trunc_page(start);
/* Cleanup error before we start */
vmp->error = 0;
while (start < end) {
m = vm_page_lookup(object, start);
/* We don't want to deadlock on the page we are bringing in */
if ((m != VM_PAGE_NULL) && (m->nfspagereq == FALSE)){
if (m->busy) {
#if SCRUBVM3
/* THIS SHOULD NOT HAPPEN IF ONLY ASYNC
* on SWAP */
/* hint; if we miss it it's ok */
if (m->dry_vp){
/* object and page queues locked, note
* page might not be clean wrt backing
* store */
(void) vm_page_completeio(m, TRUE);
} else {
#endif
PAGE_ASSERT_WAIT(m, FALSE);
vm_object_unlock(object);
vm_page_unlock_queues();
thread_block();
vm_page_lock_queues();
vm_object_lock(object);
continue; /* try again */
#if SCRUBVM3
}
#endif
}
if (!m->active) {
vm_page_activate(m); /* so deactivate works */
}
vm_page_deactivate(m); /* gets dirty/laundry bit */
/*
* Prevent pageout from playing with
* this page. We know it is inactive right
* now (and are holding lots of locks keeping
* it there).
*/
queue_remove(&vm_page_queue_inactive, m, vm_page_t,
pageq);
m->inactive = FALSE;
vm_page_inactive_count--;
m->busy = TRUE;
if (m->laundry) {
pager_return_t ret;
pmap_remove_all(VM_PAGE_TO_PHYS(m));
object->paging_in_progress++;
vm_object_unlock(object);
vm_page_unlock_queues();
/* should call pageout daemon code */
ret = vnode_pageout(m);
vm_page_lock_queues();
vm_object_lock(object);
object->paging_in_progress--;
if (ret == PAGER_SUCCESS) {
/* vnode_pageout marks clean */
#if PERFMODS
pmap_clear_reference(VM_PAGE_TO_PHYS(m));
#endif /* PERFMODS */
m->laundry = FALSE;
} else {
/* don't set dirty bit, unrecoverable
errors will cause update to go
crazy. User is responsible for
retrying the write */
/* vmp->dirty = TRUE; */
error = vmp->error;
vmp->error =0;
}
/* if pager failed, activate below */
}
vm_page_activate(m);
m->busy = FALSE;
PAGE_WAKEUP(m);
}
start += PAGE_SIZE;
}
vmp->nfsdirty = FALSE;
vm_object_unlock(object);
vm_page_unlock_queues();
/*
* On error we have to reset the true file size in the vmp
* structure. The lack of a credential structure pointer
* would indicate nothing was changing in the file.
*/
if (error && vmp->cred) {
vmp->filesize=TRUE;
VOP_GETATTR (vmp->vnode, &vattr, vmp->cred, current_proc());
vmp->filesize=FALSE;
vmp->vnode_size = vattr.va_size;
}
NFSTRACE(NFSTRC_VPR_DONE, error);
return(error);
}
#if 0 /* dead code elimination */
/* Something must be done to handle dirty wired pages. XXX joh */
void
vmp_push_all(vmp)
struct vm_info *vmp;
{
register vm_object_t object;
register vm_page_t m;
struct vattr vattr;
int error=0;
vmp->dirty = FALSE;
object = vmp->object;
if (object == VM_OBJECT_NULL)
return;
vm_page_lock_queues();
vm_object_lock(object); /* mfs code holds reference */
retry:
m = (vm_page_t) queue_first(&object->memq);
while (!queue_end(&object->memq, (queue_entry_t) m)) {
/* We don't want to deadlock on the page we are bringing in */
if (m->nfspagereq == FALSE){
if (m->busy) {
#if SCRUBVM3
/* THIS SHOULD NOT HAPPEN IF ONLY ASYNC
* on SWAP */
/* hint; if we miss it it's ok */
if (m->dry_vp){
/* object and page queues locked, note
* page might not be clean wrt backing
* store */
(void) vm_page_completeio(m, TRUE);
} else {
#endif
PAGE_ASSERT_WAIT(m, FALSE);
vm_object_unlock(object);
vm_page_unlock_queues();
thread_block();
vm_page_lock_queues();
vm_object_lock(object);
/* Page may be long gone, XXX Forward
* progress */
goto retry;
#if SCRUBVM3
}
#endif
}
if (!m->active) {
vm_page_activate(m); /* so deactivate works */
}
vm_page_deactivate(m); /* gets dirty/laundry bit */
/*
* Prevent pageout from playing with
* this page. We know it is inactive right
* now (and are holding lots of locks keeping
* it there).
*/
queue_remove(&vm_page_queue_inactive, m, vm_page_t,
pageq);
m->inactive = FALSE;
vm_page_inactive_count--;
m->busy = TRUE;
if (m->laundry) {
pager_return_t ret;
pmap_remove_all(VM_PAGE_TO_PHYS(m));
object->paging_in_progress++;
vm_object_unlock(object);
vm_page_unlock_queues();
/* should call pageout daemon code */
ret = vnode_pageout(m);
vm_page_lock_queues();
vm_object_lock(object);
object->paging_in_progress--;
if (ret == PAGER_SUCCESS) {
/* vnode_pageout marks clean */
m->laundry = FALSE;
} else {
/* don't set dirty bit, unrecoverable
errors will cause update to go
crazy. User is responsible for
retrying the write */
/* vmp->dirty = TRUE; */
error = vmp->error;
vmp->error=0;
}
/* if pager failed, activate below */
}
vm_page_activate(m);
m->busy = FALSE;
PAGE_WAKEUP(m);
}
m = (vm_page_t) queue_next(&m->listq);
}
vmp->nfsdirty = FALSE;
vm_object_unlock(object);
vm_page_unlock_queues();
/*
* On error we have to reset the true file size in the vmp
* structure. The lack of a credential structure pointer
* would indicate nothing was changing in the file.
*/
if (error && vmp->cred) {
vmp->filesize=TRUE;
VOP_GETATTR (vmp->vnode, &vattr, vmp->cred, current_proc());
vmp->filesize=FALSE;
vmp->vnode_size = vattr.va_size;
}
}
#endif
vm_size_t vm_get_vnode_size(struct vnode *vp)
{
return(vp->v_vm_info->vnode_size);
}
void vm_set_vnode_size(struct vnode *vp, vm_size_t vnode_size)
{
vp->v_vm_info->vnode_size = vnode_size;
}
void vm_set_close_flush(struct vnode *vp, boolean_t close_flush)
{
vp->v_vm_info->close_flush = close_flush ? 1 : 0;
}
void vm_set_error(struct vnode *vp, int error)
{
vp->v_vm_info->error = error;
}
#endif /* MACH_NBC ] */