/* Source to kern/kern_physio.c */
/*
* Copyright (c) 1982, 1986, 1990 Regents of the University of California.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* from: @(#)kern_physio.c 7.20 (Berkeley) 5/11/91
* kern_physio.c,v 1.6 1993/07/08 10:53:40 cgd Exp
*/
#include "param.h"
#include "systm.h"
#include "buf.h"
#include "conf.h"
#include "proc.h"
/* #include "seg.h" XXX - cgd */
#include "trace.h"
/* #include "map.h" XXX - cgd */
#include "vnode.h"
#include "specdev.h"
#ifdef HPUXCOMPAT
#include "user.h"
#endif
#include "malloc.h" /* XXX -- i had to add this, so it could very
* well be wrong... -- cgd
*/
/*
 * physio -- do raw device I/O directly between a device and the
 * memory described by a uio, on behalf of curproc.
 *
 *	strat	strategy routine; called once for every transfer chunk
 *	bp	buffer header to use for the transfers, or NULL to have
 *		a temporary one allocated here and freed before returning
 *	dev	device number stored in every request
 *	rw	B_READ or B_WRITE; the uio's own direction is ignored
 *	mincnt	called with bp to bound the transfer size; its return
 *		value is taken as the size of the chunk
 *	uio	memory to transfer and device offset; its iovecs,
 *		uio_offset and uio_resid are advanced as data moves
 *
 * Every iovec is first checked with useracc(); if any check fails,
 * EFAULT is returned before any I/O is started.  Each chunk is then
 * locked with vslock(), mapped with vmapbuf(), handed to the strategy
 * routine, waited for, and finally unmapped and unlocked again.
 *
 * Returns 0, EFAULT, or the value returned by biowait().  The transfer
 * also stops early, without necessarily reporting an error, when mincnt
 * returns 0 or when a chunk moves fewer bytes than were requested.
 */
int
physio(strat, bp, dev, rw, mincnt, uio)
	int (*strat)();
	register struct buf *bp;
	dev_t dev;
	int rw;
	u_int (*mincnt)();
	struct uio *uio;
{
	/*
	 * Body deleted
	 *
	 * And reimplemented by cgd.
	 * comments in brackets are my own, the rest come from
	 * _The Design and Implementation of the 4.3 BSD UNIX Operating System_
	 * by Leffler, McKusick, et al., page 232
	 *
	 * also, parts of this snarfed from wfj's physio, but *it*
	 * was mostly ick.
	 */
	int s, i;
	int error, wanted,
	    nobuf = (bp == NULL);

	if (nobuf) {	/* [ if we have no buffer, we need one... ] */
		/* [ so malloc it... XXX? ] */
		bp = (struct buf *)malloc(sizeof(*bp), M_TEMP, M_WAITOK);
		bzero(bp, sizeof(*bp));
	}

	/* check user read/write access to the data buffer
	 * [ and if no access, then return EFAULT ]
	 */
	for (i = 0; i < uio->uio_iovcnt; i++) {
		/* [ check each iov one by one.  note that we're
		 *   given an rw param, so we ignore the uio's
		 *   rw parameter...  also note that if we're
		 *   doing a read, that's a *write* to user-space... ]
		 */
		if (!useracc(uio->uio_iov[i].iov_base,
		    uio->uio_iov[i].iov_len,
		    (rw == B_READ) ? B_WRITE : B_READ)) {
			/* [ the buffer has not been marked busy yet, so
			 *   only a temporary one needs any cleanup ]
			 */
			if (nobuf)
				free(bp, M_TEMP);
			return EFAULT;
		}
	}

	s = splbio();	/* raise the processor priority to splbio */

	/* while (the buffer is marked busy) { */
	while (bp->b_flags & B_BUSY) {
		bp->b_flags |= B_WANTED;	/* mark the buffer wanted */
		/* wait until the buffer is available */
		tsleep((caddr_t) bp, PRIBIO+1, "physbuf", 0);
	}				/* } */

	/* [ mark it as busy so it's not reused by somebody else ] */
	bp->b_flags |= B_BUSY;

	splx(s);	/* lower the priority level */

	error = 0;

	/* [ for each element of the iov ] */
	for (i = 0; i < uio->uio_iovcnt && !error; i++) {
		struct iovec *iovp;

		iovp = &uio->uio_iov[i];

		/* set up the fixed part of the buffer for a transfer */
		/* [ also, clear error flag, but that's done later ] */
		bp->b_error = 0;	/* no error yet */
		bp->b_proc = curproc;	/* on behalf of this process */

		/* while (there are data to transfer and no I/O error) { */
		while (iovp->iov_len > 0 && !error) {
			int todo, done;
			caddr_t tmp;

			/* mark the buffer busy for physical I/O
			 * [ every other flag left over from the previous
			 *   chunk is dropped, except B_WANTED: a process
			 *   sleeping in the wait loop above may have set it,
			 *   and the cleanup at `leave' only issues the
			 *   wakeup if the bit is still there ]
			 */
			bp->b_flags = (bp->b_flags & B_WANTED) |
			    B_BUSY | B_PHYS | rw;

			/* set up the buffer for a maximum-sized transfer */
			bp->b_dev = dev;
			bp->b_blkno = btodb(uio->uio_offset);
			bp->b_bcount = iovp->iov_len;
			/* [ base of buffer is iov's ] */
			bp->b_un.b_addr = iovp->iov_base;

			/* call minphys [actually mincnt] to bound the
			 * transfer size
			 */
			todo = (*mincnt)(bp);

			/* [ and if it returns zero, e.g. in the "end of disk"
			 *   case, bail... ] */
			if (todo == 0)
				goto leave;

			/* lock the part of the user address space involved in
			 * the transfer
			 * [ vmapbuf clobbers the b_addr, so save it first ]
			 */
			tmp = bp->b_un.b_addr;
			vslock(bp->b_un.b_addr, todo);
			vmapbuf(bp);

			/* call strategy to start the transfer
			 * [ some of the next bit snarfed from wfj's machdep.c ]
			 */
			(*strat)(bp);

			s = splbio();	/* raise the priority level to splbio */

			/* wait for the transfer to complete */
			while ((bp->b_flags & B_DONE) == 0)
				tsleep((caddr_t) bp, PRIBIO, "physio", 0);

			/* unlock the part of the address space previously
			 * locked
			 */
			vunmapbuf(bp);
			vsunlock(tmp, todo, 0);	/* [ 3rd param unused!!! ] */

			splx(s);	/* lower the priority level */

			/* deduct the transfer size from the total number
			 * of data to transfer
			 */
			done = bp->b_bcount - bp->b_resid;
			iovp->iov_len -= done;
			iovp->iov_base += done;
			uio->uio_offset += done;
			uio->uio_resid -= done;

			/* [ set error from the buffer's error code, and
			 *   do other miscellaneous cleanup on the buffer ]
			 */

			/* [ XXX this B_INVAL trick is a workaround for a
			 *   broken vfs_bio, which wants to rehash a buf on
			 *   error unless the buf is already B_INVAL ]
			 */
			if (nobuf && ((bp->b_flags & B_ERROR) || bp->b_error))
				bp->b_flags |= B_INVAL;	/* XXX */
			error = biowait(bp);

			/* [ handle disks like they want to be handled:
			 *   stop on an error or a short transfer ]
			 */
			if (bp->b_flags & B_ERROR || done < todo)
				goto leave;
		}			/* } */
	}

leave:
	if (nobuf) {	/* [ if we had to allocate it, get rid of it ] */
		if (bp->b_vp)	/* [ have a vnode; dissociate from it ] */
			brelvp(bp);
		/* XXX any other buf fields which should be taken care of
		 * if they had values?
		 */
		free(bp, M_TEMP);
	} else {
		/* clean up the state of the buffer */
		wanted = bp->b_flags & B_WANTED;
		bp->b_flags &= ~(B_BUSY | B_WANTED | B_PHYS | B_RAW);
		/* if (another process is waiting for the raw I/O buffer) */
		if (wanted)
			/* wake up process waiting to do physical I/O */
			wakeup((caddr_t) bp);
	}

	/* [ finally, if there's an error, return it ] */
	return error;
}
/*
 * minphys -- bound the size of a single raw I/O request.
 *
 * The limit keeps one process from being able to lock more than a
 * fixed amount of memory in the kernel for one operation.
 *
 * Clamps bp->b_bcount to at most MAXPHYS, stores the result back in
 * the buffer and returns it.
 */
u_int
minphys(bp)
	struct buf *bp;
{
	/*
	 * Body deleted.
	 *
	 * and reimplemented by cgd.
	 * Leffler, McKusick, et al., says on p. 231:
	 * "The minphys() routine is called by physio() to adjust the
	 * size of each I/O transfer before the latter is passed to
	 * the strategy routine..."
	 *
	 * so the count is adjusted in place and handed back in one step.
	 */
	return (bp->b_bcount = min(MAXPHYS, bp->b_bcount));
}
/*
 * rawread -- read from a character device for a user process.
 *
 * Hands the request to physio() with the strategy routine found in
 * the device's cdevsw entry, no caller-supplied buffer (NULL), the
 * B_READ direction and minphys as the size-bounding routine.
 * Returns whatever physio() returns.
 */
int
rawread(dev, uio)
	dev_t dev;
	struct uio *uio;
{
	int status;

	status = physio(cdevsw[major(dev)].d_strategy, (struct buf *)NULL,
	    dev, B_READ, minphys, uio);
	return status;
}
/*
 * rawwrite -- write to a character device for a user process.
 *
 * Hands the request to physio() with the strategy routine found in
 * the device's cdevsw entry, no caller-supplied buffer (NULL), the
 * B_WRITE direction and minphys as the size-bounding routine.
 * Returns whatever physio() returns.
 */
int
rawwrite(dev, uio)
	dev_t dev;
	struct uio *uio;
{
	int status;

	status = physio(cdevsw[major(dev)].d_strategy, (struct buf *)NULL,
	    dev, B_WRITE, minphys, uio);
	return status;
}