Annotation of XNU/bsd/vfs/vfs_bio.c, revision 1.1

1.1     ! root        1: /*
        !             2:  * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
        !             3:  *
        !             4:  * @APPLE_LICENSE_HEADER_START@
        !             5:  * 
        !             6:  * The contents of this file constitute Original Code as defined in and
        !             7:  * are subject to the Apple Public Source License Version 1.1 (the
        !             8:  * "License").  You may not use this file except in compliance with the
        !             9:  * License.  Please obtain a copy of the License at
        !            10:  * http://www.apple.com/publicsource and read it before using this file.
        !            11:  * 
        !            12:  * This Original Code and all software distributed under the License are
        !            13:  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
        !            14:  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
        !            15:  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
        !            16:  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
        !            17:  * License for the specific language governing rights and limitations
        !            18:  * under the License.
        !            19:  * 
        !            20:  * @APPLE_LICENSE_HEADER_END@
        !            21:  */
        !            22: /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
        !            23: /*-
        !            24:  * Copyright (c) 1994 Christopher G. Demetriou
        !            25:  * Copyright (c) 1982, 1986, 1989, 1993
        !            26:  *     The Regents of the University of California.  All rights reserved.
        !            27:  * (c) UNIX System Laboratories, Inc.
        !            28:  * All or some portions of this file are derived from material licensed
        !            29:  * to the University of California by American Telephone and Telegraph
        !            30:  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
        !            31:  * the permission of UNIX System Laboratories, Inc.
        !            32:  *
        !            33:  * Redistribution and use in source and binary forms, with or without
        !            34:  * modification, are permitted provided that the following conditions
        !            35:  * are met:
        !            36:  * 1. Redistributions of source code must retain the above copyright
        !            37:  *    notice, this list of conditions and the following disclaimer.
        !            38:  * 2. Redistributions in binary form must reproduce the above copyright
        !            39:  *    notice, this list of conditions and the following disclaimer in the
        !            40:  *    documentation and/or other materials provided with the distribution.
        !            41:  * 3. All advertising materials mentioning features or use of this software
        !            42:  *    must display the following acknowledgement:
        !            43:  *     This product includes software developed by the University of
        !            44:  *     California, Berkeley and its contributors.
        !            45:  * 4. Neither the name of the University nor the names of its contributors
        !            46:  *    may be used to endorse or promote products derived from this software
        !            47:  *    without specific prior written permission.
        !            48:  *
        !            49:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
        !            50:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
        !            51:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
        !            52:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
        !            53:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
        !            54:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
        !            55:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
        !            56:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
        !            57:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
        !            58:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
        !            59:  * SUCH DAMAGE.
        !            60:  *
        !            61:  * The NEXTSTEP Software License Agreement specifies the terms
        !            62:  * and conditions for redistribution.
        !            63:  *
        !            64:  *     @(#)vfs_bio.c   8.6 (Berkeley) 1/11/94
        !            65:  */
        !            66: 
        !            67: /*
        !            68:  * Some references:
        !            69:  *     Bach: The Design of the UNIX Operating System (Prentice Hall, 1986)
        !            70:  *     Leffler, et al.: The Design and Implementation of the 4.3BSD
        !            71:  *             UNIX Operating System (Addison Wesley, 1989)
        !            72:  */
        !            73: /*
        !            74:  * HISTORY
        !            75:  * 17-July-97  Umesh Vaishampayan ([email protected])
        !            76:  *     Eliminated multiple definition of buffers and buf which are defined in
        !            77:  *     conf/param.c.
        !            78:  *     Eliminated multiple definition of nbuf and bufpages which are defined
        !            79:  *     in machdep/XXX/unix_startup.c
        !            80:  *
        !            81:  * 11-July-97  Umesh Vaishampayan ([email protected])
        !            82:  *     Defined global variables for use when tracing is turned on.
        !            83:  */
        !            84: 
        !            85: #include <mach_nbc.h>
        !            86: #include <sys/param.h>
        !            87: #include <sys/systm.h>
        !            88: #include <sys/proc.h>
        !            89: #include <sys/buf.h>
        !            90: #include <sys/vnode.h>
        !            91: #include <sys/mount.h>
        !            92: #include <sys/trace.h>
        !            93: #include <sys/malloc.h>
        !            94: #include <sys/resourcevar.h>
        !            95: #include <miscfs/specfs/specdev.h>
        !            96: 
        !            97: extern void reassignbuf(struct buf *, struct vnode *);
        !            98: 
        !            99: extern int nbuf;               /* The number of buffer headers */
        !           100: extern int niobuf;
        !           101: extern struct buf *buf;                /* The buffer headers. */
        !           102: extern char    *buffers;       /* The buffer contents. */
        !           103: extern int bufpages;           /* Number of memory pages in the buffer pool. */
        !           104: struct buf *swbuf;     /* Swap I/O buffer headers. */
        !           105: int nswbuf;                    /* Number of swap I/O buffer headers. */
        !           106: struct buf bswlist;    /* Head of swap I/O buffer headers free list. */
        !           107: struct buf *bclnlist;/* Head of cleaned page list. */
        !           108: 
        #if TRACE
        /* Global state used only when kernel tracing is compiled in. */
        char    traceflags[TR_NFLAGS];
        int     tracebuf[TRCSIZ];
        int     tracewhich;
        u_int   tracex;
        struct proc *traceproc;
        #endif /* TRACE */
        !           115: 
        /*
         * Flag helpers: set, clear and test bits `f' in the lvalue `t'.
         *
         * Each expansion is wrapped in its own parentheses so the macro behaves
         * as a single expression wherever it is used (for example after `!' or
         * as an operand of another operator), not only as a bare statement.
         * Note that `t' and `f' are still expanded textually, so arguments with
         * side effects should be avoided.
         */
        #define SET(t, f)       ((t) |= (f))
        #define CLR(t, f)       ((t) &= ~(f))
        #define ISSET(t, f)     ((t) & (f))
        !           120: 
        !           121: /*
        !           122:  * Definitions for the buffer hash lists.
        !           123:  */
        !           124: #define        BUFHASH(dvp, lbn)       \
        !           125:        (&bufhashtbl[((long)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & bufhash])
        !           126: LIST_HEAD(bufhashhdr, buf) *bufhashtbl, invalhash;
        !           127: u_long bufhash;
        !           128: 
        !           129: /*
        !           130:  * Insq/Remq for the buffer hash lists.
        !           131:  */
        !           132: #define        binshash(bp, dp)        LIST_INSERT_HEAD(dp, bp, b_hash)
        !           133: #define        bremhash(bp)            LIST_REMOVE(bp, b_hash)
        !           134: 
        !           135: /*
        !           136:  * Definitions for the buffer free lists.
        !           137:  */
        !           138: #define        BQUEUES         4               /* number of free buffer queues */
        !           139: 
        !           140: #define        BQ_LOCKED       0               /* super-blocks &c */
        !           141: #define        BQ_LRU          1               /* lru, useful buffers */
        !           142: #define        BQ_AGE          2               /* rubbish */
        !           143: #define        BQ_EMPTY        3               /* buffer headers with no memory */
        !           144: 
        !           145: TAILQ_HEAD(ioqueue, buf) iobufqueue;
        !           146: TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES];
        !           147: int needbuffer;
        !           148: 
        !           149: /*
        !           150:  * Insq/Remq for the buffer free lists.
        !           151:  */
        !           152: #define        binsheadfree(bp, dp)    do { \
        !           153:                                    TAILQ_INSERT_HEAD(dp, bp, b_freelist); \
        !           154:                                    (bp)->b_timestamp = time.tv_sec; \
        !           155:                                } while (0)
        !           156: 
        !           157: #define        binstailfree(bp, dp)    do { \
        !           158:                                    TAILQ_INSERT_TAIL(dp, bp, b_freelist); \
        !           159:                                    (bp)->b_timestamp = time.tv_sec; \
        !           160:                                } while (0)
        !           161: 
        !           162: 
        /*
         * Staleness thresholds: number of seconds a buf may sit on a free list
         * before it is considered stale.  The macros give the defaults; the
         * variables hold the values actually in effect.
         */
        #define AGE_IS_STALE 60         /* default for the AGE list */
        #define LRU_IS_STALE 120        /* default for the LRU list */

        int age_is_stale = AGE_IS_STALE;
        int lru_is_stale = LRU_IS_STALE;
        !           169: 
        !           170: 
        !           171: 
        !           172: void
        !           173: bremfree(bp)
        !           174:        struct buf *bp;
        !           175: {
        !           176:        struct bqueues *dp = NULL;
        !           177: 
        !           178:        /*
        !           179:         * We only calculate the head of the freelist when removing
        !           180:         * the last element of the list as that is the only time that
        !           181:         * it is needed (e.g. to reset the tail pointer).
        !           182:         *
        !           183:         * NB: This makes an assumption about how tailq's are implemented.
        !           184:         */
        !           185:        if (bp->b_freelist.tqe_next == NULL) {
        !           186:                for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
        !           187:                        if (dp->tqh_last == &bp->b_freelist.tqe_next)
        !           188:                                break;
        !           189:                if (dp == &bufqueues[BQUEUES])
        !           190:                        panic("bremfree: lost tail");
        !           191:        }
        !           192:        TAILQ_REMOVE(dp, bp, b_freelist);
        !           193:        bp->b_timestamp = 0; 
        !           194: }
        !           195: 
        !           196: /*
        !           197:  * Initialize buffers and hash links for buffers.
        !           198:  */
        !           199: void
        !           200: bufinit()
        !           201: {
        !           202:        register struct buf *bp;
        !           203:        struct bqueues *dp;
        !           204:        register int i;
        !           205:        int base, residual;
        !           206: 
        !           207:        for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
        !           208:                TAILQ_INIT(dp);
        !           209:        bufhashtbl = hashinit(nbuf, M_CACHE, &bufhash);
        !           210:        base = bufpages / nbuf;
        !           211:        residual = bufpages % nbuf;
        !           212:        for (i = 0; i < nbuf; i++) {
        !           213:                bp = &buf[i];
        !           214:                bzero((char *)bp, sizeof *bp);
        !           215:                bp->b_dev = NODEV;
        !           216:                bp->b_rcred = NOCRED;
        !           217:                bp->b_wcred = NOCRED;
        !           218:                bp->b_vnbufs.le_next = NOLIST;
        !           219:                bp->b_data = buffers + i * MAXBSIZE;
        !           220:                if (i < residual)
        !           221:                        bp->b_bufsize = (base + 1) * CLBYTES;
        !           222:                else
        !           223:                        bp->b_bufsize = base * CLBYTES;
        !           224:                bp->b_flags = B_INVAL;
        !           225:                dp = bp->b_bufsize ? &bufqueues[BQ_AGE] : &bufqueues[BQ_EMPTY];
        !           226:                binsheadfree(bp, dp);
        !           227:                binshash(bp, &invalhash);
        !           228:        }
        !           229:        base = (int )(buffers + (i * MAXBSIZE));
        !           230: 
        !           231:        for (; i < nbuf + niobuf; i++) {
        !           232:                bp = &buf[i];
        !           233:                bzero((char *)bp, sizeof *bp);
        !           234:                bp->b_dev = NODEV;
        !           235:                bp->b_rcred = NOCRED;
        !           236:                bp->b_wcred = NOCRED;
        !           237:                bp->b_vnbufs.le_next = NOLIST;
        !           238:                bp->b_data = (char *)base;
        !           239:                bp->b_bufsize = 0;
        !           240:                bp->b_flags = B_INVAL;
        !           241:                binsheadfree(bp, &iobufqueue);
        !           242: 
        !           243:                base += MAXPHYSIO;
        !           244:        }
        !           245: }
        !           246: 
        !           247: __inline struct buf *
        !           248: bio_doread(vp, blkno, size, cred, async)
        !           249:        struct vnode *vp;
        !           250:        daddr_t blkno;
        !           251:        int size;
        !           252:        struct ucred *cred;
        !           253:        int async;
        !           254: {
        !           255:        register struct buf *bp;
        !           256:        struct proc     *p = current_proc();
        !           257: 
        !           258:        bp = getblk(vp, blkno, size, 0, 0);
        !           259: 
        !           260:        /*
        !           261:         * If buffer does not have data valid, start a read.
        !           262:         * Note that if buffer is B_INVAL, getblk() won't return it.
        !           263:         * Therefore, it's valid if it's I/O has completed or been delayed.
        !           264:         */
        !           265:        if (!ISSET(bp->b_flags, (B_DONE | B_DELWRI))) {
        !           266:                /* Start I/O for the buffer (keeping credentials). */
        !           267:                SET(bp->b_flags, B_READ | async);
        !           268:                if (cred != NOCRED && bp->b_rcred == NOCRED) {
        !           269:                        crhold(cred);
        !           270:                        bp->b_rcred = cred;
        !           271:                }
        !           272:                VOP_STRATEGY(bp);
        !           273: 
        !           274:                trace(TR_BREADMISS, pack(vp, size), blkno);
        !           275: 
        !           276:                /* Pay for the read. */
        !           277:                if (p && p->p_stats) 
        !           278:                        p->p_stats->p_ru.ru_inblock++;          /* XXX */
        !           279:        } else if (async) {
        !           280:                brelse(bp);
        !           281:        }
        !           282: 
        !           283:        trace(TR_BREADHIT, pack(vp, size), blkno);
        !           284: 
        !           285:        return (bp);
        !           286: }
        !           287: 
        !           288: /*
        !           289:  * Read a disk block.
        !           290:  * This algorithm described in Bach (p.54).
        !           291:  */
        !           292: int
        !           293: bread(vp, blkno, size, cred, bpp)
        !           294:        struct vnode *vp;
        !           295:        daddr_t blkno;
        !           296:        int size;
        !           297:        struct ucred *cred;
        !           298:        struct buf **bpp;
        !           299: {
        !           300:        register struct buf *bp;
        !           301: 
        !           302:        /* Get buffer for block. */
        !           303:        bp = *bpp = bio_doread(vp, blkno, size, cred, 0);
        !           304: 
        !           305:        /* Wait for the read to complete, and return result. */
        !           306:        return (biowait(bp));
        !           307: }
        !           308: 
        !           309: /*
        !           310:  * Read-ahead multiple disk blocks. The first is sync, the rest async.
        !           311:  * Trivial modification to the breada algorithm presented in Bach (p.55).
        !           312:  */
        !           313: int
        !           314: breadn(vp, blkno, size, rablks, rasizes, nrablks, cred, bpp)
        !           315:        struct vnode *vp;
        !           316:        daddr_t blkno; int size;
        !           317:        daddr_t rablks[]; int rasizes[];
        !           318:        int nrablks;
        !           319:        struct ucred *cred;
        !           320:        struct buf **bpp;
        !           321: {
        !           322:        register struct buf *bp;
        !           323:        int i;
        !           324: 
        !           325:        bp = *bpp = bio_doread(vp, blkno, size, cred, 0);
        !           326: 
        !           327:        /*
        !           328:         * For each of the read-ahead blocks, start a read, if necessary.
        !           329:         */
        !           330:        for (i = 0; i < nrablks; i++) {
        !           331:                /* If it's in the cache, just go on to next one. */
        !           332:                if (incore(vp, rablks[i]))
        !           333:                        continue;
        !           334: 
        !           335:                /* Get a buffer for the read-ahead block */
        !           336:                (void) bio_doread(vp, rablks[i], rasizes[i], cred, B_ASYNC);
        !           337:        }
        !           338: 
        !           339:        /* Otherwise, we had to start a read for it; wait until it's valid. */
        !           340:        return (biowait(bp));
        !           341: }
        !           342: 
        !           343: /*
        !           344:  * Read with single-block read-ahead.  Defined in Bach (p.55), but
        !           345:  * implemented as a call to breadn().
        !           346:  * XXX for compatibility with old file systems.
        !           347:  */
        !           348: int
        !           349: breada(vp, blkno, size, rablkno, rabsize, cred, bpp)
        !           350:        struct vnode *vp;
        !           351:        daddr_t blkno; int size;
        !           352:        daddr_t rablkno; int rabsize;
        !           353:        struct ucred *cred;
        !           354:        struct buf **bpp;
        !           355: {
        !           356: 
        !           357:        return (breadn(vp, blkno, size, &rablkno, &rabsize, 1, cred, bpp));     
        !           358: }
        !           359: 
        !           360: /*
        !           361:  * Block write.  Described in Bach (p.56)
        !           362:  */
        !           363: int
        !           364: bwrite(bp)
        !           365:        struct buf *bp;
        !           366: {
        !           367:        int rv, sync, wasdelayed;
        !           368:        struct proc     *p = current_proc();
        !           369: 
        !           370:        /* Remember buffer type, to switch on it later. */
        !           371:        sync = !ISSET(bp->b_flags, B_ASYNC);
        !           372:        wasdelayed = ISSET(bp->b_flags, B_DELWRI);
        !           373:        CLR(bp->b_flags, (B_READ | B_DONE | B_ERROR | B_DELWRI));
        !           374: 
        !           375:        if (!sync) {
        !           376:                /*
        !           377:                 * If not synchronous, pay for the I/O operation and make
        !           378:                 * sure the buf is on the correct vnode queue.  We have
        !           379:                 * to do this now, because if we don't, the vnode may not
        !           380:                 * be properly notified that its I/O has completed.
        !           381:                 */
        !           382:                if (wasdelayed)
        !           383:                        reassignbuf(bp, bp->b_vp);
        !           384:                else
        !           385:                if (p && p->p_stats) 
        !           386:                        p->p_stats->p_ru.ru_oublock++;          /* XXX */
        !           387:        }
        !           388: 
        !           389:        trace(TR_BWRITE, pack(bp->b_vp, bp->b_bcount), bp->b_lblkno);
        !           390: 
        !           391:        /* Initiate disk write.  Make sure the appropriate party is charged. */
        !           392:        SET(bp->b_flags, B_WRITEINPROG);
        !           393:        bp->b_vp->v_numoutput++;
        !           394:        VOP_STRATEGY(bp);
        !           395: 
        !           396:        if (sync) {
        !           397:                /*
        !           398:                 * If I/O was synchronous, wait for it to complete.
        !           399:                 */
        !           400:                rv = biowait(bp);
        !           401: 
        !           402:                /*
        !           403:                 * Pay for the I/O operation, if it's not been paid for, and
        !           404:                 * make sure it's on the correct vnode queue. (async operatings
        !           405:                 * were payed for above.)
        !           406:                 */
        !           407:                if (wasdelayed)
        !           408:                        reassignbuf(bp, bp->b_vp);
        !           409:                else
        !           410:                if (p && p->p_stats) 
        !           411:                        p->p_stats->p_ru.ru_oublock++;          /* XXX */
        !           412: 
        !           413:                /* Release the buffer. */
        !           414:                brelse(bp);
        !           415: 
        !           416:                return (rv);
        !           417:        } else {
        !           418:                return (0);
        !           419:        }
        !           420: }
        !           421: 
        !           422: int
        !           423: vn_bwrite(ap)
        !           424:        struct vop_bwrite_args *ap;
        !           425: {
        !           426: 
        !           427:        return (bwrite(ap->a_bp));
        !           428: }
        !           429: 
        !           430: /*
        !           431:  * Delayed write.
        !           432:  *
        !           433:  * The buffer is marked dirty, but is not queued for I/O.
        !           434:  * This routine should be used when the buffer is expected
        !           435:  * to be modified again soon, typically a small write that
        !           436:  * partially fills a buffer.
        !           437:  *
        !           438:  * NB: magnetic tapes cannot be delayed; they must be
        !           439:  * written in the order that the writes are requested.
        !           440:  *
        !           441:  * Described in Leffler, et al. (pp. 208-213).
        !           442:  */
        !           443: void
        !           444: bdwrite(bp)
        !           445:        struct buf *bp;
        !           446: {
        !           447:        struct proc *p = current_proc();
        !           448: 
        !           449:        /*
        !           450:         * If the block hasn't been seen before:
        !           451:         *      (1) Mark it as having been seen,
        !           452:         *      (2) Charge for the write.
        !           453:         *      (3) Make sure it's on its vnode's correct block list,
        !           454:         */
        !           455:        if (!ISSET(bp->b_flags, B_DELWRI)) {
        !           456:                SET(bp->b_flags, B_DELWRI);
        !           457:                if (p && p->p_stats) 
        !           458:                        p->p_stats->p_ru.ru_oublock++;          /* XXX */
        !           459:                reassignbuf(bp, bp->b_vp);
        !           460:        }
        !           461: 
        !           462:        /* If this is a tape block, write it the block now. */
        !           463:        if (ISSET(bp->b_flags, B_TAPE)) {
        !           464:                bwrite(bp);
        !           465:                return;
        !           466:        }
        !           467: 
        !           468:        /* Otherwise, the "write" is done, so mark and release the buffer. */
        !           469:        SET(bp->b_flags, B_DONE);
        !           470:        brelse(bp);
        !           471: }
        !           472: 
        !           473: /*
        !           474:  * Asynchronous block write; just an asynchronous bwrite().
        !           475:  */
        !           476: void
        !           477: bawrite(bp)
        !           478:        struct buf *bp;
        !           479: {
        !           480: 
        !           481:        SET(bp->b_flags, B_ASYNC);
        !           482:        VOP_BWRITE(bp);
        !           483: }
        !           484: 
        !           485: /*
        !           486:  * Release a buffer on to the free lists.
        !           487:  * Described in Bach (p. 46).
        !           488:  */
        !           489: void
        !           490: brelse(bp)
        !           491:        struct buf *bp;
        !           492: {
        !           493:        struct bqueues *bufq;
        !           494:        int s;
        !           495: 
        !           496:        trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
        !           497: 
        !           498:        /* Wake up any processes waiting for any buffer to become free. */
        !           499:        if (needbuffer) {
        !           500:                needbuffer = 0;
        !           501:                wakeup(&needbuffer);
        !           502:        }
        !           503: 
        !           504:        /* Wake up any proceeses waiting for _this_ buffer to become free. */
        !           505:        if (ISSET(bp->b_flags, B_WANTED)) {
        !           506:                CLR(bp->b_flags, B_WANTED);
        !           507:                wakeup(bp);
        !           508:        }
        !           509: 
        !           510:        /* Block disk interrupts. */
        !           511:        s = splbio();
        !           512: 
        !           513:        /*
        !           514:         * Determine which queue the buffer should be on, then put it there.
        !           515:         */
        !           516: 
        !           517:        /* If it's locked, don't report an error; try again later. */
        !           518:        if (ISSET(bp->b_flags, (B_LOCKED|B_ERROR)) == (B_LOCKED|B_ERROR))
        !           519:                CLR(bp->b_flags, B_ERROR);
        !           520: 
        !           521:        /* If it's not cacheable, or an error, mark it invalid. */
        !           522:        if (ISSET(bp->b_flags, (B_NOCACHE|B_ERROR)))
        !           523:                SET(bp->b_flags, B_INVAL);
        !           524: 
        !           525:        if ((bp->b_bufsize <= 0) || ISSET(bp->b_flags, B_INVAL)) {
        !           526:                /*
        !           527:                 * If it's invalid or empty, dissociate it from its vnode
        !           528:                 * and put on the head of the appropriate queue.
        !           529:                 */
        !           530:                if (bp->b_vp)
        !           531:                        brelvp(bp);
        !           532:                CLR(bp->b_flags, B_DELWRI);
        !           533:                if (bp->b_bufsize <= 0)
        !           534:                        /* no data */
        !           535:                        bufq = &bufqueues[BQ_EMPTY];
        !           536:                else
        !           537:                        /* invalid data */
        !           538:                        bufq = &bufqueues[BQ_AGE];
        !           539:                binsheadfree(bp, bufq);
        !           540:        } else {
        !           541:                /*
        !           542:                 * It has valid data.  Put it on the end of the appropriate
        !           543:                 * queue, so that it'll stick around for as long as possible.
        !           544:                 */
        !           545:                if (ISSET(bp->b_flags, B_LOCKED))
        !           546:                        /* locked in core */
        !           547:                        bufq = &bufqueues[BQ_LOCKED];
        !           548:                else if (ISSET(bp->b_flags, B_AGE))
        !           549:                        /* stale but valid data */
        !           550:                        bufq = &bufqueues[BQ_AGE];
        !           551:                else
        !           552:                        /* valid data */
        !           553:                        bufq = &bufqueues[BQ_LRU];
        !           554:                binstailfree(bp, bufq);
        !           555:        }
        !           556: 
        !           557:        /* Unlock the buffer. */
        !           558:        CLR(bp->b_flags, (B_AGE | B_ASYNC | B_BUSY | B_NOCACHE));
        !           559: 
        !           560:        /* Allow disk interrupts. */
        !           561:        splx(s);
        !           562: }
        !           563: 
        !           564: /*
        !           565:  * Look a block up in the buffer cache.
        !           566:  * Walk the hash chain selected by BUFHASH(vp, blkno) and return the
        !           567:  * first buffer on it that belongs to vp, has logical block number
        !           568:  * blkno and is not marked B_INVAL.  Return 0 when the chain holds no
        !           569:  * such buffer.
        !           570:  */
        !           571: struct buf *
        !           572: incore(vp, blkno)
        !           573:        struct vnode *vp;
        !           574:        daddr_t blkno;
        !           575: {
        !           576:        struct buf *cur;
        !           577: 
        !           578:        /* Scan the chain, skipping other blocks and invalid buffers. */
        !           579:        for (cur = BUFHASH(vp, blkno)->lh_first; cur != NULL;
        !           580:            cur = cur->b_hash.le_next) {
        !           581:                if (cur->b_vp != vp || cur->b_lblkno != blkno)
        !           582:                        continue;
        !           583:                if (!ISSET(cur->b_flags, B_INVAL))
        !           584:                        return (cur);
        !           585:        }
        !           586: 
        !           587:        return (0);
        !           588: }
        !           589: 
        !           590: /*
        !           591:  * Get a block of the requested size for a given vnode and logical block
        !           592:  * number.  If incore() finds it, wait until it is not B_BUSY, mark it
        !           593:  * B_BUSY|B_CACHE, take it off the free list and resize it with
        !           594:  * allocbuf().  Otherwise take a buffer from getnewbuf(), hash it, size
        !           595:  * it and hand it to bgetvp() for the vnode.  Returns NULL only if the
        !           596:  * tsleep() on a busy buffer returns nonzero.
        !           597:  */
        !           598: struct buf *
        !           599: getblk(vp, blkno, size, slpflag, slptimeo)
        !           600:        register struct vnode *vp;
        !           601:        daddr_t blkno;
        !           602:        int size, slpflag, slptimeo;
        !           603: {
        !           604:        struct buf *bp;
        !           605:        int s, err;
        !           606: 
        !           607: start:                                 /* every retry restarts the lookup here */
        !           608:        s = splbio();
        !           609:        if (bp = incore(vp, blkno)) {   /* XXX NFS VOP_BWRITE foolishness */
        !           610:                if (ISSET(bp->b_flags, B_BUSY)) {
        !           611:                        SET(bp->b_flags, B_WANTED);     /* flag that there is a waiter */
        !           612:                        err = tsleep(bp, slpflag | (PRIBIO + 1), "getblk",
        !           613:                            slptimeo);
        !           614:                        splx(s);
        !           615:                        if (err)
        !           616:                                return (NULL);          /* the sleep itself failed */
        !           617:                        goto start;                     /* look the block up again */
        !           618:                }
        !           619:                SET(bp->b_flags, (B_BUSY | B_CACHE));
        !           620:                bremfree(bp);
        !           621:                splx(s);
        !           622:                allocbuf(bp, size);
        !           623:        } else {
        !           624:                splx(s);
        !           625:                if ((bp = getnewbuf(slpflag, slptimeo)) == NULL)
        !           626:                        goto start;     /* getnewbuf() slept; redo the lookup */
        !           627:                binshash(bp, BUFHASH(vp, blkno));
        !           628:                allocbuf(bp, size);
        !           629:                bp->b_blkno = bp->b_lblkno = blkno;
        !           630:                s = splbio();
        !           631:                bgetvp(vp, bp);
        !           632:                splx(s);
        !           633:        }
        !           634:        return (bp);
        !           635: }
        !           636: 
        !           637: /*
        !           638:  * Get an invalid (B_INVAL) buffer of the given size, tied to no vnode.
        !           639:  */
        !           640: struct buf *
        !           641: geteblk(size)
        !           642:        int size;
        !           643: {
        !           644:        struct buf *ebp;
        !           645: 
        !           646:        do {            /* getnewbuf() returns 0 after it had to sleep */
        !           647:                ebp = getnewbuf(0, 0);
        !           648:        } while (ebp == 0);
        !           649:        SET(ebp->b_flags, B_INVAL);
        !           650:        binshash(ebp, &invalhash);
        !           651:        allocbuf(ebp, size);
        !           652:        return (ebp);
        !           653: }
        !           654: 
        !           655: /*
        !           656:  * Expand or contract the actual memory allocated to a buffer.
        !           657:  *
        !           658:  * If the buffer shrinks, data is lost, so it's up to the
        !           659:  * caller to have written it out *first*; this routine will not
        !           660:  * start a write.  If the buffer grows, it's the callers
        !           661:  * responsibility to fill out the buffer's additional contents.
        !           662:  */
        !           663: int
        !           664: allocbuf(bp, size)
        !           665:        struct buf *bp;
        !           666:        int size;
        !           667: {
        !           668:        struct buf      *nbp;
        !           669:        vm_size_t       desired_size;
        !           670:        int          s;
        !           671: 
        !           672:        desired_size = roundup(size, CLBYTES);
        !           673:        if (desired_size > MAXBSIZE)
        !           674:                panic("allocbuf: buffer larger than MAXBSIZE requested");
        !           675: 
        !           676:        if (bp->b_bufsize == desired_size)
        !           677:                goto out;
        !           678: 
        !           679:        /*
        !           680:         * If the buffer is smaller than the desired size, we need to snarf
        !           681:         * it from other buffers.  Get buffers (via getnewbuf()), and
        !           682:         * steal their pages.
        !           683:         */
        !           684:        while (bp->b_bufsize < desired_size) {
        !           685:                int amt;
        !           686: 
        !           687:                /* find a buffer */
        !           688:                while ((nbp = getnewbuf(0, 0)) == NULL)
        !           689:                        ;
        !           690:                SET(nbp->b_flags, B_INVAL);
        !           691:                binshash(nbp, &invalhash);
        !           692: 
        !           693:                /* and steal its pages, up to the amount we need */
        !           694:                amt = min(nbp->b_bufsize, (desired_size - bp->b_bufsize));
        !           695:                pagemove((nbp->b_data + nbp->b_bufsize - amt),
        !           696:                        bp->b_data + bp->b_bufsize, amt);
        !           697:                bp->b_bufsize += amt;
        !           698:                nbp->b_bufsize -= amt;
        !           699: 
        !           700:                /* reduce transfer count if we stole some data */
        !           701:                if (nbp->b_bcount > nbp->b_bufsize)
        !           702:                        nbp->b_bcount = nbp->b_bufsize;
        !           703: 
        !           704: #if DIAGNOSTIC
        !           705:                if (nbp->b_bufsize < 0)
        !           706:                        panic("allocbuf: negative bufsize");
        !           707: #endif
        !           708: 
        !           709:                brelse(nbp);
        !           710:        }
        !           711: 
        !           712:        /*
        !           713:         * If we want a buffer smaller than the current size,
        !           714:         * shrink this buffer.  Grab a buf head from the EMPTY queue,
        !           715:         * move a page onto it, and put it on front of the AGE queue.
        !           716:         * If there are no free buffer headers, leave the buffer alone.
        !           717:         */
        !           718:        if (bp->b_bufsize > desired_size) {
        !           719:                s = splbio();
        !           720:                if ((nbp = bufqueues[BQ_EMPTY].tqh_first) == NULL) {
        !           721:                        /* No free buffer head */
        !           722:                        splx(s);
        !           723:                        goto out;
        !           724:                }
        !           725:                bremfree(nbp);
        !           726:                SET(nbp->b_flags, B_BUSY);
        !           727:                splx(s);
        !           728: 
        !           729:                /* move the page to it and note this change */
        !           730:                pagemove(bp->b_data + desired_size,
        !           731:                    nbp->b_data, bp->b_bufsize - desired_size);
        !           732:                nbp->b_bufsize = bp->b_bufsize - desired_size;
        !           733:                bp->b_bufsize = desired_size;
        !           734:                nbp->b_bcount = 0;
        !           735:                SET(nbp->b_flags, B_INVAL);
        !           736: 
        !           737:                /* release the newly-filled buffer and leave */
        !           738:                brelse(nbp);
        !           739:        }
        !           740: 
        !           741: out:
        !           742:        bp->b_bcount = size;
        !           743: }
        !           744: 
        !           745: /*
        !           746:  * Take a buffer from the head of the AGE or LRU free list and return
        !           747:  * it B_BUSY with its other fields cleared.  If both lists are empty,
        !           748:  * sleep on needbuffer and return 0 so that the caller can retry.
        !           749:  */
        !           750: struct buf *
        !           751: getnewbuf(slpflag, slptimeo)
        !           752:        int slpflag, slptimeo;
        !           753: {
        !           754:        register struct buf *bp;
        !           755:        register struct buf *lru_bp;
        !           756:        register struct buf *age_bp;
        !           757:        register int age_time, lru_time;
        !           758:        int s;
        !           759:        struct ucred *cred;
        !           760: 
        !           761: start:
        !           762:        s = splbio();
        !           763: 
        !           764:        age_bp = bufqueues[BQ_AGE].tqh_first;
        !           765:        lru_bp = bufqueues[BQ_LRU].tqh_first;
        !           766: 
        !           767:        if (age_bp == NULL && lru_bp == NULL) {
        !           768:                /* wait for a free buffer of any kind */
        !           769:                needbuffer = 1;
        !           770:                tsleep(&needbuffer, slpflag|(PRIBIO+1), "getnewbuf", slptimeo);
        !           771:                splx(s);
        !           772:                return (0);             /* tsleep() result is not examined */
        !           773:        }
        !           774:        if (age_bp == NULL)             /* only the LRU list has a buffer */
        !           775:                bp = lru_bp;
        !           776:        else if (lru_bp == NULL)        /* only the AGE list has a buffer */
        !           777:                bp = age_bp;
        !           778:        else {
        !           779:                if (((age_time = (time.tv_sec - age_bp->b_timestamp)) < 0) ||
        !           780:                        ((lru_time = (time.tv_sec - lru_bp->b_timestamp)) < 0)) {
        !           781:                        /* time was set backwards */
        !           782:                        bp = age_bp;
        !           783:                        /*
        !           784:                         * we should probably re-timestamp everything in the queues
        !           785:                         * at this point with the current time
        !           786:                         */
        !           787:                } else {
        !           788:                        if (lru_time >= lru_is_stale && age_time < age_is_stale)
        !           789:                                bp = lru_bp;    /* LRU head is stale, AGE head is not */
        !           790:                        else
        !           791:                                bp = age_bp;    /* otherwise prefer the AGE head */
        !           792:                }
        !           793:        }
        !           794:        bremfree(bp);
        !           795: 
        !           796:        /* Buffer is no longer on free lists. */
        !           797:        SET(bp->b_flags, B_BUSY);
        !           798:        splx(s);
        !           799: 
        !           800:        /* If buffer was a delayed write, start it, and go back to the top. */
        !           801:        if (ISSET(bp->b_flags, B_DELWRI)) {
        !           802:                bawrite (bp);
        !           803:                goto start;             /* choose a different buffer */
        !           804:        }
        !           805: 
        !           806:        trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
        !           807: 
        !           808:        /* disassociate us from our vnode, if we had one... */
        !           809:        s = splbio();
        !           810:        if (bp->b_vp)
        !           811:                brelvp(bp);
        !           812:        splx(s);
        !           813: 
        !           814:        /* clear out various other fields; only B_BUSY stays set */
        !           815:        bp->b_flags = B_BUSY;
        !           816:        bp->b_dev = NODEV;
        !           817:        bp->b_blkno = bp->b_lblkno = 0;
        !           818:        bp->b_iodone = 0;
        !           819:        bp->b_error = 0;
        !           820:        bp->b_resid = 0;
        !           821:        bp->b_bcount = 0;
        !           822:        bp->b_dirtyoff = bp->b_dirtyend = 0;
        !           823:        bp->b_validoff = bp->b_validend = 0;
        !           824: 
        !           825:        /* nuke any credentials we were holding */
        !           826:        cred = bp->b_rcred;
        !           827:        if (cred != NOCRED) {
        !           828:                bp->b_rcred = NOCRED; 
        !           829:                crfree(cred);
        !           830:        }
        !           831:        cred = bp->b_wcred;
        !           832:        if (cred != NOCRED) {
        !           833:                bp->b_wcred = NOCRED;
        !           834:                crfree(cred);
        !           835:        }
        !           836:        
        !           837:        bremhash(bp);           /* callers in this file binshash() it again */
        !           838:        return (bp); 
        !           839: }
        !           840: 
        !           841: /*
        !           842:  * Sleep until the buffer is marked B_DONE, then report how the I/O
        !           843:  * ended: EINTR (clearing B_EINTR), the buffer's error or EIO, or 0.
        !           844:  */
        !           845: int
        !           846: biowait(bp)
        !           847:        struct buf *bp;
        !           848: {
        !           849:        int ipl;
        !           850: 
        !           851:        ipl = splbio();
        !           852:        while (!ISSET(bp->b_flags, B_DONE))
        !           853:                tsleep(bp, PRIBIO + 1, "biowait", 0);
        !           854:        splx(ipl);
        !           855: 
        !           856:        /* an interrupted transfer (e.g. via NFS) outranks a plain error */
        !           857:        if (ISSET(bp->b_flags, B_EINTR)) {
        !           858:                CLR(bp->b_flags, B_EINTR);
        !           859:                return (EINTR);
        !           860:        }
        !           861:        if (!ISSET(bp->b_flags, B_ERROR))
        !           862:                return (0);
        !           863:        return (bp->b_error ? bp->b_error : EIO);
        !           864: }
        !           865: 
        !           866: /*
        !           867:  * Mark I/O complete on a buffer; panics if it is already B_DONE.
        !           868:  *
        !           869:  * If a callback was requested (B_CALL), e.g. by the pageout daemon,
        !           870:  * call it; else brelse() an async buffer; else wake up sleepers.
        !           871:  *
        !           872:  * [ Leffler, et al., says on p.247:
        !           873:  *     "This routine wakes up the blocked process, frees the buffer
        !           874:  *     for an asynchronous write, or, for a request by the pagedaemon
        !           875:  *     process, invokes a procedure specified in the buffer structure" ]
        !           876:  *
        !           877:  * In real life, the pagedaemon (or other system processes) wants
        !           878:  * to do async stuff too, and doesn't want the buffer brelse()'d.
        !           879:  * (for swap pager, that puts swap buffers on the free lists (!!!),
        !           880:  * for the vn device, that puts malloc'd buffers on the free lists!)
        !           881:  */
        !           882: void
        !           883: biodone(bp)
        !           884:        struct buf *bp;
        !           885: {
        !           886:        boolean_t       funnel_state;
        !           887: 
        !           888:        funnel_state = thread_set_funneled(TRUE);       /* saved; put back on exit */
        !           889:        if (ISSET(bp->b_flags, B_DONE))
        !           890:                panic("biodone already");
        !           891:        SET(bp->b_flags, B_DONE);               /* note that it's done */
        !           892: 
        !           893:        if (!ISSET(bp->b_flags, B_READ) && !ISSET(bp->b_flags, B_RAW))  /* not a read, not raw */
        !           894:                vwakeup(bp);
        !           895: 
        !           896:        if (ISSET(bp->b_flags, B_CALL)) {       /* if necessary, call out */
        !           897:                CLR(bp->b_flags, B_CALL);       /* but note callout done */
        !           898:                (*bp->b_iodone)(bp);
        !           899:        } else if (ISSET(bp->b_flags, B_ASYNC)) /* if async, release it */
        !           900:                brelse(bp);
        !           901:        else {                                  /* or just wakeup the buffer */
        !           902:                CLR(bp->b_flags, B_WANTED);
        !           903:                wakeup(bp);
        !           904:        }
        !           905:        (void) thread_set_funneled(funnel_state);
        !           906: }
        !           907: 
        !           908: /*
        !           909:  * Count the buffers currently sitting on the BQ_LOCKED free queue.
        !           910:  */
        !           911: int
        !           912: count_lock_queue()
        !           913: {
        !           914:        register struct buf *cur = bufqueues[BQ_LOCKED].tqh_first;
        !           915:        register int nlocked;
        !           916: 
        !           917:        /* follow the free-list links from the head, counting as we go */
        !           918:        for (nlocked = 0; cur != NULL; cur = cur->b_freelist.tqe_next)
        !           919:                nlocked++;
        !           920:        return (nlocked);
        !           921: }
        !           922: 
        !           923: #if MACH_NBC
        !           924: #include <ufs/ufs/quota.h>
        !           925: #include <ufs/ufs/inode.h>
        !           926: /* bwrite() each B_DELWRI buffer on vp's dirty list that overlaps size bytes starting at block blkno. */
        !           927: #define        btodevblk(b) ((b) / devBlocksize)       /* bytes to blocks, using the local devBlocksize */
        !           928: void
        !           929: blkflush(struct vnode *vp, daddr_t blkno, vm_size_t size)
        !           930: {
        !           931:        register struct buf *ep, *nbp;
        !           932:        daddr_t start, last;
        !           933:        int s,err;
        !           934:        struct inode *ip= VTOI(vp);
        !           935:        int devBlocksize=1024;  /* initial value; its address goes to VOP_DEVBLOCKSIZE */
        !           936: 
        !           937: #if 1
        !           938:        VOP_DEVBLOCKSIZE(ip->i_devvp, &devBlocksize);
        !           939: #endif
        !           940: 
        !           941:        /* [start, last] is the inclusive range of blocks to flush */
        !           942:        start = blkno;
        !           943:        last = start + btodb(size, devBlocksize) - 1;
        !           944: loop:                                  /* each sleep or write restarts the scan here */
        !           945:        for(ep = vp->v_dirtyblkhd.lh_first; ep; ep = nbp) {
        !           946:                nbp = ep->b_vnbufs.le_next;     /* fetch the successor before using ep */
        !           947:                if (ep->b_vp != vp || ISSET(ep->b_flags, B_INVAL))
        !           948:                        continue;
        !           949:                /* look for overlap; empty buffers and ones outside the range are skipped */
        !           950:                if (ep->b_bcount == 0 || ep->b_blkno > last ||
        !           951:                    ep->b_blkno + btodevblk(ep->b_bcount) <= start)
        !           952:                        continue;
        !           953:                s = splbio();
        !           954:                if (ISSET(ep->b_flags, B_BUSY)) {
        !           955:                        SET(ep->b_flags, B_WANTED);
        !           956:                        err = tsleep(ep, (PRIBIO + 1), "blkflush",
        !           957:                            0);         /* err is assigned but never examined */
        !           958:                        splx(s);
        !           959:                        goto loop;
        !           960:                }
        !           961:                if(ISSET(ep->b_flags, B_DELWRI)) {
        !           962:                        bremfree(ep);
        !           963:                        SET(ep->b_flags, B_BUSY);
        !           964:                        (void) splx(s);
        !           965:                        bwrite(ep);
        !           966:                        goto loop;
        !           967:                }
        !           968:                (void) splx(s);         /* overlapping but neither busy nor delayed-write */
        !           969:        }
        !           970: 
        !           971: }
        !           972: #endif /* MACH_NBC */
        !           973: #if DIAGNOSTIC
        !           974: /*
        !           975:  * Print, for each free queue, how many buffers are on it and how that
        !           976:  * count breaks down by buffer size.  Can be enabled to print out on every
        !           977:  * ``sync'' by setting "syncprt" in vfs_syscalls.c using sysctl.
        !           978:  */
        !           979: void
        !           980: vfs_bufstats()
        !           981: {
        !           982:        static char *bname[BQUEUES] = { "LOCKED", "LRU", "AGE", "EMPTY" };
        !           983:        int counts[MAXBSIZE/CLBYTES+1];
        !           984:        register struct buf *bp;
        !           985:        int s, q, slot, total;
        !           986: 
        !           987:        for (q = 0; q < BQUEUES; q++) {
        !           988:                for (slot = 0; slot <= MAXBSIZE/CLBYTES; slot++)
        !           989:                        counts[slot] = 0;
        !           990:                total = 0;
        !           991:                s = splbio();           /* block disk interrupts for the walk */
        !           992:                for (bp = bufqueues[q].tqh_first; bp != NULL;
        !           993:                    bp = bp->b_freelist.tqe_next) {
        !           994:                        counts[bp->b_bufsize/CLBYTES]++;
        !           995:                        total++;
        !           996:                }
        !           997:                splx(s);
        !           998:                printf("%s: total-%d", bname[q], total);
        !           999:                for (slot = 0; slot <= MAXBSIZE/CLBYTES; slot++)
        !          1000:                        if (counts[slot] != 0)
        !          1001:                                printf(", %d-%d", slot * CLBYTES, counts[slot]);
        !          1002:                printf("\n");
        !          1003:        }
        !          1004: }
        !          1005: #endif /* DIAGNOSTIC */
        !          1006: /* Take the buffer header at the head of iobufqueue and set it up for vp. */
        !          1007: /* Returns NULL at once, without sleeping, when the queue is empty. */
        !          1008: struct buf *
        !          1009: alloc_io_buf(vp)
        !          1010:        struct vnode *vp;
        !          1011: {
        !          1012:        register struct buf *iobp;
        !          1013:        int s;
        !          1014: 
        !          1015:        s = splbio();
        !          1016:        iobp = iobufqueue.tqh_first;
        !          1017:        if (iobp == NULL) {
        !          1018:                splx(s);
        !          1019:                return (NULL);
        !          1020:        }
        !          1021:        TAILQ_REMOVE(&iobufqueue, iobp, b_freelist);
        !          1022: 
        !          1023:        /* reset the header: busy and raw, no data, b_vp pointing at vp */
        !          1024:        iobp->b_timestamp = 0;
        !          1025:        iobp->b_flags = (B_BUSY | B_RAW);
        !          1026:        iobp->b_blkno = iobp->b_lblkno = 0;
        !          1027:        iobp->b_iodone = 0;
        !          1028:        iobp->b_error = 0;
        !          1029:        iobp->b_resid = 0;
        !          1030:        iobp->b_bcount = 0;
        !          1031:        iobp->b_bufsize = 0;
        !          1032:        iobp->b_vp = vp;
        !          1033: 
        !          1034:        if (vp->v_type != VBLK && vp->v_type != VCHR)
        !          1035:                iobp->b_dev = NODEV;    /* not a block or character device vnode */
        !          1036:        else
        !          1037:                iobp->b_dev = vp->v_rdev;
        !          1038:        splx(s);
        !          1039:        return (iobp);
        !          1040: }
        !          1041: /* Return an I/O buffer header to iobufqueue (see alloc_io_buf()). */
        !          1042: void
        !          1043: free_io_buf(bp)
        !          1044:        struct buf *bp;
        !          1045: {
        !          1046:        int ipl;
        !          1047: 
        !          1048:        ipl = splbio();
        !          1049: 
        !          1050:        /* drop the vnode pointer and leave only B_INVAL set */
        !          1051:        bp->b_flags = B_INVAL;
        !          1052:        bp->b_vp = NULL;
        !          1053: 
        !          1054:        /* requeue at the head of the iobufqueue */
        !          1055:        binsheadfree(bp, &iobufqueue);
        !          1056: 
        !          1057:        splx(ipl);
        !          1058: }

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.