|
|
1.1 ! root 1: /* ! 2: * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. ! 3: * ! 4: * @APPLE_LICENSE_HEADER_START@ ! 5: * ! 6: * The contents of this file constitute Original Code as defined in and ! 7: * are subject to the Apple Public Source License Version 1.1 (the ! 8: * "License"). You may not use this file except in compliance with the ! 9: * License. Please obtain a copy of the License at ! 10: * http://www.apple.com/publicsource and read it before using this file. ! 11: * ! 12: * This Original Code and all software distributed under the License are ! 13: * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER ! 14: * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, ! 15: * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, ! 16: * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the ! 17: * License for the specific language governing rights and limitations ! 18: * under the License. ! 19: * ! 20: * @APPLE_LICENSE_HEADER_END@ ! 21: */ ! 22: /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ ! 23: /* ! 24: * Copyright (c) 1993 ! 25: * The Regents of the University of California. All rights reserved. ! 26: * ! 27: * Redistribution and use in source and binary forms, with or without ! 28: * modification, are permitted provided that the following conditions ! 29: * are met: ! 30: * 1. Redistributions of source code must retain the above copyright ! 31: * notice, this list of conditions and the following disclaimer. ! 32: * 2. Redistributions in binary form must reproduce the above copyright ! 33: * notice, this list of conditions and the following disclaimer in the ! 34: * documentation and/or other materials provided with the distribution. ! 35: * 3. All advertising materials mentioning features or use of this software ! 36: * must display the following acknowledgement: ! 37: * This product includes software developed by the University of ! 38: * California, Berkeley and its contributors. ! 39: * 4. Neither the name of the University nor the names of its contributors ! 40: * may be used to endorse or promote products derived from this software ! 41: * without specific prior written permission. ! 42: * ! 43: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ! 44: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ! 45: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ! 46: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ! 47: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ! 48: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ! 49: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ! 50: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ! 51: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ! 52: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ! 53: * SUCH DAMAGE. ! 54: * ! 55: * @(#)vfs_cluster.c 8.10 (Berkeley) 3/28/95 ! 56: */ ! 57: ! 58: #include <sys/param.h> ! 59: #include <sys/proc.h> ! 60: #include <sys/buf.h> ! 61: #include <sys/vnode.h> ! 62: #include <sys/mount.h> ! 63: #include <sys/trace.h> ! 64: #include <sys/malloc.h> ! 65: #include <sys/resourcevar.h> ! 66: #include <libkern/libkern.h> ! 67: #include <kern/mapfs.h> ! 68: ! 69: #include <sys/kdebug.h> ! 70: ! 71: /* ! 72: * Local declarations ! 73: */ ! 74: struct buf *cluster_rbuild __P((struct vnode *, u_quad_t, struct buf *, ! 75: daddr_t, daddr_t, long, int, long, long)); ! 76: struct buf *cluster_create __P((struct vnode *, struct buf *, daddr_t, daddr_t, long, ! 77: int, long, daddr_t *, int)); ! 78: int cluster_block __P((struct vnode *, u_quad_t, struct buf *, long, long)); ! 79: void cluster_wbuild __P((struct vnode *, struct buf *, long, ! 80: daddr_t, int, daddr_t, long, int)); ! 81: struct cluster_save *cluster_collectbufs __P((struct vnode *, struct buf *)); ! 82: ! 83: #if DIAGNOSTIC ! 84: /* ! 85: * Set to 1 if reads of block zero should cause readahead to be done. ! 86: * Set to 0 treats a read of block zero as a non-sequential read. ! 87: * ! 88: * Setting to one assumes that most reads of block zero of files are due to ! 89: * sequential passes over the files (e.g. cat, sum) where additional blocks ! 90: * will soon be needed. Setting to zero assumes that the majority are ! 91: * surgical strikes to get particular info (e.g. size, file) where readahead ! 92: * blocks will not be used and, in fact, push out other potentially useful ! 93: * blocks from the cache. The former seems intuitive, but some quick tests ! 94: * showed that the latter performed better from a system-wide point of view. ! 95: */ ! 96: int doclusterraz = 0; ! 97: #define ISSEQREAD(vp, blk) \ ! 98: (((blk) != 0 || doclusterraz) && \ ! 99: ((blk) == (vp)->v_lastr + 1 || (blk) == (vp)->v_lastr)) ! 100: #else ! 101: #define ISSEQREAD(vp, blk) \ ! 102: ((blk) != 0 && ((blk) == (vp)->v_lastr + 1 || (blk) == (vp)->v_lastr)) ! 103: #endif ! 104: ! 105: /* ! 106: * This replaces bread. If this is a bread at the beginning of a file and ! 107: * lastr is 0, we assume this is the first read and we'll read up to two ! 108: * blocks if they are sequential. After that, we'll do regular read ahead ! 109: * in clustered chunks. ! 110: * ! 111: * There are 4 or 5 cases depending on how you count: ! 112: * Desired block is in the cache: ! 113: * 1 Not sequential access (0 I/Os). ! 114: * 2 Access is sequential, do read-ahead (1 ASYNC). ! 115: * Desired block is not in cache: ! 116: * 3 Not sequential access (1 SYNC). ! 117: * 4 Sequential access, next block is contiguous (1 SYNC). ! 118: * 5 Sequential access, next block is not contiguous (1 SYNC, 1 ASYNC) ! 119: * ! 120: * There are potentially two buffers that require I/O. ! 121: * bp is the block requested. ! 122: * rbp is the read-ahead block. ! 123: * If either is NULL, then you don't have to do the I/O. ! 124: */ ! 125: ! 126: cluster_read(vp, filesize, lblkno, size, cred, bpp, secsize, ! 127: firstpass, resid, fp_sequential) ! 128: struct vnode *vp; ! 129: u_quad_t filesize; ! 130: daddr_t lblkno; ! 131: long size; ! 132: struct ucred *cred; ! 133: struct buf **bpp; ! 134: long secsize; ! 135: int firstpass; ! 136: long resid; ! 137: int *fp_sequential; ! 138: { ! 139: struct buf *bp, *rbp, *cbp; ! 140: daddr_t blkno, ioblkno; ! 141: long flags; ! 142: int error, num_ra, alreadyincore; ! 143: long num; ! 144: int sequential, case4; ! 145: int l_maxra; ! 146: int l_ralen; ! 147: int l_lastr; ! 148: ! 149: #if DIAGNOSTIC ! 150: if (size == 0) ! 151: panic("cluster_read: size = 0"); ! 152: #endif ! 153: ! 154: KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 11)) | DBG_FUNC_START, ! 155: lblkno, ! 156: resid, ! 157: firstpass, ! 158: vp, ! 159: 0); ! 160: error = 0; ! 161: flags = B_READ; ! 162: *bpp = bp = getblk(vp, lblkno, size, 0, 0); ! 163: ! 164: if (resid == PAGE_SIZE && lblkno && !ISSEQREAD(vp, lblkno) && ! 165: (vp->v_mount->mnt_stat.f_iosize & (PAGE_SIZE - 1)) == 0) { ! 166: if (bp->b_flags & B_CACHE) { ! 167: ! 168: KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 11)) | DBG_FUNC_END, ! 169: lblkno, ! 170: size, ! 171: -1, ! 172: 0, ! 173: 0); ! 174: ! 175: vp->v_consumed += (bp->b_bcount/size); ! 176: return (0); ! 177: } ! 178: bp->b_flags |= B_READ; ! 179: ! 180: if (cluster_block(vp, filesize, bp, size, secsize)) { ! 181: ! 182: error = biowait(bp); ! 183: ! 184: KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 11)) | DBG_FUNC_END, ! 185: bp, ! 186: 0, ! 187: 0, ! 188: 0, ! 189: 0); ! 190: ! 191: return(error); ! 192: } ! 193: } ! 194: l_maxra = vp->v_maxra; ! 195: l_ralen = vp->v_ralen; ! 196: l_lastr = vp->v_lastr; ! 197: ! 198: /* round up resid count to nearest block size */ ! 199: if ( resid > size ) ! 200: resid += size - 1; ! 201: ! 202: if (bp->b_flags & B_CACHE) { ! 203: /* ! 204: * Desired block is in cache; do any readahead ASYNC. ! 205: * Case 1, 2. ! 206: */ ! 207: trace(TR_BREADHIT, pack(vp, size), lblkno); ! 208: flags |= B_ASYNC; ! 209: if (resid > size) ! 210: resid -= size; ! 211: ! 212: ioblkno = lblkno + (l_ralen ? l_ralen : 1); ! 213: alreadyincore = incore(vp, ioblkno) != NULL; ! 214: ! 215: /* ! 216: * treat this as a hit for purposes of speculative I/O around paging activity ! 217: */ ! 218: vp->v_consumed += (bp->b_bcount/size); ! 219: ! 220: bp = NULL; ! 221: } else { ! 222: /* Block wasn't in cache, case 3, 4, 5. */ ! 223: trace(TR_BREADMISS, pack(vp, size), lblkno); ! 224: bp->b_flags |= B_READ; ! 225: ioblkno = lblkno; ! 226: alreadyincore = 0; ! 227: current_proc()->p_stats->p_ru.ru_inblock++; /* XXX */ ! 228: } ! 229: /* ! 230: * XXX ! 231: * Replace 1 with a window size based on some permutation of ! 232: * maxcontig and rot_delay. This will let you figure out how ! 233: * many blocks you should read-ahead (case 2, 4, 5). ! 234: * ! 235: * If the access isn't sequential, reset the window to 1. ! 236: * Note that a read to the same block is considered sequential. ! 237: * This catches the case where the file is being read sequentially, ! 238: * but at smaller than the filesystem block size. ! 239: */ ! 240: rbp = NULL; ! 241: cbp = NULL; ! 242: case4 = 0; ! 243: ! 244: if (!ISSEQREAD(vp, lblkno)) { ! 245: l_ralen = 0; ! 246: l_maxra = lblkno; ! 247: sequential = 0; ! 248: } ! 249: else ! 250: sequential = 1; ! 251: ! 252: /* On first pass set the sequential state. ! 253: * Otherwise, just use the value passed in. ! 254: */ ! 255: if (firstpass) ! 256: *fp_sequential = sequential; ! 257: ! 258: if (resid > size || *fp_sequential) { ! 259: if (((u_quad_t)(ioblkno + 1)) * (u_quad_t)size <= filesize && !alreadyincore && ! 260: !(error = VOP_BMAP(vp, ioblkno, NULL, &blkno, &num_ra)) && ! 261: blkno != -1) { ! 262: /* ! 263: * Reading sequentially, and the next block is not in the ! 264: * cache. We are going to try reading ahead. ! 265: */ ! 266: if (num_ra) { ! 267: /* ! 268: * If our desired readahead block had been read ! 269: * in a previous readahead but is no longer in ! 270: * core, then we may be reading ahead too far ! 271: * or are not using our readahead very rapidly. ! 272: * In this case we scale back the window. ! 273: */ ! 274: if (*fp_sequential) { ! 275: if (!alreadyincore && ioblkno <= l_maxra) ! 276: l_ralen = max(l_ralen >> 1, 1); ! 277: /* ! 278: * There are more sequential blocks than our current ! 279: * window allows, scale up. Ideally we want to get ! 280: * in sync with the filesystem maxcontig value. ! 281: */ ! 282: else if (num_ra > l_ralen && lblkno != l_lastr) ! 283: l_ralen = l_ralen ? ! 284: min(num_ra, l_ralen << 1) : 1; ! 285: } ! 286: num = max((resid/size)-1, l_ralen); ! 287: num_ra = min(num, num_ra); ! 288: } ! 289: ! 290: if (num_ra) { /* case 2, 4 */ ! 291: cbp = cluster_rbuild(vp, filesize, ! 292: bp, ioblkno, blkno, size, num_ra, flags, secsize); ! 293: ! 294: if (cbp) { ! 295: if ( !(cbp->b_flags & B_CALL)) { ! 296: if ((rbp = cbp) == bp) ! 297: rbp = NULL; ! 298: cbp = NULL; ! 299: } else ! 300: case4 = 1; ! 301: } ! 302: } else if (ioblkno == lblkno) { ! 303: bp->b_blkno = blkno; ! 304: /* Case 5: check how many blocks to read ahead */ ! 305: ++ioblkno; ! 306: if (((u_quad_t)(ioblkno + 1)) * (u_quad_t)size > filesize || ! 307: incore(vp, ioblkno) || (error = VOP_BMAP(vp, ! 308: ioblkno, NULL, &blkno, &num_ra)) || blkno == -1) ! 309: goto skip_readahead; ! 310: /* ! 311: * Adjust readahead as above. ! 312: * Don't check alreadyincore, we know it is 0 from ! 313: * the previous conditional. ! 314: */ ! 315: if (num_ra) { ! 316: if (*fp_sequential) { ! 317: if (ioblkno <= l_maxra) ! 318: l_ralen = max(l_ralen >> 1, 1); ! 319: else if (num_ra > l_ralen && lblkno != l_lastr) ! 320: l_ralen = l_ralen ? ! 321: min(num_ra, l_ralen<<1) : 1; ! 322: } ! 323: num = max((resid/size)-1, l_ralen); ! 324: num_ra = min(num, num_ra); ! 325: } ! 326: flags |= B_ASYNC; ! 327: ! 328: if (num_ra) { ! 329: cbp = cluster_rbuild(vp, filesize, ! 330: NULL, ioblkno, blkno, size, num_ra, flags, ! 331: secsize); ! 332: if (cbp) { ! 333: if ( !(cbp->b_flags & B_CALL)) { ! 334: rbp = cbp; ! 335: cbp = NULL; ! 336: } ! 337: } ! 338: } else { ! 339: rbp = getblk(vp, ioblkno, size, 0, 0); ! 340: rbp->b_flags |= flags; ! 341: rbp->b_blkno = blkno; ! 342: } ! 343: } else { ! 344: /* case 2; read ahead single block */ ! 345: rbp = getblk(vp, ioblkno, size, 0, 0); ! 346: rbp->b_flags |= flags; ! 347: rbp->b_blkno = blkno; ! 348: } ! 349: if (cbp || rbp) { /* case 2, 5 */ ! 350: trace(TR_BREADMISSRA, ! 351: pack(vp, (num_ra + 1) * size), ioblkno); ! 352: current_proc()->p_stats->p_ru.ru_inblock++; /* XXX */ ! 353: } ! 354: } ! 355: } ! 356: ! 357: skip_readahead: ! 358: if (bp && !case4) { ! 359: if (bp->b_flags & (B_DONE | B_DELWRI)) ! 360: panic("cluster_read: DONE bp"); ! 361: else { ! 362: /* ! 363: * issue the BMAP here if needed due to the block device's ! 364: * lack of a BMAP call in the strategy routine.... when being ! 365: * used by the filesystem/mount code, the blockno's being worked ! 366: * with are always physical so the strategy routine doesn't bother. ! 367: * Now that we are calling cluster read/write from spec_read/spec_write ! 368: * we have to use real logical blockno's in order to properly trigger ! 369: * the read-ahead and write-coalescing. ! 370: */ ! 371: if (bp->b_lblkno == bp->b_blkno) { ! 372: VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL); ! 373: ! 374: if ((long)bp->b_blkno == -1) ! 375: clrbuf(bp); ! 376: } ! 377: error = VOP_STRATEGY(bp); ! 378: ! 379: KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 11)) | DBG_FUNC_NONE, ! 380: bp->b_lblkno, ! 381: bp->b_bcount, ! 382: vp, ! 383: 0xaaaaaaaa, 0 ); ! 384: } ! 385: } ! 386: if (rbp) { ! 387: if (error || rbp->b_flags & (B_DONE | B_DELWRI)) { ! 388: rbp->b_flags &= ~(B_ASYNC | B_READ); ! 389: brelse(rbp); ! 390: } else { ! 391: /* ! 392: * issue the BMAP here if needed due to the block device's ! 393: * lack of a BMAP call in the strategy routine.... when being ! 394: * used by the filesystem/mount code, the blockno's being worked ! 395: * with are always physical so the strategy routine doesn't bother. ! 396: * Now that we are calling cluster read/write from spec_read/spec_write ! 397: * we have to use real logical blockno's in order to properly trigger ! 398: * the read-ahead and write-coalescing. ! 399: */ ! 400: if (rbp->b_lblkno == rbp->b_blkno) { ! 401: VOP_BMAP(vp, rbp->b_lblkno, NULL, &rbp->b_blkno, NULL); ! 402: ! 403: if ((long)rbp->b_blkno == -1) ! 404: clrbuf(rbp); ! 405: } ! 406: (void) VOP_STRATEGY(rbp); ! 407: ! 408: KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 11)) | DBG_FUNC_NONE, ! 409: rbp->b_lblkno, ! 410: rbp->b_bcount, ! 411: vp, ! 412: 0xaaaaaabb, 0 ); ! 413: } ! 414: } ! 415: if (cbp) { ! 416: (void) VOP_STRATEGY(cbp); ! 417: ! 418: KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 11)) | DBG_FUNC_NONE, ! 419: cbp->b_lblkno, ! 420: cbp->b_bcount, ! 421: vp, ! 422: 0xaaaaaacc, 0 ); ! 423: } ! 424: /* ! 425: * Recalculate our maximum readahead ! 426: */ ! 427: if (rbp == NULL) { ! 428: if (cbp) ! 429: rbp = cbp; ! 430: else ! 431: rbp = bp; ! 432: } ! 433: if (rbp) ! 434: vp->v_maxra = rbp->b_lblkno + (rbp->b_bcount / size) - 1; ! 435: else ! 436: vp->v_maxra = l_maxra; ! 437: vp->v_ralen = l_ralen; ! 438: ! 439: if (bp) ! 440: error = biowait(bp); ! 441: ! 442: KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 11)) | DBG_FUNC_END, ! 443: bp, ! 444: rbp, ! 445: cbp, ! 446: vp->v_maxra, ! 447: 0); ! 448: return(error); ! 449: } ! 450: ! 451: struct pent { ! 452: int mask; ! 453: int num; ! 454: } pent[7] = { ! 455: {0,0}, ! 456: {0,0}, ! 457: {~0,1}, ! 458: {~1,2}, ! 459: {~3,4}, ! 460: {~7,8}, ! 461: {~15,16}}; ! 462: ! 463: ! 464: int cluster_block(vp, filesize, bp, size, secsize) ! 465: struct vnode *vp; ! 466: u_quad_t filesize; ! 467: struct buf *bp; ! 468: long size; ! 469: long secsize; ! 470: { ! 471: struct buf *cbp; ! 472: daddr_t lblkno, blkno, ioblkno, lbn; ! 473: int num_io, num; ! 474: unsigned ratio; ! 475: ! 476: #if 0 /* FIXED READS */ ! 477: /* calculate maximum number of blocks to read in */ ! 478: ! 479: lblkno = bp->b_lblkno & ~0x07; /* put us on a 32k (8 page boundary) boundary */ ! 480: num = 8; ! 481: num_io = 0; ! 482: #else /* ADAPTIVE READS */ ! 483: if (vp->v_bread > vp->v_trigger) { ! 484: ratio = (vp->v_consumed*100) / vp->v_bread; ! 485: ! 486: if (ratio < 50 && vp->v_power > 2) { ! 487: vp->v_power--; ! 488: vp->v_trigger = vp->v_bread + (16 * pent[vp->v_power].num); ! 489: } else if (ratio > 75 && vp->v_power < 6) { ! 490: vp->v_power++; ! 491: vp->v_trigger = vp->v_bread + (16 * pent[vp->v_power].num); ! 492: } ! 493: } ! 494: if ((num = pent[vp->v_power].num) == 1) ! 495: return (0); ! 496: lblkno = bp->b_lblkno & pent[vp->v_power].mask; ! 497: num_io = 0; ! 498: #endif ! 499: ! 500: KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 14)) | DBG_FUNC_START, ! 501: lblkno, ! 502: num, ! 503: vp->v_flag, ! 504: vp, ! 505: 0 ); ! 506: ! 507: for (lbn = bp->b_lblkno; lbn > lblkno; lbn--) { ! 508: if (incore(vp, lbn - 1)) ! 509: break; ! 510: } ! 511: num -= (lbn - lblkno); ! 512: ! 513: for (;;) { ! 514: if (VOP_BMAP(vp, lbn, NULL, &blkno, &num_io) || blkno == -1 || num_io == 0) { ! 515: if (lbn == bp->b_lblkno) { ! 516: KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 14)) | DBG_FUNC_END, ! 517: -1, ! 518: lbn, ! 519: blkno, ! 520: num_io, ! 521: 0); ! 522: return (0); ! 523: } ! 524: } ! 525: if ((lbn + num_io) >= bp->b_lblkno) ! 526: break; ! 527: lbn++; ! 528: num--; ! 529: } ! 530: if ((num_io = min(num, num_io + 1)) == 1) ! 531: return (0); ! 532: ! 533: if ((u_quad_t)size * ((u_quad_t)(lbn + num_io)) > filesize) ! 534: num_io = (filesize - ((u_quad_t)size * (u_quad_t)lbn)) / size; ! 535: ! 536: cbp = cluster_create(vp, bp, lbn, blkno, size, num_io, secsize, &ioblkno, B_AGE); ! 537: ! 538: if (cbp) { ! 539: (void) VOP_STRATEGY(cbp); ! 540: vp->v_bread += (cbp->b_bcount / size); ! 541: KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 14)) | DBG_FUNC_END, ! 542: cbp->b_lblkno, ! 543: cbp->b_bcount, ! 544: vp, ! 545: 0xaaaaaadd, ! 546: 0 ); ! 547: ! 548: return (1); ! 549: } ! 550: KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 14)) | DBG_FUNC_END, ! 551: 0, ! 552: 0, ! 553: 0, ! 554: 0, ! 555: 0); ! 556: return (0); ! 557: } ! 558: ! 559: ! 560: /* ! 561: * generate advisory I/O in as big of chunks as possible ! 562: * and then parcel them up into logical blocks in the buffer hash table. ! 563: */ ! 564: advisory_read(vp, filesize, lblkno, size, runt_size, io_size, secsize) ! 565: struct vnode *vp; ! 566: u_quad_t filesize; ! 567: daddr_t lblkno; ! 568: long size; ! 569: long runt_size; ! 570: long io_size; ! 571: long secsize; ! 572: { ! 573: struct buf *bp, *cbp; ! 574: daddr_t blkno, ioblkno; ! 575: int error, num_io; ! 576: long num; ! 577: ! 578: error = 0; ! 579: ! 580: /* calculate maximum number of blocks to read in */ ! 581: ! 582: num = (io_size + (size - 1)) / size; ! 583: ! 584: if ((u_quad_t)size * ((u_quad_t)(lblkno + num)) > filesize) { ! 585: if (((u_quad_t)size * (u_quad_t)lblkno) >= filesize) ! 586: return(EFBIG); ! 587: io_size = filesize - ((u_quad_t)size * (u_quad_t)lblkno); ! 588: ! 589: num = io_size / size; ! 590: } else ! 591: io_size = num * size; ! 592: ! 593: KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 13)) | DBG_FUNC_START, ! 594: lblkno, ! 595: io_size, ! 596: num, ! 597: vp, ! 598: 0 ); ! 599: ! 600: while (num) { ! 601: if (error = VOP_BMAP(vp, lblkno, NULL, &blkno, &num_io)) ! 602: break; ! 603: ! 604: if (blkno == -1) { ! 605: lblkno++; ! 606: num--; ! 607: io_size -= size; ! 608: continue; ! 609: } ! 610: num_io = min(num, num_io + 1); ! 611: ! 612: cbp = cluster_create(vp, NULL, lblkno, blkno, size, num_io, secsize, &ioblkno, 0); ! 613: ! 614: if (cbp) { ! 615: KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 13)) | DBG_FUNC_NONE, ! 616: cbp->b_blkno, ! 617: cbp->b_bcount, ! 618: vp, ! 619: 0xaaaaaaee, ! 620: 0 ); ! 621: ! 622: (void) VOP_STRATEGY(cbp); ! 623: } else { ! 624: if (ioblkno == lblkno) { ! 625: error = ENOMEM; ! 626: break; ! 627: } ! 628: } ! 629: io_size -= ((ioblkno - lblkno) * size); ! 630: num -= ioblkno - lblkno; ! 631: lblkno = ioblkno; ! 632: } ! 633: if (io_size && !error) { ! 634: bp = getblk(vp, lblkno, runt_size, 0, 0); ! 635: ! 636: if (bp->b_flags & (B_DONE | B_DELWRI)) ! 637: brelse(bp); ! 638: else { ! 639: bp->b_flags |= (B_READ | B_ASYNC); ! 640: ! 641: (void) VOP_STRATEGY(bp); ! 642: ! 643: KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 13)) | DBG_FUNC_NONE, ! 644: bp->b_blkno, ! 645: bp->b_bcount, ! 646: vp, ! 647: 0xaaaaaaff, ! 648: 0 ); ! 649: } ! 650: io_size -= runt_size; ! 651: } ! 652: ! 653: KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 13)) | DBG_FUNC_END, ! 654: lblkno, ! 655: io_size, ! 656: num, ! 657: error, ! 658: 0); ! 659: return(error); ! 660: } ! 661: ! 662: ! 663: /* ! 664: * If blocks are contiguous on disk, use this to provide clustered ! 665: * read ahead. We will read as many blocks as possible sequentially ! 666: * and then parcel them up into logical blocks in the buffer hash table. ! 667: */ ! 668: struct buf * ! 669: cluster_rbuild(vp, filesize, bp, lbn, blkno, size, run, flags, secsize) ! 670: struct vnode *vp; ! 671: u_quad_t filesize; ! 672: struct buf *bp; ! 673: daddr_t lbn; ! 674: daddr_t blkno; ! 675: long size; ! 676: int run; ! 677: long flags; ! 678: long secsize; ! 679: { ! 680: struct cluster_save *b_save; ! 681: struct buf *tbp, *cbp; ! 682: caddr_t cp; ! 683: daddr_t bn; ! 684: int i, inc; ! 685: ! 686: #if DIAGNOSTIC ! 687: if (size != vp->v_mount->mnt_stat.f_iosize) ! 688: panic("cluster_rbuild: size %d != filesize %d\n", ! 689: size, vp->v_mount->mnt_stat.f_iosize); ! 690: #endif ! 691: if ((u_quad_t)size * ((u_quad_t)(lbn + run + 1)) > filesize) ! 692: --run; ! 693: if (run == 0) { ! 694: if (!bp) { ! 695: bp = getblk(vp, lbn, size, 0, 0); ! 696: bp->b_blkno = blkno; ! 697: bp->b_flags |= flags; ! 698: } ! 699: return(bp); ! 700: } ! 701: b_save = _MALLOC(sizeof(struct buf *) * (run + 1) + sizeof(struct cluster_save), ! 702: M_SEGMENT, M_NOWAIT); ! 703: ! 704: if (b_save) ! 705: cbp = alloc_io_buf(vp); ! 706: else ! 707: cbp = NULL; ! 708: ! 709: if (b_save == NULL || cbp == NULL) { ! 710: if (b_save) ! 711: _FREE(b_save, M_SEGMENT); ! 712: if (cbp) ! 713: free_io_buf(cbp); ! 714: return (bp); ! 715: } ! 716: b_save->bs_bufsize = size; ! 717: b_save->bs_nchildren = 0; ! 718: b_save->bs_children = (struct buf **)(b_save + 1); ! 719: ! 720: cbp->b_saveaddr = (caddr_t)b_save; ! 721: cbp->b_iodone = cluster_callback; ! 722: cbp->b_blkno = blkno; ! 723: cbp->b_lblkno = lbn; ! 724: cbp->b_flags |= flags | B_CALL; ! 725: ! 726: inc = btodb(size, secsize); ! 727: cp = (char *)cbp->b_data; ! 728: tbp = bp; ! 729: ! 730: for (bn = blkno, i = 0; i <= run; ++i, bn += inc) { ! 731: if (tbp == NULL) { ! 732: if (incore(vp, lbn + i)) ! 733: /* ! 734: * A component of the cluster is already in core, ! 735: * terminate the cluster early. ! 736: */ ! 737: break; ! 738: tbp = getblk(vp, lbn + i, size, 0, 0); ! 739: } ! 740: pagemove(tbp->b_data, cp, size); ! 741: cbp->b_bcount += size; ! 742: cbp->b_bufsize += size; ! 743: cp += size; ! 744: ! 745: if (bp != tbp) ! 746: tbp->b_flags |= flags | B_READ | B_ASYNC; ! 747: tbp->b_bufsize -= size; ! 748: tbp->b_blkno = bn; ! 749: ! 750: b_save->bs_children[i] = tbp; ! 751: b_save->bs_nchildren++; ! 752: ! 753: tbp = NULL; ! 754: } ! 755: /* ! 756: * The cluster may have been terminated early ! 757: * If no cluster could be formed, deallocate the cluster save info. ! 758: */ ! 759: if (i == 0) { ! 760: _FREE(b_save, M_SEGMENT); ! 761: free_io_buf(cbp); ! 762: return(bp); ! 763: } ! 764: return(cbp); ! 765: } ! 766: ! 767: ! 768: ! 769: struct buf * ! 770: cluster_create(vp, bp, lbn, blkno, size, run, secsize, ioblkno, flags) ! 771: struct vnode *vp; ! 772: struct buf *bp; ! 773: daddr_t lbn; ! 774: daddr_t blkno; ! 775: long size; ! 776: int run; ! 777: long secsize; ! 778: daddr_t *ioblkno; ! 779: int flags; ! 780: { ! 781: struct cluster_save *b_save; ! 782: struct buf *tbp, *cbp; ! 783: caddr_t cp; ! 784: daddr_t bn; ! 785: int i, inc; ! 786: ! 787: inc = btodb(size, secsize); ! 788: ! 789: if (bp == NULL) { ! 790: while (run && (tbp = incore(vp, lbn))) { ! 791: /* ! 792: * if a block is already in core ! 793: * and is not busy ! 794: * then get and release to freshen it in the LRU ! 795: */ ! 796: if ( !(tbp->b_flags & B_BUSY)) { ! 797: tbp = getblk(vp, lbn, size, 0, 0); ! 798: brelse(tbp); ! 799: } ! 800: lbn++; ! 801: run--; ! 802: blkno += inc; ! 803: } ! 804: if (run == 0) { ! 805: *ioblkno = lbn; ! 806: return (NULL); ! 807: } ! 808: } ! 809: b_save = _MALLOC((sizeof(struct buf *) * run) + sizeof(struct cluster_save), M_SEGMENT, M_NOWAIT); ! 810: ! 811: if (b_save) ! 812: cbp = alloc_io_buf(vp); ! 813: else ! 814: cbp = NULL; ! 815: ! 816: if (b_save == NULL || cbp == NULL) { ! 817: if (b_save) ! 818: _FREE(b_save, M_SEGMENT); ! 819: if (cbp) ! 820: free_io_buf(cbp); ! 821: *ioblkno = lbn; ! 822: ! 823: return (NULL); ! 824: } ! 825: b_save->bs_bufsize = size; ! 826: b_save->bs_nchildren = 0; ! 827: b_save->bs_children = (struct buf **)(b_save + 1); ! 828: ! 829: cbp->b_saveaddr = (caddr_t)b_save; ! 830: cbp->b_iodone = cluster_callback; ! 831: cbp->b_blkno = blkno; ! 832: cbp->b_lblkno = lbn; ! 833: cbp->b_flags |= (B_READ | B_ASYNC | B_CALL); ! 834: ! 835: cp = (char *)cbp->b_data; ! 836: ! 837: for (bn = blkno, i = 0; i < run; ++i, bn += inc, ++lbn) { ! 838: if (bp && bp->b_lblkno == lbn) ! 839: tbp = bp; ! 840: else { ! 841: if (tbp = incore(vp, lbn)) { ! 842: /* ! 843: * A component of the cluster is already in core, ! 844: * terminate the cluster early. ! 845: * if its not busy then also ! 846: * get and release to freshen it in the LRU ! 847: */ ! 848: if ( !(tbp->b_flags & B_BUSY)) { ! 849: tbp = getblk(vp, lbn, size, 0, 0); ! 850: brelse(tbp); ! 851: } ! 852: break; ! 853: } ! 854: tbp = getblk(vp, lbn, size, 0, 0); ! 855: } ! 856: pagemove(tbp->b_data, cp, size); ! 857: ! 858: tbp->b_bufsize -= size; ! 859: tbp->b_blkno = bn; ! 860: cbp->b_bcount += size; ! 861: cbp->b_bufsize += size; ! 862: cp += size; ! 863: ! 864: if (tbp != bp) ! 865: tbp->b_flags |= (B_READ | B_ASYNC | flags); ! 866: b_save->bs_children[i] = tbp; ! 867: b_save->bs_nchildren++; ! 868: } ! 869: *ioblkno = lbn; ! 870: /* ! 871: * The cluster may have been terminated early ! 872: * If no cluster could be formed, deallocate the cluster save info. ! 873: */ ! 874: if (cbp->b_bcount == 0) { ! 875: _FREE(b_save, M_SEGMENT); ! 876: free_io_buf(cbp); ! 877: return(NULL); ! 878: } ! 879: return(cbp); ! 880: } ! 881: ! 882: ! 883: /* ! 884: * Cleanup after a clustered read or write. ! 885: * This is complicated by the fact that any of the buffers might have ! 886: * extra memory (if there were no empty buffer headers at allocbuf time) ! 887: * that we will need to shift around. ! 888: */ ! 889: void ! 890: cluster_callback(bp) ! 891: struct buf *bp; ! 892: { ! 893: struct cluster_save *b_save; ! 894: struct buf **bpp, *tbp; ! 895: long bsize; ! 896: int xsize; ! 897: int n; ! 898: caddr_t cp; ! 899: int error = 0; ! 900: ! 901: /* ! 902: * Must propogate errors to all the components. ! 903: */ ! 904: if (bp->b_flags & B_ERROR) ! 905: error = bp->b_error; ! 906: b_save = (struct cluster_save *)(bp->b_saveaddr); ! 907: ! 908: bsize = b_save->bs_bufsize; ! 909: xsize = bp->b_bcount - bp->b_resid; ! 910: cp = (char *)bp->b_data; ! 911: /* ! 912: * Move memory from the large cluster buffer into the component ! 913: * buffers and mark IO as done on these. ! 914: */ ! 915: for (bpp = b_save->bs_children; b_save->bs_nchildren--; ++bpp) { ! 916: tbp = *bpp; ! 917: pagemove(cp, tbp->b_data, bsize); ! 918: tbp->b_bufsize += bsize; ! 919: ! 920: n = min(bsize, xsize); ! 921: xsize -= n; ! 922: ! 923: if ((tbp->b_bcount = n) == 0) ! 924: tbp->b_flags |= B_INVAL; ! 925: tbp->b_resid = bsize - n; ! 926: ! 927: if (error) { ! 928: tbp->b_flags |= B_ERROR; ! 929: tbp->b_error = error; ! 930: } ! 931: biodone(tbp); ! 932: bp->b_bufsize -= bsize; ! 933: cp += bsize; ! 934: } ! 935: _FREE(b_save, M_SEGMENT); ! 936: ! 937: free_io_buf(bp); ! 938: } ! 939: ! 940: ! 941: /* ! 942: * on close, flush out any remaining cluster ! 943: * ! 944: */ ! 945: cluster_close(vp, bsize, secsize) ! 946: struct vnode *vp; ! 947: int bsize; ! 948: long secsize; ! 949: { ! 950: int cursize; ! 951: ! 952: if (vp->v_clen) { ! 953: cursize = vp->v_lastw - vp->v_cstart + 1; ! 954: ! 955: cluster_wbuild(vp, NULL, bsize, vp->v_cstart, cursize, -1, secsize, 0); ! 956: ! 957: vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0; ! 958: } ! 959: } ! 960: ! 961: ! 962: /* ! 963: * Do clustered write for FFS. ! 964: * ! 965: * Three cases: ! 966: * 1. Write is not sequential (write asynchronously) ! 967: * Write is sequential: ! 968: * 2. beginning of cluster - begin cluster ! 969: * 3. middle of a cluster - add to cluster ! 970: * 4. end of a cluster - asynchronously write cluster ! 971: */ ! 972: ! 973: cluster_write(bp, filesize, secsize) ! 974: struct buf *bp; ! 975: u_quad_t filesize; ! 976: long secsize; ! 977: { ! 978: struct vnode *vp; ! 979: daddr_t lbn; ! 980: daddr_t bn; ! 981: int cursize; ! 982: int need_commit; ! 983: int need_sync; ! 984: int bsize; ! 985: int error = 0; ! 986: ! 987: need_commit = (bp->b_flags & B_CLUST_COMMIT); ! 988: need_sync = (bp->b_flags & B_CLUST_SYNC); ! 989: bp->b_flags &= ~(B_CLUST_COMMIT | B_CLUST_SYNC); ! 990: ! 991: vp = bp->b_vp; ! 992: bn = bp->b_blkno; ! 993: lbn = bp->b_lblkno; ! 994: bsize = bp->b_bcount; ! 995: ! 996: if ((bsize & (PAGE_SIZE - 1)) || bsize > MAXBSIZE) { ! 997: bp->b_flags |= B_AGE; ! 998: bawrite(bp); ! 999: ! 1000: return (error); ! 1001: } ! 1002: /* Initialize vnode to beginning of file. */ ! 1003: if (lbn == 0) ! 1004: vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0; ! 1005: ! 1006: ! 1007: KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START, ! 1008: bp->b_lblkno, ! 1009: bp->b_bcount, ! 1010: vp, ! 1011: 0, ! 1012: 0); ! 1013: ! 1014: if (vp->v_clen == 0 || lbn != vp->v_lastw + 1 || (bn != vp->v_lasta + btodb(bsize, secsize))) ! 1015: { ! 1016: if (vp->v_clen) { ! 1017: /* ! 1018: * Current block is neither logically or physically sequential to last written ! 1019: * ! 1020: * If we are not writing at the end of file, or the process ! 1021: * seeked to another point in the file since its ! 1022: * last write, then push the previous cluster. ! 1023: * Otherwise try reallocating to make it sequential. ! 1024: */ ! 1025: cursize = vp->v_lastw - vp->v_cstart + 1; ! 1026: ! 1027: if (((u_quad_t)(lbn + 1)) * (u_quad_t)bsize != filesize || lbn != vp->v_lastw + 1) { ! 1028: cluster_wbuild(vp, NULL, bsize, ! 1029: vp->v_cstart, cursize, lbn, secsize, need_sync); ! 1030: } else { ! 1031: struct buf **bpp, **endbp; ! 1032: struct cluster_save *buflist; ! 1033: ! 1034: buflist = cluster_collectbufs(vp, bp); ! 1035: ! 1036: if (buflist == NULL) { ! 1037: cluster_wbuild(vp, NULL, bsize, ! 1038: vp->v_cstart, cursize, lbn, secsize, need_sync); ! 1039: } else { ! 1040: ! 1041: endbp = &buflist->bs_children ! 1042: [buflist->bs_nchildren - 1]; ! 1043: if (VOP_REALLOCBLKS(vp, buflist)) { ! 1044: /* ! 1045: * Failed, push the previous cluster. ! 1046: */ ! 1047: for (bpp = buflist->bs_children; ! 1048: bpp < endbp; bpp++) ! 1049: brelse(*bpp); ! 1050: _FREE(buflist, M_SEGMENT); ! 1051: ! 1052: cluster_wbuild(vp, NULL, bsize, ! 1053: vp->v_cstart, cursize, lbn, secsize, need_sync); ! 1054: } else { ! 1055: /* ! 1056: * Succeeded, keep building cluster. ! 1057: * don't bdwrite the last bp, we'll ! 1058: * first check to see if we now have a full ! 1059: * cluster, or the caller has requested a SYNC write ! 1060: */ ! 1061: for (bpp = buflist->bs_children; ! 1062: bpp < endbp; bpp++) ! 1063: bdwrite(*bpp); ! 1064: _FREE(buflist, M_SEGMENT); ! 1065: /* ! 1066: * update the physical block number because, ! 1067: * VOP_REALLOCBLKS will have changed it ! 1068: */ ! 1069: bn = bp->b_blkno; ! 1070: goto chk_cluster_full; ! 1071: } ! 1072: } ! 1073: } ! 1074: } ! 1075: if (need_commit) { /* we're being asked to do IO_SYNC and this is the last */ ! 1076: vp->v_clen = 0; /* chunk of the I/O request, so we can't start a new cluster yet */ ! 1077: ! 1078: if (need_sync) ! 1079: bwrite(bp); ! 1080: else ! 1081: bawrite(bp); ! 1082: ! 1083: KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END, ! 1084: bp->b_lblkno, ! 1085: bp->b_blkno, ! 1086: bp->b_bcount, ! 1087: 2, ! 1088: 0 ); ! 1089: } else { ! 1090: /* ! 1091: * begin a new cluster... limiting the size to MAXPHYSIO ! 1092: */ ! 1093: vp->v_cstart = lbn; ! 1094: vp->v_clen = (MAXPHYSIO / bsize) - 1; ! 1095: ! 1096: bdwrite(bp); ! 1097: ! 1098: KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END, ! 1099: bp->b_lblkno, ! 1100: bp->b_blkno, ! 1101: bp->b_bcount, ! 1102: 3, ! 1103: 0 ); ! 1104: } ! 1105: goto check_for_commit; ! 1106: } ! 1107: chk_cluster_full: ! 1108: if ((lbn == vp->v_cstart + vp->v_clen) || need_commit) { ! 1109: /* ! 1110: * At end of cluster, write it out. ! 1111: */ ! 1112: cluster_wbuild(vp, bp, bsize, vp->v_cstart, ! 1113: (lbn - vp->v_cstart) + 1, lbn, secsize, need_sync); ! 1114: ! 1115: KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END, ! 1116: vp->v_cstart, ! 1117: vp->v_clen + 1, ! 1118: lbn, ! 1119: 4, ! 1120: 0 ); ! 1121: vp->v_clen = 0; ! 1122: } else { ! 1123: /* ! 1124: * In the middle of a cluster, so just delay the ! 1125: * I/O for now. ! 1126: */ ! 1127: bdwrite(bp); ! 1128: ! 1129: KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END, ! 1130: bp->b_lblkno, ! 1131: bp->b_blkno, ! 1132: vp->v_cstart, ! 1133: 5, ! 1134: 0); ! 1135: } ! 1136: check_for_commit: ! 1137: vp->v_lastw = lbn; ! 1138: vp->v_lasta = bn; ! 1139: ! 1140: if (need_commit) { ! 1141: bp = getblk(vp, lbn, bsize, 0, 0); ! 1142: ! 1143: if (bp->b_flags & B_ERROR) ! 1144: error = (bp->b_error ? bp->b_error : EIO); ! 1145: brelse(bp); ! 1146: } ! 1147: return (error); ! 1148: } ! 1149: ! 1150: ! 1151: /* ! 1152: * This is an awful lot like cluster_rbuild...wish they could be combined. ! 1153: * The last lbn argument is the current block on which I/O is being ! 1154: * performed. Check to see that it doesn't fall in the middle of ! 1155: * the current block (if last_bp == NULL). ! 1156: */ ! 1157: void ! 1158: cluster_wbuild(vp, last_bp, size, start_lbn, len, lbn, secsize, need_sync) ! 1159: struct vnode *vp; ! 1160: struct buf *last_bp; ! 1161: long size; ! 1162: daddr_t start_lbn; ! 1163: int len; ! 1164: daddr_t lbn; ! 1165: long secsize; ! 1166: int need_sync; ! 1167: { ! 1168: struct cluster_save *b_save; ! 1169: struct buf *bp, *tbp; ! 1170: caddr_t cp; ! 1171: int i, s; ! 1172: ! 1173: #if DIAGNOSTIC ! 1174: if (size != vp->v_mount->mnt_stat.f_iosize) ! 1175: panic("cluster_wbuild: size %d != filesize %d\n", ! 1176: size, vp->v_mount->mnt_stat.f_iosize); ! 1177: #endif ! 1178: redo: ! 1179: while ((!incore(vp, start_lbn) || start_lbn == lbn) && len) { ! 1180: ++start_lbn; ! 1181: --len; ! 1182: } ! 1183: /* Get more memory for current buffer */ ! 1184: if (len <= 1) { ! 1185: if (last_bp) { ! 1186: KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_NONE, ! 1187: last_bp->b_lblkno, ! 1188: last_bp->b_blkno, ! 1189: last_bp->b_bcount, ! 1190: 10, ! 1191: 0 ); ! 1192: if (need_sync) ! 1193: bwrite(last_bp); ! 1194: else ! 1195: bawrite(last_bp); ! 1196: } else if (len) { ! 1197: bp = getblk(vp, start_lbn, size, 0, 0); ! 1198: ! 1199: KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_NONE, ! 1200: bp->b_lblkno, ! 1201: bp->b_blkno, ! 1202: bp->b_bcount, ! 1203: 11, ! 1204: 0 ); ! 1205: if (bp->b_flags & B_DELWRI) { ! 1206: if (need_sync) ! 1207: bwrite(bp); ! 1208: else ! 1209: bawrite(bp); ! 1210: } else ! 1211: brelse(bp); ! 1212: } ! 1213: return; ! 1214: } ! 1215: b_save = _MALLOC(sizeof(struct buf *) * len + sizeof(struct cluster_save), ! 1216: M_SEGMENT, M_NOWAIT); ! 1217: if (b_save) ! 1218: bp = alloc_io_buf(vp); ! 1219: else ! 1220: bp = NULL; ! 1221: ! 1222: if (b_save == NULL || bp == NULL) { ! 1223: if (bp) ! 1224: free_io_buf(bp); ! 1225: if (b_save) ! 1226: _FREE(b_save, M_SEGMENT); ! 1227: ! 1228: for (i = 0; i < len; ++i, ++start_lbn) { ! 1229: if (!incore(vp, start_lbn)) ! 1230: continue; ! 1231: if (last_bp == NULL || start_lbn != lbn) { ! 1232: tbp = getblk(vp, start_lbn, size, 0, 0); ! 1233: ! 1234: if (tbp->b_flags & B_DELWRI) { ! 1235: KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_NONE, ! 1236: tbp->b_lblkno, ! 1237: tbp->b_blkno, ! 1238: tbp->b_bcount, ! 1239: 12, ! 1240: 0 ); ! 1241: ! 1242: if (need_sync) ! 1243: bwrite(tbp); ! 1244: else ! 1245: bawrite(tbp); ! 1246: } else ! 1247: brelse(tbp); ! 1248: } ! 1249: } ! 1250: if (last_bp) { ! 1251: KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_NONE, ! 1252: last_bp->b_lblkno, ! 1253: last_bp->b_blkno, ! 1254: last_bp->b_bcount, ! 1255: 13, ! 1256: 0 ); ! 1257: if (need_sync) ! 1258: bwrite(last_bp); ! 1259: else ! 1260: bawrite(last_bp); ! 1261: } ! 1262: return; ! 1263: } ! 1264: b_save->bs_bufsize = size; ! 1265: b_save->bs_nchildren = 0; ! 1266: b_save->bs_children = (struct buf **)(b_save + 1); ! 1267: ! 1268: bp->b_saveaddr = (caddr_t)b_save; ! 1269: bp->b_iodone = cluster_callback; ! 1270: bp->b_flags |= (B_WRITEINPROG | B_CALL | B_ASYNC); ! 1271: ! 1272: cp = (char *)bp->b_data; ! 1273: ! 1274: for (start_lbn, i = 0; i < len; ++i, ++start_lbn) { ! 1275: /* ! 1276: * Block is not in core or the non-sequential block ! 1277: * ending our cluster was part of the cluster (in which ! 1278: * case we don't want to write it twice). ! 1279: */ ! 1280: if (!incore(vp, start_lbn) || ! 1281: (last_bp == NULL && start_lbn == lbn)) ! 1282: break; ! 1283: ! 1284: /* ! 1285: * Get the desired block buffer (unless it is the final ! 1286: * sequential block whose buffer was passed in explictly ! 1287: * as last_bp). ! 1288: */ ! 1289: if (last_bp == NULL || start_lbn != lbn) { ! 1290: tbp = getblk(vp, start_lbn, size, 0, 0); ! 1291: if (!(tbp->b_flags & B_DELWRI)) { ! 1292: brelse(tbp); ! 1293: break; ! 1294: } ! 1295: } else ! 1296: tbp = last_bp; ! 1297: ! 1298: if (i == 0) { ! 1299: bp->b_blkno = tbp->b_blkno; ! 1300: bp->b_lblkno= tbp->b_lblkno; ! 1301: } else { ! 1302: if (tbp->b_blkno != (bp->b_blkno + btodb(bp->b_bufsize, secsize))) { ! 1303: brelse(tbp); ! 1304: break; ! 1305: } ! 1306: } ! 1307: /* Move memory from children to parent */ ! 1308: pagemove(tbp->b_data, cp, size); ! 1309: bp->b_bcount += size; ! 1310: bp->b_bufsize += size; ! 1311: cp += size; ! 1312: ! 1313: tbp->b_bufsize -= size; ! 1314: tbp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI); ! 1315: tbp->b_flags |= (B_ASYNC | B_AGE); ! 1316: ! 1317: s = splbio(); ! 1318: reassignbuf(tbp, tbp->b_vp); /* put on clean list */ ! 1319: ++tbp->b_vp->v_numoutput; ! 1320: splx(s); ! 1321: ! 1322: b_save->bs_children[i] = tbp; ! 1323: b_save->bs_nchildren++; ! 1324: } ! 1325: ! 1326: if (i == 0) { ! 1327: /* None to cluster */ ! 1328: free_io_buf(bp); ! 1329: _FREE(b_save, M_SEGMENT); ! 1330: } else { ! 1331: if (bp->b_bcount > MAXPHYSIO) ! 1332: panic("cluster_wbuild: bp->b_bcount = %x\n", bp->b_bcount); ! 1333: ! 1334: KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_NONE, ! 1335: bp->b_lblkno, ! 1336: bp->b_bcount, ! 1337: vp, ! 1338: 0xbbbbbbaa, ! 1339: 0 ); ! 1340: VOP_STRATEGY(bp); ! 1341: } ! 1342: if (i < len) { ! 1343: len -= i + 1; ! 1344: start_lbn += 1; ! 1345: goto redo; ! 1346: } ! 1347: } ! 1348: ! 1349: /* ! 1350: * Collect together all the buffers in a cluster. ! 1351: * Plus add one additional buffer. ! 1352: */ ! 1353: struct cluster_save * ! 1354: cluster_collectbufs(vp, last_bp) ! 1355: struct vnode *vp; ! 1356: struct buf *last_bp; ! 1357: { ! 1358: struct cluster_save *buflist; ! 1359: daddr_t lbn; ! 1360: int i, j, len; ! 1361: ! 1362: len = vp->v_lastw - vp->v_cstart + 1; ! 1363: buflist = _MALLOC(sizeof(struct buf *) * (len + 1) + sizeof(*buflist), ! 1364: M_SEGMENT, M_NOWAIT); ! 1365: ! 1366: if (buflist == NULL) ! 1367: return (NULL); ! 1368: ! 1369: buflist->bs_nchildren = 0; ! 1370: buflist->bs_children = (struct buf **)(buflist + 1); ! 1371: for (lbn = vp->v_cstart, i = 0; i < len; lbn++, i++) { ! 1372: (void)bread(vp, lbn, last_bp->b_bcount, NOCRED, ! 1373: &buflist->bs_children[i]); ! 1374: if(!(buflist->bs_children[i]->b_flags & B_DELWRI)) { ! 1375: for (j=0; j<=i; j++) ! 1376: brelse(buflist->bs_children[j]); ! 1377: _FREE(buflist, M_SEGMENT); ! 1378: return(NULL); ! 1379: } ! 1380: } ! 1381: buflist->bs_children[i] = last_bp; ! 1382: buflist->bs_nchildren = i + 1; ! 1383: return (buflist); ! 1384: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.