|
|
1.1 root 1: /*
2: * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3: *
4: * @APPLE_LICENSE_HEADER_START@
5: *
6: * The contents of this file constitute Original Code as defined in and
7: * are subject to the Apple Public Source License Version 1.1 (the
8: * "License"). You may not use this file except in compliance with the
9: * License. Please obtain a copy of the License at
10: * http://www.apple.com/publicsource and read it before using this file.
11: *
12: * This Original Code and all software distributed under the License are
13: * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14: * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15: * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16: * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17: * License for the specific language governing rights and limitations
18: * under the License.
19: *
20: * @APPLE_LICENSE_HEADER_END@
21: */
22: /* $NetBSD: lfs_segment.c,v 1.3 1994/08/21 03:15:32 cgd Exp $ */
23:
24: /*
25: * Copyright (c) 1991, 1993
26: * The Regents of the University of California. All rights reserved.
27: *
28: * Redistribution and use in source and binary forms, with or without
29: * modification, are permitted provided that the following conditions
30: * are met:
31: * 1. Redistributions of source code must retain the above copyright
32: * notice, this list of conditions and the following disclaimer.
33: * 2. Redistributions in binary form must reproduce the above copyright
34: * notice, this list of conditions and the following disclaimer in the
35: * documentation and/or other materials provided with the distribution.
36: * 3. All advertising materials mentioning features or use of this software
37: * must display the following acknowledgement:
38: * This product includes software developed by the University of
39: * California, Berkeley and its contributors.
40: * 4. Neither the name of the University nor the names of its contributors
41: * may be used to endorse or promote products derived from this software
42: * without specific prior written permission.
43: *
44: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
45: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
46: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
47: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
48: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
49: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
50: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
51: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
52: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
53: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54: * SUCH DAMAGE.
55: *
56: * @(#)lfs_segment.c 8.5 (Berkeley) 1/4/94
57: */
58:
59: #include <sys/param.h>
60: #include <sys/systm.h>
61: #include <sys/namei.h>
62: #include <sys/kernel.h>
63: #include <sys/resourcevar.h>
64: #include <sys/file.h>
65: #include <sys/stat.h>
66: #include <sys/buf.h>
67: #include <sys/proc.h>
68: #include <sys/conf.h>
69: #include <sys/vnode.h>
70: #include <sys/malloc.h>
71: #include <sys/mount.h>
72:
73: #include <miscfs/specfs/specdev.h>
74: #include <miscfs/fifofs/fifo.h>
75:
76: #include <ufs/ufs/quota.h>
77: #include <ufs/ufs/inode.h>
78: #include <ufs/ufs/dir.h>
79: #include <ufs/ufs/ufsmount.h>
80: #include <ufs/ufs/ufs_extern.h>
81:
82: #include <ufs/lfs/lfs.h>
83: #include <ufs/lfs/lfs_extern.h>
84:
85: extern int count_lock_queue __P((void));
86:
87: #define MAX_ACTIVE 10
88: /*
89: * Determine if it's OK to start a partial in this segment, or if we need
90: * to go on to a new segment.
91: */
92: #define LFS_PARTIAL_FITS(fs) \
93: ((fs)->lfs_dbpseg - ((fs)->lfs_offset - (fs)->lfs_curseg) > \
94: 1 << (fs)->lfs_fsbtodb)
95:
96: void lfs_callback __P((struct buf *));
97: void lfs_gather __P((struct lfs *, struct segment *,
98: struct vnode *, int (*) __P((struct lfs *, struct buf *))));
99: int lfs_gatherblock __P((struct segment *, struct buf *, int *));
100: void lfs_iset __P((struct inode *, daddr_t, time_t));
101: int lfs_match_data __P((struct lfs *, struct buf *));
102: int lfs_match_dindir __P((struct lfs *, struct buf *));
103: int lfs_match_indir __P((struct lfs *, struct buf *));
104: int lfs_match_tindir __P((struct lfs *, struct buf *));
105: void lfs_newseg __P((struct lfs *));
106: void lfs_shellsort __P((struct buf **, daddr_t *, register int));
107: void lfs_supercallback __P((struct buf *));
108: void lfs_updatemeta __P((struct segment *));
109: int lfs_vref __P((struct vnode *));
110: void lfs_vunref __P((struct vnode *));
111: void lfs_writefile __P((struct lfs *, struct segment *, struct vnode *));
112: int lfs_writeinode __P((struct lfs *, struct segment *, struct inode *));
113: int lfs_writeseg __P((struct lfs *, struct segment *));
114: void lfs_writesuper __P((struct lfs *));
115: void lfs_writevnodes __P((struct lfs *fs, struct mount *mp,
116: struct segment *sp, int dirops));
117:
118: int lfs_allclean_wakeup; /* Cleaner wakeup address. */
119:
120: /* Statistics Counters */
121: #define DOSTATS
122: struct lfs_stats lfs_stats;
123:
124: /* op values to lfs_writevnodes */
125: #define VN_REG 0
126: #define VN_DIROP 1
127: #define VN_EMPTY 2
128:
129: /*
130: * Ifile and meta data blocks are not marked busy, so segment writes MUST be
131: * single threaded. Currently, there are two paths into lfs_segwrite, sync()
132: * and getnewbuf(). They both mark the file system busy. Lfs_vflush()
133: * explicitly marks the file system busy. So lfs_segwrite is safe. I think.
134: */
135:
136: int
137: lfs_vflush(vp)
138: struct vnode *vp;
139: {
140: struct inode *ip;
141: struct lfs *fs;
142: struct segment *sp;
143:
144: fs = VFSTOUFS(vp->v_mount)->um_lfs;
145: if (fs->lfs_nactive > MAX_ACTIVE)
146: return(lfs_segwrite(vp->v_mount, SEGM_SYNC|SEGM_CKP));
147: lfs_seglock(fs, SEGM_SYNC);
148: sp = fs->lfs_sp;
149:
150:
151: ip = VTOI(vp);
152: if (vp->v_dirtyblkhd.lh_first == NULL)
153: lfs_writevnodes(fs, vp->v_mount, sp, VN_EMPTY);
154:
155: do {
156: do {
157: if (vp->v_dirtyblkhd.lh_first != NULL)
158: lfs_writefile(fs, sp, vp);
159: } while (lfs_writeinode(fs, sp, ip));
160:
161: } while (lfs_writeseg(fs, sp) && ip->i_number == LFS_IFILE_INUM);
162:
163: #ifdef DOSTATS
164: ++lfs_stats.nwrites;
165: if (sp->seg_flags & SEGM_SYNC)
166: ++lfs_stats.nsync_writes;
167: if (sp->seg_flags & SEGM_CKP)
168: ++lfs_stats.ncheckpoints;
169: #endif
170: lfs_segunlock(fs);
171: return (0);
172: }
173:
174: void
175: lfs_writevnodes(fs, mp, sp, op)
176: struct lfs *fs;
177: struct mount *mp;
178: struct segment *sp;
179: int op;
180: {
181: struct inode *ip;
182: struct vnode *vp;
183:
184: loop:
185: for (vp = mp->mnt_vnodelist.lh_first;
186: vp != NULL;
187: vp = vp->v_mntvnodes.le_next) {
188: /*
189: * If the vnode that we are about to sync is no longer
190: * associated with this mount point, start over.
191: */
192: if (vp->v_mount != mp)
193: goto loop;
194:
195: /* XXX ignore dirops for now
196: if (op == VN_DIROP && !(vp->v_flag & VDIROP) ||
197: op != VN_DIROP && (vp->v_flag & VDIROP))
198: continue;
199: */
200:
201: if (op == VN_EMPTY && vp->v_dirtyblkhd.lh_first)
202: continue;
203:
204: if (vp->v_type == VNON)
205: continue;
206:
207: if (lfs_vref(vp))
208: continue;
209:
210: /*
211: * Write the inode/file if dirty and it's not the
212: * the IFILE.
213: */
214: ip = VTOI(vp);
215: if ((ip->i_flag &
216: (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE) ||
217: vp->v_dirtyblkhd.lh_first != NULL) &&
218: ip->i_number != LFS_IFILE_INUM) {
219: if (vp->v_dirtyblkhd.lh_first != NULL)
220: lfs_writefile(fs, sp, vp);
221: (void) lfs_writeinode(fs, sp, ip);
222: }
223: vp->v_flag &= ~VDIROP;
224: lfs_vunref(vp);
225: }
226: }
227:
228: int
229: lfs_segwrite(mp, flags)
230: struct mount *mp;
231: int flags; /* Do a checkpoint. */
232: {
233: struct buf *bp;
234: struct inode *ip;
235: struct lfs *fs;
236: struct segment *sp;
237: struct vnode *vp;
238: SEGUSE *segusep;
239: daddr_t ibno;
240: CLEANERINFO *cip;
241: int clean, do_ckp, error, i;
242:
243: fs = VFSTOUFS(mp)->um_lfs;
244:
245: /*
246: * If we have fewer than 2 clean segments, wait until cleaner
247: * writes.
248: */
249: do {
250: LFS_CLEANERINFO(cip, fs, bp);
251: clean = cip->clean;
252: brelse(bp);
253: if (clean <= 2) {
254: printf ("segs clean: %d\n", clean);
255: wakeup(&lfs_allclean_wakeup);
256: if (error = tsleep(&fs->lfs_avail, PRIBIO + 1,
257: "lfs writer", 0))
258: return (error);
259: }
260: } while (clean <= 2 );
261:
262: /*
263: * Allocate a segment structure and enough space to hold pointers to
264: * the maximum possible number of buffers which can be described in a
265: * single summary block.
266: */
267: do_ckp = flags & SEGM_CKP || fs->lfs_nactive > MAX_ACTIVE;
268: lfs_seglock(fs, flags | (do_ckp ? SEGM_CKP : 0));
269: sp = fs->lfs_sp;
270:
271: lfs_writevnodes(fs, mp, sp, VN_REG);
272:
273: /* XXX ignore ordering of dirops for now */
274: /* XXX
275: fs->lfs_writer = 1;
276: if (fs->lfs_dirops && (error =
277: tsleep(&fs->lfs_writer, PRIBIO + 1, "lfs writer", 0))) {
278: free(sp->bpp, M_SEGMENT);
279: free(sp, M_SEGMENT);
280: fs->lfs_writer = 0;
281: return (error);
282: }
283:
284: lfs_writevnodes(fs, mp, sp, VN_DIROP);
285: */
286:
287: /*
288: * If we are doing a checkpoint, mark everything since the
289: * last checkpoint as no longer ACTIVE.
290: */
291: if (do_ckp)
292: for (ibno = fs->lfs_cleansz + fs->lfs_segtabsz;
293: --ibno >= fs->lfs_cleansz; ) {
294: if (bread(fs->lfs_ivnode, ibno, fs->lfs_bsize,
295: NOCRED, &bp))
296:
297: panic("lfs: ifile read");
298: segusep = (SEGUSE *)bp->b_data;
299: for (i = fs->lfs_sepb; i--; segusep++)
300: segusep->su_flags &= ~SEGUSE_ACTIVE;
301:
302: error = VOP_BWRITE(bp);
303: }
304:
305: if (do_ckp || fs->lfs_doifile) {
306: redo:
307: vp = fs->lfs_ivnode;
308: while (vget(vp, 1));
309: ip = VTOI(vp);
310: if (vp->v_dirtyblkhd.lh_first != NULL)
311: lfs_writefile(fs, sp, vp);
312: (void)lfs_writeinode(fs, sp, ip);
313: vput(vp);
314: if (lfs_writeseg(fs, sp) && do_ckp)
315: goto redo;
316: } else
317: (void) lfs_writeseg(fs, sp);
318:
319: /*
320: * If the I/O count is non-zero, sleep until it reaches zero. At the
321: * moment, the user's process hangs around so we can sleep.
322: */
323: /* XXX ignore dirops for now
324: fs->lfs_writer = 0;
325: fs->lfs_doifile = 0;
326: wakeup(&fs->lfs_dirops);
327: */
328:
329: #ifdef DOSTATS
330: ++lfs_stats.nwrites;
331: if (sp->seg_flags & SEGM_SYNC)
332: ++lfs_stats.nsync_writes;
333: if (sp->seg_flags & SEGM_CKP)
334: ++lfs_stats.ncheckpoints;
335: #endif
336: lfs_segunlock(fs);
337: return (0);
338: }
339:
340: /*
341: * Write the dirty blocks associated with a vnode.
342: */
343: void
344: lfs_writefile(fs, sp, vp)
345: struct lfs *fs;
346: struct segment *sp;
347: struct vnode *vp;
348: {
349: struct buf *bp;
350: struct finfo *fip;
351: IFILE *ifp;
352:
353: if (sp->seg_bytes_left < fs->lfs_bsize ||
354: sp->sum_bytes_left < sizeof(struct finfo))
355: (void) lfs_writeseg(fs, sp);
356:
357: sp->sum_bytes_left -= sizeof(struct finfo) - sizeof(daddr_t);
358: ++((SEGSUM *)(sp->segsum))->ss_nfinfo;
359:
360: fip = sp->fip;
361: fip->fi_nblocks = 0;
362: fip->fi_ino = VTOI(vp)->i_number;
363: LFS_IENTRY(ifp, fs, fip->fi_ino, bp);
364: fip->fi_version = ifp->if_version;
365: brelse(bp);
366:
367: /*
368: * It may not be necessary to write the meta-data blocks at this point,
369: * as the roll-forward recovery code should be able to reconstruct the
370: * list.
371: */
372: lfs_gather(fs, sp, vp, lfs_match_data);
373: lfs_gather(fs, sp, vp, lfs_match_indir);
374: lfs_gather(fs, sp, vp, lfs_match_dindir);
375: #ifdef TRIPLE
376: lfs_gather(fs, sp, vp, lfs_match_tindir);
377: #endif
378:
379: fip = sp->fip;
380: if (fip->fi_nblocks != 0) {
381: sp->fip =
382: (struct finfo *)((caddr_t)fip + sizeof(struct finfo) +
383: sizeof(daddr_t) * (fip->fi_nblocks - 1));
384: sp->start_lbp = &sp->fip->fi_blocks[0];
385: } else {
386: sp->sum_bytes_left += sizeof(struct finfo) - sizeof(daddr_t);
387: --((SEGSUM *)(sp->segsum))->ss_nfinfo;
388: }
389: }
390:
391: int
392: lfs_writeinode(fs, sp, ip)
393: struct lfs *fs;
394: struct segment *sp;
395: struct inode *ip;
396: {
397: struct buf *bp, *ibp;
398: IFILE *ifp;
399: SEGUSE *sup;
400: daddr_t daddr;
401: ino_t ino;
402: int error, i, ndx;
403: int redo_ifile = 0;
404:
405: if (!(ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)))
406: return(0);
407:
408: /* Allocate a new inode block if necessary. */
409: if (sp->ibp == NULL) {
410: /* Allocate a new segment if necessary. */
411: if (sp->seg_bytes_left < fs->lfs_bsize ||
412: sp->sum_bytes_left < sizeof(daddr_t))
413: (void) lfs_writeseg(fs, sp);
414:
415: /* Get next inode block. */
416: daddr = fs->lfs_offset;
417: fs->lfs_offset += fsbtodb(fs, 1);
418: sp->ibp = *sp->cbpp++ =
419: lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, daddr,
420: fs->lfs_bsize);
421: /* Zero out inode numbers */
422: for (i = 0; i < INOPB(fs); ++i)
423: ((struct dinode *)sp->ibp->b_data)[i].di_inumber = 0;
424: ++sp->start_bpp;
425: fs->lfs_avail -= fsbtodb(fs, 1);
426: /* Set remaining space counters. */
427: sp->seg_bytes_left -= fs->lfs_bsize;
428: sp->sum_bytes_left -= sizeof(daddr_t);
429: ndx = LFS_SUMMARY_SIZE / sizeof(daddr_t) -
430: sp->ninodes / INOPB(fs) - 1;
431: ((daddr_t *)(sp->segsum))[ndx] = daddr;
432: }
433:
434: /* Update the inode times and copy the inode onto the inode page. */
435: if (ip->i_flag & IN_MODIFIED)
436: --fs->lfs_uinodes;
437: ITIMES(ip, &time, &time);
438: ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE);
439: bp = sp->ibp;
440: ((struct dinode *)bp->b_data)[sp->ninodes % INOPB(fs)] = ip->i_din;
441: /* Increment inode count in segment summary block. */
442: ++((SEGSUM *)(sp->segsum))->ss_ninos;
443:
444: /* If this page is full, set flag to allocate a new page. */
445: if (++sp->ninodes % INOPB(fs) == 0)
446: sp->ibp = NULL;
447:
448: /*
449: * If updating the ifile, update the super-block. Update the disk
450: * address and access times for this inode in the ifile.
451: */
452: ino = ip->i_number;
453: if (ino == LFS_IFILE_INUM) {
454: daddr = fs->lfs_idaddr;
455: fs->lfs_idaddr = bp->b_blkno;
456: } else {
457: LFS_IENTRY(ifp, fs, ino, ibp);
458: daddr = ifp->if_daddr;
459: ifp->if_daddr = bp->b_blkno;
460: error = VOP_BWRITE(ibp);
461: }
462:
463: /*
464: * No need to update segment usage if there was no former inode address
465: * or if the last inode address is in the current partial segment.
466: */
467: if (daddr != LFS_UNUSED_DADDR &&
468: !(daddr >= fs->lfs_lastpseg && daddr <= bp->b_blkno)) {
469: LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
470: #if DIAGNOSTIC
471: if (sup->su_nbytes < sizeof(struct dinode)) {
472: /* XXX -- Change to a panic. */
473: printf("lfs: negative bytes (segment %d)\n",
474: datosn(fs, daddr));
475: panic("negative bytes");
476: }
477: #endif
478: sup->su_nbytes -= sizeof(struct dinode);
479: redo_ifile =
480: (ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED));
481: error = VOP_BWRITE(bp);
482: }
483: return (redo_ifile);
484: }
485:
486: int
487: lfs_gatherblock(sp, bp, sptr)
488: struct segment *sp;
489: struct buf *bp;
490: int *sptr;
491: {
492: struct lfs *fs;
493: int version;
494:
495: /*
496: * If full, finish this segment. We may be doing I/O, so
497: * release and reacquire the splbio().
498: */
499: #if DIAGNOSTIC
500: if (sp->vp == NULL)
501: panic ("lfs_gatherblock: Null vp in segment");
502: #endif
503: fs = sp->fs;
504: if (sp->sum_bytes_left < sizeof(daddr_t) ||
505: sp->seg_bytes_left < fs->lfs_bsize) {
506: if (sptr)
507: splx(*sptr);
508: lfs_updatemeta(sp);
509:
510: version = sp->fip->fi_version;
511: (void) lfs_writeseg(fs, sp);
512:
513: sp->fip->fi_version = version;
514: sp->fip->fi_ino = VTOI(sp->vp)->i_number;
515: /* Add the current file to the segment summary. */
516: ++((SEGSUM *)(sp->segsum))->ss_nfinfo;
517: sp->sum_bytes_left -=
518: sizeof(struct finfo) - sizeof(daddr_t);
519:
520: if (sptr)
521: *sptr = splbio();
522: return(1);
523: }
524:
525: /* Insert into the buffer list, update the FINFO block. */
526: bp->b_flags |= B_GATHERED;
527: *sp->cbpp++ = bp;
528: sp->fip->fi_blocks[sp->fip->fi_nblocks++] = bp->b_lblkno;
529:
530: sp->sum_bytes_left -= sizeof(daddr_t);
531: sp->seg_bytes_left -= fs->lfs_bsize;
532: return(0);
533: }
534:
535: void
536: lfs_gather(fs, sp, vp, match)
537: struct lfs *fs;
538: struct segment *sp;
539: struct vnode *vp;
540: int (*match) __P((struct lfs *, struct buf *));
541: {
542: struct buf *bp;
543: int s;
544:
545: sp->vp = vp;
546: s = splbio();
547: loop: for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = bp->b_vnbufs.le_next) {
548: if (bp->b_flags & B_BUSY || !match(fs, bp) ||
549: bp->b_flags & B_GATHERED)
550: continue;
551: #if DIAGNOSTIC
552: if (!(bp->b_flags & B_DELWRI))
553: panic("lfs_gather: bp not B_DELWRI");
554: if (!(bp->b_flags & B_LOCKED))
555: panic("lfs_gather: bp not B_LOCKED");
556: #endif
557: if (lfs_gatherblock(sp, bp, &s))
558: goto loop;
559: }
560: splx(s);
561: lfs_updatemeta(sp);
562: sp->vp = NULL;
563: }
564:
565:
566: /*
567: * Update the metadata that points to the blocks listed in the FINFO
568: * array.
569: */
570: void
571: lfs_updatemeta(sp)
572: struct segment *sp;
573: {
574: SEGUSE *sup;
575: struct buf *bp;
576: struct lfs *fs;
577: struct vnode *vp;
578: struct indir a[NIADDR + 2], *ap;
579: struct inode *ip;
580: daddr_t daddr, lbn, off;
581: int db_per_fsb, error, i, nblocks, num;
582:
583: vp = sp->vp;
584: nblocks = &sp->fip->fi_blocks[sp->fip->fi_nblocks] - sp->start_lbp;
585: if (vp == NULL || nblocks == 0)
586: return;
587:
588: /* Sort the blocks. */
589: if (!(sp->seg_flags & SEGM_CLEAN))
590: lfs_shellsort(sp->start_bpp, sp->start_lbp, nblocks);
591:
592: /*
593: * Assign disk addresses, and update references to the logical
594: * block and the segment usage information.
595: */
596: fs = sp->fs;
597: db_per_fsb = fsbtodb(fs, 1);
598: for (i = nblocks; i--; ++sp->start_bpp) {
599: lbn = *sp->start_lbp++;
600: (*sp->start_bpp)->b_blkno = off = fs->lfs_offset;
601: fs->lfs_offset += db_per_fsb;
602:
603: if (error = ufs_bmaparray(vp, lbn, &daddr, a, &num, NULL))
604: panic("lfs_updatemeta: ufs_bmaparray %d", error);
605: ip = VTOI(vp);
606: switch (num) {
607: case 0:
608: ip->i_db[lbn] = off;
609: break;
610: case 1:
611: ip->i_ib[a[0].in_off] = off;
612: break;
613: default:
614: ap = &a[num - 1];
615: if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp))
616: panic("lfs_updatemeta: bread bno %d",
617: ap->in_lbn);
618: /*
619: * Bread may create a new indirect block which needs
620: * to get counted for the inode.
621: */
622: if (bp->b_blkno == -1 && !(bp->b_flags & B_CACHE)) {
623: printf ("Updatemeta allocating indirect block: shouldn't happen\n");
624: ip->i_blocks += btodb(fs->lfs_bsize);
625: fs->lfs_bfree -= btodb(fs->lfs_bsize);
626: }
627: ((daddr_t *)bp->b_data)[ap->in_off] = off;
628: VOP_BWRITE(bp);
629: }
630:
631: /* Update segment usage information. */
632: if (daddr != UNASSIGNED &&
633: !(daddr >= fs->lfs_lastpseg && daddr <= off)) {
634: LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
635: #if DIAGNOSTIC
636: if (sup->su_nbytes < fs->lfs_bsize) {
637: /* XXX -- Change to a panic. */
638: printf("lfs: negative bytes (segment %d)\n",
639: datosn(fs, daddr));
640: panic ("Negative Bytes");
641: }
642: #endif
643: sup->su_nbytes -= fs->lfs_bsize;
644: error = VOP_BWRITE(bp);
645: }
646: }
647: }
648:
649: /*
650: * Start a new segment.
651: */
652: int
653: lfs_initseg(fs)
654: struct lfs *fs;
655: {
656: struct segment *sp;
657: SEGUSE *sup;
658: SEGSUM *ssp;
659: struct buf *bp;
660: int repeat;
661:
662: sp = fs->lfs_sp;
663:
664: repeat = 0;
665: /* Advance to the next segment. */
666: if (!LFS_PARTIAL_FITS(fs)) {
667: /* Wake up any cleaning procs waiting on this file system. */
668: wakeup(&lfs_allclean_wakeup);
669:
670: lfs_newseg(fs);
671: repeat = 1;
672: fs->lfs_offset = fs->lfs_curseg;
673: sp->seg_number = datosn(fs, fs->lfs_curseg);
674: sp->seg_bytes_left = fs->lfs_dbpseg * DEV_BSIZE;
675:
676: /*
677: * If the segment contains a superblock, update the offset
678: * and summary address to skip over it.
679: */
680: LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
681: if (sup->su_flags & SEGUSE_SUPERBLOCK) {
682: fs->lfs_offset += LFS_SBPAD / DEV_BSIZE;
683: sp->seg_bytes_left -= LFS_SBPAD;
684: }
685: brelse(bp);
686: } else {
687: sp->seg_number = datosn(fs, fs->lfs_curseg);
688: sp->seg_bytes_left = (fs->lfs_dbpseg -
689: (fs->lfs_offset - fs->lfs_curseg)) * DEV_BSIZE;
690: }
691: fs->lfs_lastpseg = fs->lfs_offset;
692:
693: sp->fs = fs;
694: sp->ibp = NULL;
695: sp->ninodes = 0;
696:
697: /* Get a new buffer for SEGSUM and enter it into the buffer list. */
698: sp->cbpp = sp->bpp;
699: *sp->cbpp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, fs->lfs_offset,
700: LFS_SUMMARY_SIZE);
701: sp->segsum = (*sp->cbpp)->b_data;
702: bzero(sp->segsum, LFS_SUMMARY_SIZE);
703: sp->start_bpp = ++sp->cbpp;
704: fs->lfs_offset += LFS_SUMMARY_SIZE / DEV_BSIZE;
705:
706: /* Set point to SEGSUM, initialize it. */
707: ssp = sp->segsum;
708: ssp->ss_next = fs->lfs_nextseg;
709: ssp->ss_nfinfo = ssp->ss_ninos = 0;
710:
711: /* Set pointer to first FINFO, initialize it. */
712: sp->fip = (struct finfo *)((caddr_t)sp->segsum + sizeof(SEGSUM));
713: sp->fip->fi_nblocks = 0;
714: sp->start_lbp = &sp->fip->fi_blocks[0];
715:
716: sp->seg_bytes_left -= LFS_SUMMARY_SIZE;
717: sp->sum_bytes_left = LFS_SUMMARY_SIZE - sizeof(SEGSUM);
718:
719: return(repeat);
720: }
721:
722: /*
723: * Return the next segment to write.
724: */
725: void
726: lfs_newseg(fs)
727: struct lfs *fs;
728: {
729: CLEANERINFO *cip;
730: SEGUSE *sup;
731: struct buf *bp;
732: int curseg, isdirty, sn;
733:
734: LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_nextseg), bp);
735: sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
736: sup->su_nbytes = 0;
737: sup->su_nsums = 0;
738: sup->su_ninos = 0;
739: (void) VOP_BWRITE(bp);
740:
741: LFS_CLEANERINFO(cip, fs, bp);
742: --cip->clean;
743: ++cip->dirty;
744: (void) VOP_BWRITE(bp);
745:
746: fs->lfs_lastseg = fs->lfs_curseg;
747: fs->lfs_curseg = fs->lfs_nextseg;
748: for (sn = curseg = datosn(fs, fs->lfs_curseg);;) {
749: sn = (sn + 1) % fs->lfs_nseg;
750: if (sn == curseg)
751: panic("lfs_nextseg: no clean segments");
752: LFS_SEGENTRY(sup, fs, sn, bp);
753: isdirty = sup->su_flags & SEGUSE_DIRTY;
754: brelse(bp);
755: if (!isdirty)
756: break;
757: }
758:
759: ++fs->lfs_nactive;
760: fs->lfs_nextseg = sntoda(fs, sn);
761: #ifdef DOSTATS
762: ++lfs_stats.segsused;
763: #endif
764: }
765:
766: int
767: lfs_writeseg(fs, sp)
768: struct lfs *fs;
769: struct segment *sp;
770: {
771: extern int locked_queue_count;
772: struct buf **bpp, *bp, *cbp;
773: SEGUSE *sup;
774: SEGSUM *ssp;
775: dev_t i_dev;
776: size_t size;
777: u_long *datap, *dp;
778: int ch_per_blk, do_again, i, nblocks, num, s;
779: int (*strategy)__P((struct vop_strategy_args *));
780: struct vop_strategy_args vop_strategy_a;
781: u_short ninos;
782: char *p;
783:
784: /*
785: * If there are no buffers other than the segment summary to write
786: * and it is not a checkpoint, don't do anything. On a checkpoint,
787: * even if there aren't any buffers, you need to write the superblock.
788: */
789: if ((nblocks = sp->cbpp - sp->bpp) == 1)
790: return (0);
791:
792: ssp = (SEGSUM *)sp->segsum;
793:
794: /* Update the segment usage information. */
795: LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
796: ninos = (ssp->ss_ninos + INOPB(fs) - 1) / INOPB(fs);
797: sup->su_nbytes += nblocks - 1 - ninos << fs->lfs_bshift;
798: sup->su_nbytes += ssp->ss_ninos * sizeof(struct dinode);
799: sup->su_nbytes += LFS_SUMMARY_SIZE;
800: sup->su_lastmod = time.tv_sec;
801: sup->su_ninos += ninos;
802: ++sup->su_nsums;
803: do_again = !(bp->b_flags & B_GATHERED);
804: (void)VOP_BWRITE(bp);
805: /*
806: * Compute checksum across data and then across summary; the first
807: * block (the summary block) is skipped. Set the create time here
808: * so that it's guaranteed to be later than the inode mod times.
809: *
810: * XXX
811: * Fix this to do it inline, instead of malloc/copy.
812: */
813: // datap = dp = malloc(nblocks * sizeof(u_long), M_SEGMENT, M_WAITOK);
814: MALLOC(dp, caddr_t, nblocks * sizeof(u_long), M_SEGMENT, M_WAITOK);
815: datap = dp;
816: for (bpp = sp->bpp, i = nblocks - 1; i--;) {
817: if ((*++bpp)->b_flags & B_INVAL) {
818: if (copyin((*bpp)->b_saveaddr, dp++, sizeof(u_long)))
819: panic("lfs_writeseg: copyin failed");
820: } else
821: *dp++ = ((u_long *)(*bpp)->b_data)[0];
822: }
823: ssp->ss_create = time.tv_sec;
824: ssp->ss_datasum = cksum(datap, (nblocks - 1) * sizeof(u_long));
825: ssp->ss_sumsum =
826: cksum(&ssp->ss_datasum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_sumsum));
827: free(datap, M_SEGMENT);
828: #if DIAGNOSTIC
829: if (fs->lfs_bfree < fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE)
830: panic("lfs_writeseg: No diskspace for summary");
831: #endif
832: fs->lfs_bfree -= (fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE);
833:
834: i_dev = VTOI(fs->lfs_ivnode)->i_dev;
835: strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)];
836:
837: /*
838: * When we simply write the blocks we lose a rotation for every block
839: * written. To avoid this problem, we allocate memory in chunks, copy
840: * the buffers into the chunk and write the chunk. MAXPHYS is the
841: * largest size I/O devices can handle.
842: * When the data is copied to the chunk, turn off the the B_LOCKED bit
843: * and brelse the buffer (which will move them to the LRU list). Add
844: * the B_CALL flag to the buffer header so we can count I/O's for the
845: * checkpoints and so we can release the allocated memory.
846: *
847: * XXX
848: * This should be removed if the new virtual memory system allows us to
849: * easily make the buffers contiguous in kernel memory and if that's
850: * fast enough.
851: */
852: ch_per_blk = MAXPHYS / fs->lfs_bsize;
853: for (bpp = sp->bpp, i = nblocks; i;) {
854: num = ch_per_blk;
855: if (num > i)
856: num = i;
857: i -= num;
858: size = num * fs->lfs_bsize;
859:
860: cbp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp,
861: (*bpp)->b_blkno, size);
862: cbp->b_dev = i_dev;
863: cbp->b_flags |= B_ASYNC | B_BUSY;
864:
865: s = splbio();
866: ++fs->lfs_iocount;
867: for (p = cbp->b_data; num--;) {
868: bp = *bpp++;
869: /*
870: * Fake buffers from the cleaner are marked as B_INVAL.
871: * We need to copy the data from user space rather than
872: * from the buffer indicated.
873: * XXX == what do I do on an error?
874: */
875: if (bp->b_flags & B_INVAL) {
876: if (copyin(bp->b_saveaddr, p, bp->b_bcount))
877: panic("lfs_writeseg: copyin failed");
878: } else
879: bcopy(bp->b_data, p, bp->b_bcount);
880: p += bp->b_bcount;
881: if (bp->b_flags & B_LOCKED)
882: --locked_queue_count;
883: bp->b_flags &= ~(B_ERROR | B_READ | B_DELWRI |
884: B_LOCKED | B_GATHERED);
885: if (bp->b_flags & B_CALL) {
886: /* if B_CALL, it was created with newbuf */
887: brelvp(bp);
888: if (!(bp->b_flags & B_INVAL))
889: free(bp->b_data, M_SEGMENT);
890: free(bp, M_SEGMENT);
891: } else {
892: bremfree(bp);
893: bp->b_flags |= B_DONE;
894: reassignbuf(bp, bp->b_vp);
895: brelse(bp);
896: }
897: }
898: ++cbp->b_vp->v_numoutput;
899: splx(s);
900: cbp->b_bcount = p - (char *)cbp->b_data;
901: /*
902: * XXXX This is a gross and disgusting hack. Since these
903: * buffers are physically addressed, they hang off the
904: * device vnode (devvp). As a result, they have no way
905: * of getting to the LFS superblock or lfs structure to
906: * keep track of the number of I/O's pending. So, I am
907: * going to stuff the fs into the saveaddr field of
908: * the buffer (yuk).
909: */
910: cbp->b_saveaddr = (caddr_t)fs;
911: vop_strategy_a.a_desc = VDESC(vop_strategy);
912: vop_strategy_a.a_bp = cbp;
913: (strategy)(&vop_strategy_a);
914: }
915: /*
916: * XXX
917: * Vinvalbuf can move locked buffers off the locked queue
918: * and we have no way of knowing about this. So, after
919: * doing a big write, we recalculate how many bufers are
920: * really still left on the locked queue.
921: */
922: locked_queue_count = count_lock_queue();
923: wakeup(&locked_queue_count);
924: #ifdef DOSTATS
925: ++lfs_stats.psegwrites;
926: lfs_stats.blocktot += nblocks - 1;
927: if (fs->lfs_sp->seg_flags & SEGM_SYNC)
928: ++lfs_stats.psyncwrites;
929: if (fs->lfs_sp->seg_flags & SEGM_CLEAN) {
930: ++lfs_stats.pcleanwrites;
931: lfs_stats.cleanblocks += nblocks - 1;
932: }
933: #endif
934: return (lfs_initseg(fs) || do_again);
935: }
936:
937: void
938: lfs_writesuper(fs)
939: struct lfs *fs;
940: {
941: struct buf *bp;
942: dev_t i_dev;
943: int (*strategy) __P((struct vop_strategy_args *));
944: int s;
945: struct vop_strategy_args vop_strategy_a;
946:
947: i_dev = VTOI(fs->lfs_ivnode)->i_dev;
948: strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)];
949:
950: /* Checksum the superblock and copy it into a buffer. */
951: fs->lfs_cksum = cksum(fs, sizeof(struct lfs) - sizeof(fs->lfs_cksum));
952: bp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, fs->lfs_sboffs[0],
953: LFS_SBPAD);
954: *(struct lfs *)bp->b_data = *fs;
955:
956: /* XXX Toggle between first two superblocks; for now just write first */
957: bp->b_dev = i_dev;
958: bp->b_flags |= B_BUSY | B_CALL | B_ASYNC;
959: bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI);
960: bp->b_iodone = lfs_supercallback;
961: vop_strategy_a.a_desc = VDESC(vop_strategy);
962: vop_strategy_a.a_bp = bp;
963: s = splbio();
964: ++bp->b_vp->v_numoutput;
965: splx(s);
966: (strategy)(&vop_strategy_a);
967: }
968:
969: /*
970: * Logical block number match routines used when traversing the dirty block
971: * chain.
972: */
973: int
974: lfs_match_data(fs, bp)
975: struct lfs *fs;
976: struct buf *bp;
977: {
978: return (bp->b_lblkno >= 0);
979: }
980:
981: int
982: lfs_match_indir(fs, bp)
983: struct lfs *fs;
984: struct buf *bp;
985: {
986: int lbn;
987:
988: lbn = bp->b_lblkno;
989: return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0);
990: }
991:
992: int
993: lfs_match_dindir(fs, bp)
994: struct lfs *fs;
995: struct buf *bp;
996: {
997: int lbn;
998:
999: lbn = bp->b_lblkno;
1000: return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1);
1001: }
1002:
1003: int
1004: lfs_match_tindir(fs, bp)
1005: struct lfs *fs;
1006: struct buf *bp;
1007: {
1008: int lbn;
1009:
1010: lbn = bp->b_lblkno;
1011: return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2);
1012: }
1013:
1014: /*
1015: * Allocate a new buffer header.
1016: */
1017: struct buf *
1018: lfs_newbuf(vp, daddr, size)
1019: struct vnode *vp;
1020: daddr_t daddr;
1021: size_t size;
1022: {
1023: struct buf *bp;
1024: size_t nbytes;
1025:
1026: nbytes = roundup(size, DEV_BSIZE);
1027: // bp = malloc(sizeof(struct buf), M_SEGMENT, M_WAITOK);
1028: MALLOC(bp, struct buf *, sizeof(struct buf), M_SEGMENT, M_WAITOK);
1029: bzero(bp, sizeof(struct buf));
1030: if (nbytes)
1031: // bp->b_data = malloc(nbytes, M_SEGMENT, M_WAITOK);
1032: MALLOC(bp->d_data, caddr_t, nbytes, M_SEGMENT, M_WAITOK);
1033: bgetvp(vp, bp);
1034: bp->b_bufsize = size;
1035: bp->b_bcount = size;
1036: bp->b_lblkno = daddr;
1037: bp->b_blkno = daddr;
1038: bp->b_error = 0;
1039: bp->b_resid = 0;
1040: bp->b_iodone = lfs_callback;
1041: bp->b_flags |= B_BUSY | B_CALL | B_NOCACHE;
1042: return (bp);
1043: }
1044:
1045: void
1046: lfs_callback(bp)
1047: struct buf *bp;
1048: {
1049: struct lfs *fs;
1050:
1051: fs = (struct lfs *)bp->b_saveaddr;
1052: #if DIAGNOSTIC
1053: if (fs->lfs_iocount == 0)
1054: panic("lfs_callback: zero iocount\n");
1055: #endif
1056: if (--fs->lfs_iocount == 0)
1057: wakeup(&fs->lfs_iocount);
1058:
1059: brelvp(bp);
1060: free(bp->b_data, M_SEGMENT);
1061: free(bp, M_SEGMENT);
1062: }
1063:
1064: void
1065: lfs_supercallback(bp)
1066: struct buf *bp;
1067: {
1068: brelvp(bp);
1069: free(bp->b_data, M_SEGMENT);
1070: free(bp, M_SEGMENT);
1071: }
1072:
/*
 * Shellsort (diminishing increment sort) from Data Structures and
 * Algorithms, Aho, Hopcraft and Ullman, 1983 Edition, page 290;
 * see also Knuth Vol. 3, page 84.  The increments are selected from
 * formula (8), page 95.  Roughly O(N^3/2).
 */
/*
 * This is our own private copy of shellsort because we want to sort
 * two parallel arrays (the array of buffer pointers and the array of
 * logical block numbers) simultaneously.  The logical block numbers are
 * compared as unsigned values so that the negative block numbers (meta
 * data blocks) sort AFTER the data blocks.
 *
 * bp_array and lb_array are permuted identically, in place; nmemb is
 * the number of elements in each.
 */
void
lfs_shellsort(bp_array, lb_array, nmemb)
	struct buf **bp_array;
	daddr_t *lb_array;
	register int nmemb;
{
	static int shell_increments[] = { 4, 1, 0 };
	register int incr, *incrp, t1, t2;
	struct buf *bp_temp;
	daddr_t lb_temp;

	for (incrp = shell_increments; (incr = *incrp++) != 0;)
		for (t1 = incr; t1 < nmemb; ++t1)
			for (t2 = t1 - incr; t2 >= 0; t2 -= incr) {
				/*
				 * Compare as unsigned: a signed compare
				 * would put the negative (meta data)
				 * block numbers first.
				 */
				if ((u_long)lb_array[t2] <=
				    (u_long)lb_array[t2 + incr])
					break;
				lb_temp = lb_array[t2];
				lb_array[t2] = lb_array[t2 + incr];
				lb_array[t2 + incr] = lb_temp;
				bp_temp = bp_array[t2];
				bp_array[t2] = bp_array[t2 + incr];
				bp_array[t2 + incr] = bp_temp;
			}
}
1111:
1112: /*
1113: * Check VXLOCK. Return 1 if the vnode is locked. Otherwise, vget it.
1114: */
1115: lfs_vref(vp)
1116: register struct vnode *vp;
1117: {
1118:
1119: if (vp->v_flag & VXLOCK)
1120: return(1);
1121: return (vget(vp, 0));
1122: }
1123:
/*
 * Drop the reference on "vp" with vrele().  The global lfs_no_inactive
 * is set for the duration of the call.
 */
void
lfs_vunref(vp)
	register struct vnode *vp;
{
	extern int lfs_no_inactive;

	/*
	 * We want vrele() without VOP_INACTIVE being run on this vnode.
	 * Instead of open-coding vrele here, a global flag is raised
	 * around the call to tell lfs_inactive not to run.  Yes, it's gross.
	 */
	lfs_no_inactive = 1;
	vrele(vp);
	lfs_no_inactive = 0;
}
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.