|
|
1.1 root 1: /*
2: * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3: *
4: * @APPLE_LICENSE_HEADER_START@
5: *
6: * The contents of this file constitute Original Code as defined in and
7: * are subject to the Apple Public Source License Version 1.1 (the
8: * "License"). You may not use this file except in compliance with the
9: * License. Please obtain a copy of the License at
10: * http://www.apple.com/publicsource and read it before using this file.
11: *
12: * This Original Code and all software distributed under the License are
13: * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14: * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15: * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16: * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17: * License for the specific language governing rights and limitations
18: * under the License.
19: *
20: * @APPLE_LICENSE_HEADER_END@
21: */
22: /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23: /*
24: * Copyright (c) 1989, 1993
25: * The Regents of the University of California. All rights reserved.
26: * (c) UNIX System Laboratories, Inc.
27: * All or some portions of this file are derived from material licensed
28: * to the University of California by American Telephone and Telegraph
29: * Co. or Unix System Laboratories, Inc. and are reproduced herein with
30: * the permission of UNIX System Laboratories, Inc.
31: *
32: * Redistribution and use in source and binary forms, with or without
33: * modification, are permitted provided that the following conditions
34: * are met:
35: * 1. Redistributions of source code must retain the above copyright
36: * notice, this list of conditions and the following disclaimer.
37: * 2. Redistributions in binary form must reproduce the above copyright
38: * notice, this list of conditions and the following disclaimer in the
39: * documentation and/or other materials provided with the distribution.
40: * 3. All advertising materials mentioning features or use of this software
41: * must display the following acknowledgement:
42: * This product includes software developed by the University of
43: * California, Berkeley and its contributors.
44: * 4. Neither the name of the University nor the names of its contributors
45: * may be used to endorse or promote products derived from this software
46: * without specific prior written permission.
47: *
48: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58: * SUCH DAMAGE.
59: *
60: * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95
61: */
62:
63: /*
64: * External virtual filesystem routines
65: */
66:
67: #include <mach_nbc.h>
68: #include <sys/param.h>
69: #include <sys/systm.h>
70: #include <sys/proc.h>
71: #include <sys/mount.h>
72: #include <sys/time.h>
73: #include <sys/vnode.h>
74: #include <sys/stat.h>
75: #include <sys/namei.h>
76: #include <sys/ucred.h>
77: #include <sys/buf.h>
78: #include <sys/errno.h>
79: #include <sys/malloc.h>
80: #include <sys/domain.h>
81: #include <sys/mbuf.h>
82: #include <sys/syslog.h>
83:
84: #include <sys/vm.h>
85: #include <sys/sysctl.h>
86:
87: #include <miscfs/specfs/specdev.h>
88:
89: #if MACH_NBC
90: #include <kern/mapfs.h>
91: #endif /* MACH_NBC */
92:
93: enum vtype iftovt_tab[16] = {
94: VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
95: VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
96: };
97: int vttoif_tab[9] = {
98: 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
99: S_IFSOCK, S_IFIFO, S_IFMT,
100: };
101:
102: /*
103: * Insq/Remq for the vnode usage lists.
104: */
/* bufinsvn: link buffer bp at the head of the buffer list dp. */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)

/*
 * bufremvn: unlink bp from the buffer list it is on and mark it as
 * being on no list by storing NOLIST in its le_next link.
 *
 * The body is wrapped in do/while (0) so that "bufremvn(bp);" is exactly
 * one statement; a bare brace block followed by ';' would break an
 * unbraced if/else around the call.
 */
#define	bufremvn(bp) do {						\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
} while (0)
110: TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */
111: struct mntlist mountlist; /* mounted filesystem list */
112:
113: /*
114: * Have to declare first two locks as actual data even if !MACH_SLOCKS, since
115: * pointers to them get passed around.
116: */
117: simple_lock_data_t mountlist_slock;
118: simple_lock_data_t mntvnode_slock;
119: decl_simple_lock_data(,mntid_slock);
120: decl_simple_lock_data(,vnode_free_list_slock);
121: decl_simple_lock_data(,spechash_slock);
122:
123: extern struct lock__bsd__ exchangelock;
124:
125: /*
126: * vnodetarget is the number of vnodes we expect to get back from the
127: * VM object cache. As vm_object_cache_steal() is a CPU-bound operation,
128: * this number could be higher on faster processors.
129: * Having this number too high introduces longer delays in the execution
130: * of getnewvnode().
131: */
/* Count handed to vm_object_cache_steal(); vntblinit() fills in the default. */
unsigned long vnodetarget;
#define	VNODE_FREE_TARGET	20	/* vnodetarget when none was preset */

/*
 * Floor for the free list.  Compilations stat() files at a rapid rate;
 * a well-stocked free list lets that proceed while the name cache stays
 * useful.  With too few free vnodes we cycle through them and thrash
 * the disk.
 */
#define	VNODE_FREE_MIN		100	/* freelist should have at least these many */

/*
 * Once this many vnodes have been reused from the free list, vnodes
 * have to be pulled back from the VM object cache; otherwise caching in
 * the name cache and in the buffer cache [for the metadata] stops being
 * effective.
 */
#define	VNODE_TOOMANY_REUSED	(VNODE_FREE_MIN/4)

/*
 * With at least this many vnodes on the free list there is no need to
 * reclaim any from the VM object cache.
 */
#define	VNODE_FREE_ENOUGH	(VNODE_FREE_MIN + (VNODE_FREE_MIN/2))
156: /*
157: * Initialize the vnode management data structures.
158: */
159: void
160: vntblinit()
161: {
162: simple_lock_init(&mountlist_slock);
163: simple_lock_init(&mntvnode_slock);
164: simple_lock_init(&mntid_slock);
165: simple_lock_init(&spechash_slock);
166: TAILQ_INIT(&vnode_free_list);
167: simple_lock_init(&vnode_free_list_slock);
168: CIRCLEQ_INIT(&mountlist);
169: lockinit(&exchangelock, PVFS, "exchange", 0, 0);
170:
171: if (!vnodetarget)
172: vnodetarget = VNODE_FREE_TARGET;
173: #ifdef FIXME
174: adjust_vm_object_cache(0, desiredvnodes - VNODE_FREE_MIN);
175: #endif /* FIXME */
176: }
177:
178: /* Reset the VM Object Cache with the values passed in */
179: kern_return_t
180: reset_vmobjectcache(unsigned int val1, unsigned int val2)
181: {
182: #ifdef FIXME
183: return(adjust_vm_object_cache(val1-VNODE_FREE_MIN, val2 - VNODE_FREE_MIN));
184: #else
185: return(KERN_SUCCESS);
186: #endif /* FIXME */
187: }
188:
189:
190:
191: /*
192: * Mark a mount point as busy. Used to synchronize access and to delay
193: * unmounting. Interlock is not released on failure.
194: */
195: int
196: vfs_busy(mp, flags, interlkp, p)
197: struct mount *mp;
198: int flags;
199: struct slock *interlkp;
200: struct proc *p;
201: {
202: int lkflags;
203:
204: if (mp->mnt_flag & MNT_UNMOUNT) {
205: if (flags & LK_NOWAIT)
206: return (ENOENT);
207: mp->mnt_flag |= MNT_MWAIT;
208: if (interlkp)
209: simple_unlock(interlkp);
210: /*
211: * Since all busy locks are shared except the exclusive
212: * lock granted when unmounting, the only place that a
213: * wakeup needs to be done is at the release of the
214: * exclusive lock at the end of dounmount.
215: */
216: sleep((caddr_t)mp, PVFS);
217: if (interlkp)
218: simple_lock(interlkp);
219: return (ENOENT);
220: }
221: lkflags = LK_SHARED;
222: if (interlkp)
223: lkflags |= LK_INTERLOCK;
224: if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
225: panic("vfs_busy: unexpected lock failure");
226: return (0);
227: }
228:
229: /*
230: * Free a busy filesystem.
231: */
232: void
233: vfs_unbusy(mp, p)
234: struct mount *mp;
235: struct proc *p;
236: {
237:
238: lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
239: }
240:
241: /*
242: * Lookup a filesystem type, and if found allocate and initialize
243: * a mount structure for it.
244: *
245: * Devname is usually updated by mount(8) after booting.
246: */
247: int
248: vfs_rootmountalloc(fstypename, devname, mpp)
249: char *fstypename;
250: char *devname;
251: struct mount **mpp;
252: {
253: struct proc *p = current_proc(); /* XXX */
254: struct vfsconf *vfsp;
255: struct mount *mp;
256:
257: for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
258: if (!strcmp(vfsp->vfc_name, fstypename))
259: break;
260: if (vfsp == NULL)
261: return (ENODEV);
262: mp = _MALLOC_ZONE((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
263: bzero((char *)mp, (u_long)sizeof(struct mount));
264: lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
265: (void)vfs_busy(mp, LK_NOWAIT, 0, p);
266: LIST_INIT(&mp->mnt_vnodelist);
267: mp->mnt_vfc = vfsp;
268: mp->mnt_op = vfsp->vfc_vfsops;
269: mp->mnt_flag = MNT_RDONLY;
270: mp->mnt_vnodecovered = NULLVP;
271: vfsp->vfc_refcount++;
272: mp->mnt_stat.f_type = vfsp->vfc_typenum;
273: mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
274: strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
275: mp->mnt_stat.f_mntonname[0] = '/';
276: (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
277: *mpp = mp;
278: return (0);
279: }
280:
281: /*
282: * Find an appropriate filesystem to use for the root. If a filesystem
283: * has not been preselected, walk through the list of known filesystems
284: * trying those that have mountroot routines, and try them until one
285: * works or we have tried them all.
286: */
287: int
288: vfs_mountroot()
289: {
290: struct vfsconf *vfsp;
291: extern int (*mountroot)(void);
292: int error;
293:
294: if (mountroot != NULL) {
295: error = (*mountroot)();
296: return (error);
297: }
298:
299: for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
300: if (vfsp->vfc_mountroot == NULL)
301: continue;
302: if ((error = (*vfsp->vfc_mountroot)()) == 0)
303: return (0);
304: if (error == EINVAL)
305: printf("not a %s disk, trying next file system\n", vfsp->vfc_name);
306: else
307: printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
308: }
309: return (ENODEV);
310: }
311:
312: /*
313: * Lookup a mount point by filesystem identifier.
314: */
315: struct mount *
316: vfs_getvfs(fsid)
317: fsid_t *fsid;
318: {
319: register struct mount *mp;
320:
321: simple_lock(&mountlist_slock);
322: for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
323: mp = mp->mnt_list.cqe_next) {
324: if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
325: mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
326: simple_unlock(&mountlist_slock);
327: return (mp);
328: }
329: }
330: simple_unlock(&mountlist_slock);
331: return ((struct mount *)0);
332: }
333:
334: /*
335: * Get a new unique fsid
336: */
337: void
338: vfs_getnewfsid(mp)
339: struct mount *mp;
340: {
341: static u_short xxxfs_mntid;
342:
343: fsid_t tfsid;
344: int mtype;
345:
346: simple_lock(&mntid_slock);
347: mtype = mp->mnt_vfc->vfc_typenum;
348: mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
349: mp->mnt_stat.f_fsid.val[1] = mtype;
350: if (xxxfs_mntid == 0)
351: ++xxxfs_mntid;
352: tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
353: tfsid.val[1] = mtype;
354: if (mountlist.cqh_first != (void *)&mountlist) {
355: while (vfs_getvfs(&tfsid)) {
356: tfsid.val[0]++;
357: xxxfs_mntid++;
358: }
359: }
360: mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
361: simple_unlock(&mntid_slock);
362: }
363:
364: /*
365: * Set vnode attributes to VNOVAL
366: */
367: void
368: vattr_null(vap)
369: register struct vattr *vap;
370: {
371:
372: vap->va_type = VNON;
373: vap->va_size = vap->va_bytes = VNOVAL;
374: vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
375: vap->va_fsid = vap->va_fileid =
376: vap->va_blocksize = vap->va_rdev =
377: vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
378: vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
379: vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
380: vap->va_flags = vap->va_gen = VNOVAL;
381: vap->va_vaflags = 0;
382: }
383:
384: /*
385: * Routines having to do with the management of the vnode table.
386: */
387: extern int (**dead_vnodeop_p)();
388: static void vclean __P((struct vnode *vp, int flag, struct proc *p));
389: extern void vgonel __P((struct vnode *vp, struct proc *p));
390: long numvnodes, freevnodes;
391:
392: extern struct vattr va_null;
393:
394: /*
395: * Return the next vnode from the free list.
396: */
397: int
398: getnewvnode(tag, mp, vops, vpp)
399: enum vtagtype tag;
400: struct mount *mp;
401: int (**vops)();
402: struct vnode **vpp;
403: {
404: struct proc *p = current_proc(); /* XXX */
405: struct vnode *vp;
406: int cnt, didretry = 0;
407: static int reused = 0; /* track the reuse rate */
408:
409: retry:
410: simple_lock(&vnode_free_list_slock);
411: /*
412: * MALLOC a vnode if the number of vnodes is not reached the desired
413: * value. There might be vnodes on the free list, but we do not
414: * reuse from the freelist because reusing a vnode implies reusing
415: * the name cache entry.
416: */
417: if (numvnodes < desiredvnodes) {
418: numvnodes++;
419: simple_unlock(&vnode_free_list_slock);
420: MALLOC_ZONE(vp, struct vnode *, sizeof *vp, M_VNODE, M_WAITOK);
421: bzero((char *)vp, sizeof *vp);
422: simple_lock_init(&vp->v_interlock);
423: } else {
424: /*
425: * Once the desired number of vnodes are allocated, we start reusing
426: * from the freelist.
427: */
428: if (freevnodes < VNODE_FREE_MIN) {
429: /*
430: * if we are low on vnodes on the freelist attempt to get
431: * some back from the VM object cache
432: */
433: simple_unlock(&vnode_free_list_slock);
434: vm_object_cache_steal(vnodetarget);
435: simple_lock(&vnode_free_list_slock);
436: }
437:
438: for (cnt = 0, vp = vnode_free_list.tqh_first;
439: vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
440: if (simple_lock_try(&vp->v_interlock))
441: break;
442: }
443: /*
444: * Unless this is a bad time of the month, at most
445: * the first NCPUS items on the free list are
446: * locked, so this is close enough to being empty.
447: */
448: if (vp == NULLVP) {
449: simple_unlock(&vnode_free_list_slock);
450: if (!(didretry++) && (vm_object_cache_steal(vnodetarget) > 0))
451: goto retry;
452: tablefull("vnode");
453: log(LOG_EMERG, "%d vnodes locked, %d desired, %d numvnodes\n",
454: cnt, desiredvnodes, numvnodes);
455: *vpp = 0;
456: return (ENFILE);
457: }
458:
459: if (vp->v_usecount)
460: panic("free vnode isn't: v_type = %d, v_usecount = %d?",
461: vp->v_type, vp->v_usecount);
462:
463: if (reused > VNODE_TOOMANY_REUSED) {
464: reused = 0;
465: if (freevnodes < VNODE_FREE_ENOUGH) {
466: simple_unlock(&vnode_free_list_slock);
467: simple_unlock(&vp->v_interlock);
468: vm_object_cache_steal(vnodetarget);
469: /*
470: * The vnode we have right now can potentially have dirty
471: * buffers associated with it. So we do not want to reuse it
472: * given a choice. The vnodes reclimed from VM object cache are
473: * put on the front of the freelist. So retry can potentially
474: * avoid IO, which is a good thing.
475: */
476: goto retry;
477: }
478: }
479:
480: TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
481: /* see comment on why 0xdeadb is set at end of vgone (below) */
482: vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
483: freevnodes--;
484: reused++;
485: simple_unlock(&vnode_free_list_slock);
486: vp->v_lease = NULL;
487: if (vp->v_type != VBAD)
488: vgonel(vp, p);
489: else
490: simple_unlock(&vp->v_interlock);
491: #if DIAGNOSTIC
492: if (vp->v_data)
493: panic("cleaned vnode isn't");
494: {
495: int s = splbio();
496: if (vp->v_numoutput)
497: panic("Clean vnode has pending I/O's");
498: splx(s);
499: }
500: #endif
501: vp->v_flag = 0;
502: vp->v_lastr = 0;
503: vp->v_ralen = 0;
504: vp->v_maxra = 0;
505: vp->v_lastw = 0;
506: vp->v_lasta = 0;
507: vp->v_cstart = 0;
508: vp->v_clen = 0;
509: vp->v_socket = 0;
510: vp->v_bread = vp->v_consumed = 0;
511: }
512: vp->v_power = 5; /* 32k speculative reads */
513: vp->v_trigger = 16 * 8;
514: vp->v_type = VNON;
515: cache_purge(vp);
516: vp->v_tag = tag;
517: vp->v_op = vops;
518: insmntque(vp, mp);
519: *vpp = vp;
520: vp->v_usecount = 1;
521: vp->v_data = 0;
522: return (0);
523: }
524:
525: /*
526: * Move a vnode from one mount queue to another.
527: */
528: void
529: insmntque(vp, mp)
530: struct vnode *vp;
531: struct mount *mp;
532: {
533:
534: simple_lock(&mntvnode_slock);
535: /*
536: * Delete from old mount point vnode list, if on one.
537: */
538: if (vp->v_mount != NULL)
539: LIST_REMOVE(vp, v_mntvnodes);
540: /*
541: * Insert into list of vnodes for the new mount point, if available.
542: */
543: if ((vp->v_mount = mp) != NULL)
544: LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
545: simple_unlock(&mntvnode_slock);
546: }
547:
548: /*
549: * Update outstanding I/O count and do wakeup if requested.
550: */
551: void
552: vwakeup(bp)
553: register struct buf *bp;
554: {
555: register struct vnode *vp;
556:
557: bp->b_flags &= ~B_WRITEINPROG;
558: if (vp = bp->b_vp) {
559: if (--vp->v_numoutput < 0)
560: panic("vwakeup: neg numoutput");
561: if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
562: if (vp->v_numoutput < 0)
563: panic("vwakeup: neg numoutput 2");
564: vp->v_flag &= ~VBWAIT;
565: wakeup((caddr_t)&vp->v_numoutput);
566: }
567: }
568: }
569:
570: /*
571: * Flush out and invalidate all buffers associated with a vnode.
572: * Called with the underlying object locked.
573: */
/*
 * vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
 *
 * With V_SAVE set in flags, the vnode is first synced with VOP_FSYNC()
 * (using cred and p), and it is a panic for dirty buffers to remain
 * afterwards.  The buffers on the vnode's clean and dirty lists are
 * then marked B_INVAL and released one by one.  With V_SAVEMETA set,
 * buffers whose b_lblkno is negative are left alone.
 *
 * slpflag and slptimeo are handed to tsleep() when a busy buffer has
 * to be waited for.
 *
 * Returns 0 on success, otherwise the error from the sync step or a
 * non-zero tsleep() result.
 */
574: int
575: vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
576: register struct vnode *vp;
577: int flags;
578: struct ucred *cred;
579: struct proc *p;
580: int slpflag, slptimeo;
581: {
582: register struct buf *bp;
583: struct buf *nbp, *blist;
584: int s, error = 0;
585:
/* V_SAVE: write dirty data out before anything is invalidated. */
586: if (flags & V_SAVE) {
587: #if MACH_NBC
588: if ((vp->v_type == VREG) && (vp->v_vm_info) && (vp->v_vm_info->mapped))
589: if ((error = mapfs_fsync(vp)))
590: return (error);
591: #endif /* MACH_NBC */
592: if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
593: return (error);
594: if (vp->v_dirtyblkhd.lh_first != NULL)
595: panic("vinvalbuf: dirty bufs");
596: }
597:
598: /*
599: * make sure we don't have any lingering state
600: * associated with cluster writes
601: */
602: vp->v_cstart = 0;
603: vp->v_clen = 0;
604: vp->v_lasta = 0;
605: vp->v_lastw = 0;
606:
/*
 * NOTE(review): a mapfs_invalidate() failure is only printed (and only
 * under DIAGNOSTIC); it is not returned -- the function ends with
 * return (0).
 */
607: #if MACH_NBC
608: if (vp->v_type == VREG) {
609: error = mapfs_invalidate(vp);
610: #if DIAGNOSTIC
611: if (error)
612: kprintf("vinvalbuf: mapfs_invalidate(0x%x) returned %d\n", vp, error);
613: #endif
614: }
615: #endif /* MACH_NBC */
616:
/*
 * Outer loop: pick a starting buffer from the list heads, and repeat
 * until neither list yields one.
 */
617: for (;;) {
/*
 * Start from the clean list; if that gives nothing, fall back to the
 * dirty list.  Under V_SAVEMETA, step past leading buffers with a
 * negative block number.
 */
618: if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
619: while (blist && blist->b_lblkno < 0)
620: blist = blist->b_vnbufs.le_next;
621: if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
622: (flags & V_SAVEMETA))
623: while (blist && blist->b_lblkno < 0)
624: blist = blist->b_vnbufs.le_next;
625: if (!blist)
626: break;
627:
/*
 * Inner loop: walk the chosen list.  nbp is fetched up front because
 * bp may leave the list while it is being processed.
 */
628: for (bp = blist; bp; bp = nbp) {
629: nbp = bp->b_vnbufs.le_next;
630: if (flags & V_SAVEMETA && bp->b_lblkno < 0)
631: continue;
632: s = splbio();
/*
 * Busy buffer: flag it B_WANTED and sleep on it.  A non-zero tsleep()
 * result is returned to the caller; otherwise break out so the outer
 * loop rescans from the list heads.
 */
633: if (bp->b_flags & B_BUSY) {
634: bp->b_flags |= B_WANTED;
635: error = tsleep((caddr_t)bp,
636: slpflag | (PRIBIO + 1), "vinvalbuf",
637: slptimeo);
638: splx(s);
639: if (error)
640: return (error);
641: break;
642: }
/* Claim the buffer: off the free list and marked busy. */
643: bremfree(bp);
644: bp->b_flags |= B_BUSY;
645: splx(s);
646: /*
647: * XXX Since there are no node locks for NFS, I believe
648: * there is a slight chance that a delayed write will
649: * occur while sleeping just above, so check for it.
650: */
651: if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
652: (void) VOP_BWRITE(bp);
653: break;
654: }
/* Otherwise discard the buffer's contents and release it. */
655: bp->b_flags |= B_INVAL;
656: brelse(bp);
657: }
658: }
/* Without V_SAVEMETA both lists must now be empty. */
659: if (!(flags & V_SAVEMETA) &&
660: (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
661: panic("vinvalbuf: flush failed");
662: return (0);
663: }
664:
665: /*
666: * Associate a buffer with a vnode.
667: */
668: void
669: bgetvp(vp, bp)
670: register struct vnode *vp;
671: register struct buf *bp;
672: {
673:
674: if (bp->b_vp)
675: panic("bgetvp: not free");
676: VHOLD(vp);
677: bp->b_vp = vp;
678: if (vp->v_type == VBLK || vp->v_type == VCHR)
679: bp->b_dev = vp->v_rdev;
680: else
681: bp->b_dev = NODEV;
682: /*
683: * Insert onto list for new vnode.
684: */
685: bufinsvn(bp, &vp->v_cleanblkhd);
686: }
687:
688: /*
689: * Disassociate a buffer from a vnode.
690: */
691: void
692: brelvp(bp)
693: register struct buf *bp;
694: {
695: struct vnode *vp;
696:
697: if (bp->b_vp == (struct vnode *) 0)
698: panic("brelvp: NULL");
699: /*
700: * Delete from old vnode list, if on one.
701: */
702: if (bp->b_vnbufs.le_next != NOLIST)
703: bufremvn(bp);
704: vp = bp->b_vp;
705: bp->b_vp = (struct vnode *) 0;
706: HOLDRELE(vp);
707: }
708:
709: /*
710: * Reassign a buffer from one vnode to another.
711: * Used to assign file specific control information
712: * (indirect blocks) to the vnode to which they belong.
713: */
714: void
715: reassignbuf(bp, newvp)
716: register struct buf *bp;
717: register struct vnode *newvp;
718: {
719: register struct buflists *listheadp;
720:
721: if (newvp == NULL) {
722: printf("reassignbuf: NULL");
723: return;
724: }
725: /*
726: * Delete from old vnode list, if on one.
727: */
728: if (bp->b_vnbufs.le_next != NOLIST)
729: bufremvn(bp);
730: /*
731: * If dirty, put on list of dirty buffers;
732: * otherwise insert onto list of clean buffers.
733: */
734: if (bp->b_flags & B_DELWRI)
735: listheadp = &newvp->v_dirtyblkhd;
736: else
737: listheadp = &newvp->v_cleanblkhd;
738: bufinsvn(bp, listheadp);
739: }
740:
741: /*
742: * Create a vnode for a block device.
743: * Used for root filesystem, argdev, and swap areas.
744: * Also used for memory file system special devices.
745: */
746: int
747: bdevvp(dev, vpp)
748: dev_t dev;
749: struct vnode **vpp;
750: {
751: register struct vnode *vp;
752: struct vnode *nvp;
753: int error;
754:
755: if (dev == NODEV) {
756: *vpp = NULLVP;
757: return (ENODEV);
758: }
759: error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
760: if (error) {
761: *vpp = NULLVP;
762: return (error);
763: }
764: vp = nvp;
765: vp->v_type = VBLK;
766: if (nvp = checkalias(vp, dev, (struct mount *)0)) {
767: vput(vp);
768: vp = nvp;
769: }
770: *vpp = vp;
771: return (0);
772: }
773:
774: /*
775: * Check to see if the new vnode represents a special device
776: * for which we already have a vnode (either because of
777: * bdevvp() or because of a different vnode representing
778: * the same block device). If such an alias exists, deallocate
779: * the existing contents and return the aliased vnode. The
780: * caller is responsible for filling it with its new contents.
781: */
/*
 * checkalias(nvp, nvp_rdev, mp)
 *
 * nvp is the new vnode, nvp_rdev the device it stands for, and mp the
 * mount the returned alias (if any) is moved to.
 *
 * Returns NULLVP when nvp is not a block or character device vnode, or
 * when nvp itself was entered into the special-device hash.  Returns
 * the existing vnode when one with the same device and type, tagged
 * VT_NON, was found; that vnode has been cleaned and given nvp's
 * operations vector and tag, and nvp's type has been reset to VNON.
 */
782: struct vnode *
783: checkalias(nvp, nvp_rdev, mp)
784: register struct vnode *nvp;
785: dev_t nvp_rdev;
786: struct mount *mp;
787: {
788: struct proc *p = current_proc(); /* XXX */
789: struct vnode *vp;
790: struct vnode **vpp;
791: struct specinfo * bufhold;
792: int buffree = 1;
793:
794: if (nvp->v_type != VBLK && nvp->v_type != VCHR)
795: return (NULLVP);
796:
/*
 * The specinfo is allocated up front, before spechash_slock is taken.
 * buffree records whether it ended up unused and must be given back.
 */
797: bufhold = (struct specinfo *)_MALLOC_ZONE(sizeof(struct specinfo),
798: M_VNODE, M_WAITOK);
799: vpp = &speclisth[SPECHASH(nvp_rdev)];
/* Every "goto loop" below restarts the scan after the hash lock was dropped. */
800: loop:
801: simple_lock(&spechash_slock);
802: for (vp = *vpp; vp; vp = vp->v_specnext) {
803: if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
804: continue;
805: /*
806: * Alias, but not in use, so flush it out.
807: */
808: simple_lock(&vp->v_interlock);
809: if (vp->v_usecount == 0) {
810: simple_unlock(&spechash_slock);
811: vgonel(vp, p);
812: goto loop;
813: }
/* Alias in use: take a reference and exclusive lock; on failure rescan. */
814: if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
815: simple_unlock(&spechash_slock);
816: goto loop;
817: }
818: break;
819: }
/*
 * No alias, or an alias whose tag is not VT_NON: nvp takes the
 * preallocated specinfo and goes at the head of the hash chain.
 * NOTE(review): the chain head (*vpp) is stored after spechash_slock
 * has been released.
 */
820: if (vp == NULL || vp->v_tag != VT_NON) {
821: nvp->v_specinfo = bufhold;
822: buffree = 0; /* buffer used */
823: bzero(nvp->v_specinfo, sizeof(struct specinfo));
824: nvp->v_rdev = nvp_rdev;
825: nvp->v_hashchain = vpp;
826: nvp->v_specnext = *vpp;
827: nvp->v_specflags = 0;
828: simple_unlock(&spechash_slock);
829: *vpp = nvp;
/* An alias exists: mark both vnodes VALIASED and drop the vget() reference. */
830: if (vp != NULLVP) {
831: nvp->v_flag |= VALIASED;
832: vp->v_flag |= VALIASED;
833: vput(vp);
834: }
835: /* Since buffer is used just return */
836: return (NULLVP);
837: }
/*
 * An alias tagged VT_NON was found: clean it out and let it take over
 * nvp's operations vector and tag.
 */
838: simple_unlock(&spechash_slock);
839: VOP_UNLOCK(vp, 0, p);
840: simple_lock(&vp->v_interlock);
841: vclean(vp, 0, p);
842: vp->v_op = nvp->v_op;
843: vp->v_tag = nvp->v_tag;
844: nvp->v_type = VNON;
845: insmntque(vp, mp);
/* The preallocated specinfo was not needed on this path. */
846: if (buffree)
847: _FREE_ZONE((void *)bufhold, sizeof (struct specinfo), M_VNODE);
848: return (vp);
849: }
850:
851: /*
852: * Grab a particular vnode from the free list, increment its
853: * reference count and lock it. The vnode lock bit is set the
854: * vnode is being eliminated in vgone. The process is awakened
855: * when the transition is completed, and an error returned to
856: * indicate that the vnode is no longer usable (possibly having
857: * been changed to a new file system type).
858: */
859: int
860: vget(vp, flags, p)
861: struct vnode *vp;
862: int flags;
863: struct proc *p;
864: {
865: int error;
866:
867: /*
868: * If the vnode is in the process of being cleaned out for
869: * another use, we wait for the cleaning to finish and then
870: * return failure. Cleaning is determined by checking that
871: * the VXLOCK flag is set.
872: */
873: if ((flags & LK_INTERLOCK) == 0)
874: simple_lock(&vp->v_interlock);
875: if (vp->v_flag & VXLOCK) {
876: vp->v_flag |= VXWANT;
877: simple_unlock(&vp->v_interlock);
878: tsleep((caddr_t)vp, PINOD, "vget", 0);
879: return (ENOENT);
880: }
881: if (vp->v_usecount == 0) {
882: simple_lock(&vnode_free_list_slock);
883: TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
884: freevnodes--;
885: simple_unlock(&vnode_free_list_slock);
886: }
887: if (++vp->v_usecount <= 0)
888: panic("vget: v_usecount");
889: if (flags & LK_TYPE_MASK) {
890: if (error = vn_lock(vp, flags | LK_INTERLOCK, p))
891: vrele(vp);
892: return (error);
893: }
894: simple_unlock(&vp->v_interlock);
895: return (0);
896: }
897:
898: /*
899: * Stubs to use when there is no locking to be done on the underlying object.
900: * A minimal shared lock is necessary to ensure that the underlying object
901: * is not revoked while an operation is in progress. So, an active shared
902: * count is maintained in an auxillary vnode lock structure.
903: */
904: int
905: vop_nolock(ap)
906: struct vop_lock_args /* {
907: struct vnode *a_vp;
908: int a_flags;
909: struct proc *a_p;
910: } */ *ap;
911: {
912: #ifdef notyet
913: /*
914: * This code cannot be used until all the non-locking filesystems
915: * (notably NFS) are converted to properly lock and release nodes.
916: * Also, certain vnode operations change the locking state within
917: * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
918: * and symlink). Ideally these operations should not change the
919: * lock state, but should be changed to let the caller of the
920: * function unlock them. Otherwise all intermediate vnode layers
921: * (such as union, umapfs, etc) must catch these functions to do
922: * the necessary locking at their layer. Note that the inactive
923: * and lookup operations also change their lock state, but this
924: * cannot be avoided, so these two operations will always need
925: * to be handled in intermediate layers.
926: */
927: struct vnode *vp = ap->a_vp;
928: int vnflags, flags = ap->a_flags;
929:
930: if (vp->v_vnlock == NULL) {
931: if ((flags & LK_TYPE_MASK) == LK_DRAIN)
932: return (0);
933: MALLOC_ZONE(vp->v_vnlock, struct lock__bsd__ *,
934: sizeof(struct lock__bsd__), M_VNODE, M_WAITOK);
935: lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
936: }
937: switch (flags & LK_TYPE_MASK) {
938: case LK_DRAIN:
939: vnflags = LK_DRAIN;
940: break;
941: case LK_EXCLUSIVE:
942: case LK_SHARED:
943: vnflags = LK_SHARED;
944: break;
945: case LK_UPGRADE:
946: case LK_EXCLUPGRADE:
947: case LK_DOWNGRADE:
948: return (0);
949: case LK_RELEASE:
950: default:
951: panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
952: }
953: if (flags & LK_INTERLOCK)
954: vnflags |= LK_INTERLOCK;
955: return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
956: #else /* for now */
957: /*
958: * Since we are not using the lock manager, we must clear
959: * the interlock here.
960: */
961: if (ap->a_flags & LK_INTERLOCK)
962: simple_unlock(&ap->a_vp->v_interlock);
963: return (0);
964: #endif
965: }
966:
967: /*
968: * Decrement the active use count.
969: */
970: int
971: vop_nounlock(ap)
972: struct vop_unlock_args /* {
973: struct vnode *a_vp;
974: int a_flags;
975: struct proc *a_p;
976: } */ *ap;
977: {
978: struct vnode *vp = ap->a_vp;
979:
980: if (vp->v_vnlock == NULL)
981: return (0);
982: return (lockmgr(vp->v_vnlock, LK_RELEASE, NULL, ap->a_p));
983: }
984:
985: /*
986: * Return whether or not the node is in use.
987: */
988: int
989: vop_noislocked(ap)
990: struct vop_islocked_args /* {
991: struct vnode *a_vp;
992: } */ *ap;
993: {
994: struct vnode *vp = ap->a_vp;
995:
996: if (vp->v_vnlock == NULL)
997: return (0);
998: return (lockstatus(vp->v_vnlock));
999: }
1000:
1001: /*
1002: * Vnode reference.
1003: */
1004: void
1005: vref(vp)
1006: struct vnode *vp;
1007: {
1008:
1009: simple_lock(&vp->v_interlock);
1010: if (vp->v_usecount <= 0)
1011: panic("vref used where vget required");
1012: if (++vp->v_usecount <= 0)
1013: panic("vref v_usecount");
1014: simple_unlock(&vp->v_interlock);
1015: }
1016:
1017: /*
1018: * vput(), just unlock and vrele()
1019: */
1020: void
1021: vput(vp)
1022: struct vnode *vp;
1023: {
1024: struct proc *p = current_proc(); /* XXX */
1025:
1026: #if DIAGNOSTIC
1027: if (vp == NULL)
1028: panic("vput: null vp");
1029: #endif
1030: simple_lock(&vp->v_interlock);
1031: vp->v_usecount--;
1032: if (vp->v_usecount > 0) {
1033: simple_unlock(&vp->v_interlock);
1034: VOP_UNLOCK(vp, 0, p);
1035: return;
1036: }
1037: #if DIAGNOSTIC
1038: if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1039: vprint("vput: bad ref count", vp);
1040: panic("vput: ref cnt");
1041: }
1042: #endif
1043: /*
1044: * insert at tail of LRU list
1045: */
1046: simple_lock(&vnode_free_list_slock);
1047: TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1048: freevnodes++;
1049: simple_unlock(&vnode_free_list_slock);
1050: simple_unlock(&vp->v_interlock);
1051: VOP_INACTIVE(vp, p);
1052: }
1053:
1054: /*
1055: * Vnode release.
1056: * If count drops to zero, call inactive routine and return to freelist.
1057: */
1058: void
1059: vrele(vp)
1060: struct vnode *vp;
1061: {
1062: struct proc *p = current_proc(); /* XXX */
1063:
1064: #if DIAGNOSTIC
1065: if (vp == NULL)
1066: panic("vrele: null vp");
1067: #endif
1068: simple_lock(&vp->v_interlock);
1069: vp->v_usecount--;
1070: if (vp->v_usecount > 0) {
1071: simple_unlock(&vp->v_interlock);
1072: return;
1073: }
1074: #if DIAGNOSTIC
1075: if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1076: vprint("vrele: bad ref count", vp);
1077: panic("vrele: ref cnt");
1078: }
1079: #endif
1080: /*
1081: * insert at tail of LRU list
1082: */
1083: simple_lock(&vnode_free_list_slock);
1084: if (vp->v_flag & VAGE) {
1085: TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1086: vp->v_flag &= ~VAGE;
1087: } else
1088: TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1089: freevnodes++;
1090: simple_unlock(&vnode_free_list_slock);
1091: if ((vp->v_flag & VXLOCK) == 0) {
1092: if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0)
1093: VOP_INACTIVE(vp, p);
1094: #if DIAGNOSTIC
1095: else
1096: kprintf("vrele: vn_lock() failed for vp = 0x%08x\n", vp);
1097: } else {
1098: kprintf("vrele: attempted deadlock!\n");
1099: simple_unlock(&vp->v_interlock);
1100: #endif
1101: }
1102:
1103: }
1104:
1105: /*
1106: * Page or buffer structure gets a reference.
1107: */
1108: void
1109: vhold(vp)
1110: register struct vnode *vp;
1111: {
1112:
1113: simple_lock(&vp->v_interlock);
1114: vp->v_holdcnt++;
1115: simple_unlock(&vp->v_interlock);
1116: }
1117:
1118: /*
1119: * Page or buffer structure frees a reference.
1120: */
1121: void
1122: holdrele(vp)
1123: register struct vnode *vp;
1124: {
1125:
1126: simple_lock(&vp->v_interlock);
1127: if (vp->v_holdcnt <= 0)
1128: panic("holdrele: holdcnt");
1129: vp->v_holdcnt--;
1130: simple_unlock(&vp->v_interlock);
1131: }
1132:
1133: /*
1134: * Remove any vnodes in the vnode table belonging to mount point mp.
1135: *
1136: * If MNT_NOFORCE is specified, there should not be any active ones,
1137: * return error if any are found (nb: this is a user error, not a
1138: * system error). If MNT_FORCE is specified, detach any active vnodes
1139: * that are found.
1140: */
1141: #if DIAGNOSTIC
1142: int busyprt = 0; /* print out busy vnodes */
1143: struct ctldebug debug1 = { "busyprt", &busyprt };
1144: #endif
1145:
1146: int
1147: vflush(mp, skipvp, flags)
1148: struct mount *mp;
1149: struct vnode *skipvp;
1150: int flags;
1151: {
1152: struct proc *p = current_proc(); /* XXX */
1153: struct vnode *vp, *nvp;
1154: int busy = 0;
1155:
1156: simple_lock(&mntvnode_slock);
1157: loop:
1158: for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
1159: if (vp->v_mount != mp)
1160: goto loop;
1161: nvp = vp->v_mntvnodes.le_next;
1162: /*
1163: * Skip over a selected vnode.
1164: */
1165: if (vp == skipvp)
1166: continue;
1167:
1168: simple_lock(&vp->v_interlock);
1169: /*
1170: * Skip over a vnodes marked VSYSTEM.
1171: */
1172: if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
1173: simple_unlock(&vp->v_interlock);
1174: continue;
1175: }
1176: /*
1177: * If WRITECLOSE is set, only flush out regular file
1178: * vnodes open for writing.
1179: */
1180: if ((flags & WRITECLOSE) &&
1181: (vp->v_writecount == 0 || vp->v_type != VREG)) {
1182: simple_unlock(&vp->v_interlock);
1183: continue;
1184: }
1185: /*
1186: * With v_usecount == 0, all we need to do is clear
1187: * out the vnode data structures and we are done.
1188: */
1189: if (vp->v_usecount == 0) {
1190: simple_unlock(&mntvnode_slock);
1191: vgonel(vp, p);
1192: simple_lock(&mntvnode_slock);
1193: continue;
1194: }
1195: /*
1196: * If FORCECLOSE is set, forcibly close the vnode.
1197: * For block or character devices, revert to an
1198: * anonymous device. For all other files, just kill them.
1199: */
1200: if (flags & FORCECLOSE) {
1201: simple_unlock(&mntvnode_slock);
1202: if (vp->v_type != VBLK && vp->v_type != VCHR) {
1203: vgonel(vp, p);
1204: } else {
1205: vclean(vp, 0, p);
1206: vp->v_op = spec_vnodeop_p;
1207: insmntque(vp, (struct mount *)0);
1208: }
1209: simple_lock(&mntvnode_slock);
1210: continue;
1211: }
1212: #if DIAGNOSTIC
1213: if (busyprt)
1214: vprint("vflush: busy vnode", vp);
1215: #endif
1216: simple_unlock(&vp->v_interlock);
1217: busy++;
1218: }
1219: simple_unlock(&mntvnode_slock);
1220: if (busy)
1221: return (EBUSY);
1222: return (0);
1223: }
1224:
1225: /*
1226: * Disassociate the underlying file system from a vnode.
1227: * The vnode interlock is held on entry.
1228: */
1229: static void
1230: vclean(vp, flags, p)
1231: struct vnode *vp;
1232: int flags;
1233: struct proc *p;
1234: {
1235: int active;
1236:
1237: /*
1238: * Check to see if the vnode is in use.
1239: * If so we have to reference it before we clean it out
1240: * so that its count cannot fall to zero and generate a
1241: * race against ourselves to recycle it.
1242: */
1243: if (active = vp->v_usecount)
1244: if (++vp->v_usecount <= 0)
1245: panic("vclean: v_usecount");
1246: /*
1247: * Prevent the vnode from being recycled or
1248: * brought into use while we clean it out.
1249: */
1250: if (vp->v_flag & VXLOCK)
1251: panic("vclean: deadlock");
1252: vp->v_flag |= VXLOCK;
1253: /*
1254: * Even if the count is zero, the VOP_INACTIVE routine may still
1255: * have the object locked while it cleans it out. The VOP_LOCK
1256: * ensures that the VOP_INACTIVE routine is done with its work.
1257: * For active vnodes, it ensures that no other activity can
1258: * occur while the underlying object is being cleaned out.
1259: */
1260: VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
1261: /*
1262: * Clean out any buffers associated with the vnode.
1263: */
1264: if (flags & DOCLOSE)
1265: vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1266:
1267: if ((vp->v_type == VREG) && (vp->v_vm_info != NULL))
1268: {
1269: vm_info_free(vp);
1270: vp->v_vm_info = NULL;
1271: }
1272: /*
1273: * If purging an active vnode, it must be closed and
1274: * deactivated before being reclaimed. Note that the
1275: * VOP_INACTIVE will unlock the vnode.
1276: */
1277: if (active) {
1278: if (flags & DOCLOSE)
1279: VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
1280: VOP_INACTIVE(vp, p);
1281: } else {
1282: /*
1283: * Any other processes trying to obtain this lock must first
1284: * wait for VXLOCK to clear, then call the new lock operation.
1285: */
1286: VOP_UNLOCK(vp, 0, p);
1287: }
1288: /*
1289: * Reclaim the vnode.
1290: */
1291: if (VOP_RECLAIM(vp, p))
1292: panic("vclean: cannot reclaim");
1293: if (active)
1294: vrele(vp);
1295: cache_purge(vp);
1296: if (vp->v_vnlock) {
1297: if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
1298: vprint("vclean: lock not drained", vp);
1299: FREE_ZONE(vp->v_vnlock, sizeof (struct lock__bsd__), M_VNODE);
1300: vp->v_vnlock = NULL;
1301: }
1302:
1303:
1304: /*
1305: * Done with purge, notify sleepers of the grim news.
1306: */
1307: vp->v_op = dead_vnodeop_p;
1308: vp->v_tag = VT_NON;
1309: vp->v_flag &= ~VXLOCK;
1310: if (vp->v_flag & VXWANT) {
1311: vp->v_flag &= ~VXWANT;
1312: wakeup((caddr_t)vp);
1313: }
1314: }
1315:
1316: /*
1317: * Eliminate all activity associated with the requested vnode
1318: * and with all vnodes aliased to the requested vnode.
1319: */
1320: int
1321: vop_revoke(ap)
1322: struct vop_revoke_args /* {
1323: struct vnode *a_vp;
1324: int a_flags;
1325: } */ *ap;
1326: {
1327: struct vnode *vp, *vq;
1328: struct proc *p = current_proc(); /* XXX */
1329:
1330: #if DIAGNOSTIC
1331: if ((ap->a_flags & REVOKEALL) == 0)
1332: panic("vop_revoke");
1333: #endif
1334:
1335: vp = ap->a_vp;
1336: simple_lock(&vp->v_interlock);
1337:
1338: if (vp->v_flag & VALIASED) {
1339: /*
1340: * If a vgone (or vclean) is already in progress,
1341: * wait until it is done and return.
1342: */
1343: if (vp->v_flag & VXLOCK) {
1344: vp->v_flag |= VXWANT;
1345: simple_unlock(&vp->v_interlock);
1346: tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
1347: return (0);
1348: }
1349: /*
1350: * Ensure that vp will not be vgone'd while we
1351: * are eliminating its aliases.
1352: */
1353: vp->v_flag |= VXLOCK;
1354: simple_unlock(&vp->v_interlock);
1355: while (vp->v_flag & VALIASED) {
1356: simple_lock(&spechash_slock);
1357: for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1358: if (vq->v_rdev != vp->v_rdev ||
1359: vq->v_type != vp->v_type || vp == vq)
1360: continue;
1361: simple_unlock(&spechash_slock);
1362: vgone(vq);
1363: break;
1364: }
1365: if (vq == NULLVP)
1366: simple_unlock(&spechash_slock);
1367: }
1368: /*
1369: * Remove the lock so that vgone below will
1370: * really eliminate the vnode after which time
1371: * vgone will awaken any sleepers.
1372: */
1373: simple_lock(&vp->v_interlock);
1374: vp->v_flag &= ~VXLOCK;
1375: }
1376: vgonel(vp, p);
1377: return (0);
1378: }
1379:
1380: /*
1381: * Recycle an unused vnode to the front of the free list.
1382: * Release the passed interlock if the vnode will be recycled.
1383: */
1384: int
1385: vrecycle(vp, inter_lkp, p)
1386: struct vnode *vp;
1387: struct slock *inter_lkp;
1388: struct proc *p;
1389: {
1390:
1391: simple_lock(&vp->v_interlock);
1392: if (vp->v_usecount == 0) {
1393: if (inter_lkp)
1394: simple_unlock(inter_lkp);
1395: vgonel(vp, p);
1396: return (1);
1397: }
1398: simple_unlock(&vp->v_interlock);
1399: return (0);
1400: }
1401:
1402: /*
1403: * Eliminate all activity associated with a vnode
1404: * in preparation for reuse.
1405: */
1406: void
1407: vgone(vp)
1408: struct vnode *vp;
1409: {
1410: struct proc *p = current_proc(); /* XXX */
1411:
1412: simple_lock(&vp->v_interlock);
1413: vgonel(vp, p);
1414: }
1415:
1416: /*
1417: * vgone, with the vp interlock held.
1418: */
1419: void
1420: vgonel(vp, p)
1421: struct vnode *vp;
1422: struct proc *p;
1423: {
1424: struct vnode *vq;
1425: struct vnode *vx;
1426:
1427: /*
1428: * If a vgone (or vclean) is already in progress,
1429: * wait until it is done and return.
1430: */
1431: if (vp->v_flag & VXLOCK) {
1432: vp->v_flag |= VXWANT;
1433: simple_unlock(&vp->v_interlock);
1434: tsleep((caddr_t)vp, PINOD, "vgone", 0);
1435: return;
1436: }
1437: /*
1438: * Clean out the filesystem specific data.
1439: */
1440: vclean(vp, DOCLOSE, p);
1441: /*
1442: * Delete from old mount point vnode list, if on one.
1443: */
1444: if (vp->v_mount != NULL)
1445: insmntque(vp, (struct mount *)0);
1446: /*
1447: * If special device, remove it from special device alias list
1448: * if it is on one.
1449: */
1450: if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1451: simple_lock(&spechash_slock);
1452: if (*vp->v_hashchain == vp) {
1453: *vp->v_hashchain = vp->v_specnext;
1454: } else {
1455: for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1456: if (vq->v_specnext != vp)
1457: continue;
1458: vq->v_specnext = vp->v_specnext;
1459: break;
1460: }
1461: if (vq == NULL)
1462: panic("missing bdev");
1463: }
1464: if (vp->v_flag & VALIASED) {
1465: vx = NULL;
1466: for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1467: if (vq->v_rdev != vp->v_rdev ||
1468: vq->v_type != vp->v_type)
1469: continue;
1470: if (vx)
1471: break;
1472: vx = vq;
1473: }
1474: if (vx == NULL)
1475: panic("missing alias");
1476: if (vq == NULL)
1477: vx->v_flag &= ~VALIASED;
1478: vp->v_flag &= ~VALIASED;
1479: }
1480: simple_unlock(&spechash_slock);
1481: FREE_ZONE(vp->v_specinfo, sizeof (struct specinfo), M_VNODE);
1482: vp->v_specinfo = NULL;
1483: }
1484: /*
1485: * If it is on the freelist and not already at the head,
1486: * move it to the head of the list. The test of the back
1487: * pointer and the reference count of zero is because
1488: * it will be removed from the free list by getnewvnode,
1489: * but will not have its reference count incremented until
1490: * after calling vgone. If the reference count were
1491: * incremented first, vgone would (incorrectly) try to
1492: * close the previous instance of the underlying object.
1493: * So, the back pointer is explicitly set to `0xdeadb' in
1494: * getnewvnode after removing it from the freelist to ensure
1495: * that we do not try to move it here.
1496: */
1497: if (vp->v_usecount == 0) {
1498: simple_lock(&vnode_free_list_slock);
1499: if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
1500: vnode_free_list.tqh_first != vp) {
1501: TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1502: TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1503: }
1504: simple_unlock(&vnode_free_list_slock);
1505: }
1506: vp->v_type = VBAD;
1507: }
1508:
1509: /*
1510: * Lookup a vnode by device number.
1511: */
1512: int
1513: vfinddev(dev, type, vpp)
1514: dev_t dev;
1515: enum vtype type;
1516: struct vnode **vpp;
1517: {
1518: struct vnode *vp;
1519: int rc = 0;
1520:
1521: simple_lock(&spechash_slock);
1522: for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1523: if (dev != vp->v_rdev || type != vp->v_type)
1524: continue;
1525: *vpp = vp;
1526: rc = 1;
1527: break;
1528: }
1529: simple_unlock(&spechash_slock);
1530: return (rc);
1531: }
1532:
1533: /*
1534: * Calculate the total number of references to a special device.
1535: */
1536: int
1537: vcount(vp)
1538: struct vnode *vp;
1539: {
1540: struct vnode *vq, *vnext;
1541: int count;
1542:
1543: loop:
1544: if ((vp->v_flag & VALIASED) == 0)
1545: return (vp->v_usecount);
1546: simple_lock(&spechash_slock);
1547: for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1548: vnext = vq->v_specnext;
1549: if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1550: continue;
1551: /*
1552: * Alias, but not in use, so flush it out.
1553: */
1554: if (vq->v_usecount == 0 && vq != vp) {
1555: simple_unlock(&spechash_slock);
1556: vgone(vq);
1557: goto loop;
1558: }
1559: count += vq->v_usecount;
1560: }
1561: simple_unlock(&spechash_slock);
1562: return (count);
1563: }
1564:
1565: /*
1566: * Print out a description of a vnode.
1567: */
1568: static char *typename[] =
1569: { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
1570:
1571: void
1572: vprint(label, vp)
1573: char *label;
1574: register struct vnode *vp;
1575: {
1576: char buf[64];
1577:
1578: if (label != NULL)
1579: printf("%s: ", label);
1580: printf("type %s, usecount %d, writecount %d, refcount %d,",
1581: typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1582: vp->v_holdcnt);
1583: buf[0] = '\0';
1584: if (vp->v_flag & VROOT)
1585: strcat(buf, "|VROOT");
1586: if (vp->v_flag & VTEXT)
1587: strcat(buf, "|VTEXT");
1588: if (vp->v_flag & VSYSTEM)
1589: strcat(buf, "|VSYSTEM");
1590: if (vp->v_flag & VXLOCK)
1591: strcat(buf, "|VXLOCK");
1592: if (vp->v_flag & VXWANT)
1593: strcat(buf, "|VXWANT");
1594: if (vp->v_flag & VBWAIT)
1595: strcat(buf, "|VBWAIT");
1596: if (vp->v_flag & VALIASED)
1597: strcat(buf, "|VALIASED");
1598: if (buf[0] != '\0')
1599: printf(" flags (%s)", &buf[1]);
1600: if (vp->v_data == NULL) {
1601: printf("\n");
1602: } else {
1603: printf("\n\t");
1604: VOP_PRINT(vp);
1605: }
1606: }
1607:
#ifdef DEBUG
/*
 * printlockedvnodes(): list all of the locked vnodes in the system.
 * Called when debugging the kernel.
 *
 * Mounts that cannot be busied without waiting are skipped.
 */
void
printlockedvnodes()
{
	struct proc *p = current_proc();	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p) == 0) {
			vp = mp->mnt_vnodelist.lh_first;
			while (vp != NULL) {
				if (VOP_ISLOCKED(vp))
					vprint((char *)0, vp);
				vp = vp->v_mntvnodes.le_next;
			}
			/* Retake the list lock before stepping to the next mount. */
			simple_lock(&mountlist_slock);
			nmp = mp->mnt_list.cqe_next;
			vfs_unbusy(mp, p);
		} else {
			nmp = mp->mnt_list.cqe_next;
		}
	}
	simple_unlock(&mountlist_slock);
}
#endif
1640:
1641: /*
1642: * Top level filesystem related information gathering.
1643: */
1644: int
1645: vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
1646: int *name;
1647: u_int namelen;
1648: void *oldp;
1649: size_t *oldlenp;
1650: void *newp;
1651: size_t newlen;
1652: struct proc *p;
1653: {
1654: struct ctldebug *cdp;
1655: struct vfsconf *vfsp;
1656:
1657: #ifdef NeXT
1658: if (name[0] == VFS_NUMMNTOPS) {
1659: extern unsigned int vfs_nummntops;
1660: return (sysctl_rdint(oldp, oldlenp, newp, vfs_nummntops));
1661: }
1662: #endif
1663: /* all sysctl names at this level are at least name and field */
1664: if (namelen < 2)
1665: return (ENOTDIR); /* overloaded */
1666: if (name[0] != VFS_GENERIC) {
1667: for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1668: if (vfsp->vfc_typenum == name[0])
1669: break;
1670: if (vfsp == NULL)
1671: return (EOPNOTSUPP);
1672: return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
1673: oldp, oldlenp, newp, newlen, p));
1674: }
1675: switch (name[1]) {
1676: case VFS_MAXTYPENUM:
1677: return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
1678: case VFS_CONF:
1679: if (namelen < 3)
1680: return (ENOTDIR); /* overloaded */
1681: for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1682: if (vfsp->vfc_typenum == name[2])
1683: break;
1684: if (vfsp == NULL)
1685: return (EOPNOTSUPP);
1686: return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
1687: sizeof(struct vfsconf)));
1688: }
1689: return (EOPNOTSUPP);
1690: }
1691:
1692: int kinfo_vdebug = 1;
1693: int kinfo_vgetfailed;
1694: #define KINFO_VNODESLOP 10
1695: /*
1696: * Dump vnode list (via sysctl).
1697: * Copyout address of vnode followed by vnode.
1698: */
1699: /* ARGSUSED */
1700: int
1701: sysctl_vnode(where, sizep, p)
1702: char *where;
1703: size_t *sizep;
1704: struct proc *p;
1705: {
1706: struct mount *mp, *nmp;
1707: struct vnode *nvp, *vp;
1708: char *bp = where, *savebp;
1709: char *ewhere;
1710: int error;
1711:
1712: #define VPTRSZ sizeof (struct vnode *)
1713: #define VNODESZ sizeof (struct vnode)
1714: if (where == NULL) {
1715: *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
1716: return (0);
1717: }
1718: ewhere = where + *sizep;
1719:
1720: simple_lock(&mountlist_slock);
1721: for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1722: if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1723: nmp = mp->mnt_list.cqe_next;
1724: continue;
1725: }
1726: savebp = bp;
1727: again:
1728: simple_lock(&mntvnode_slock);
1729: for (vp = mp->mnt_vnodelist.lh_first;
1730: vp != NULL;
1731: vp = nvp) {
1732: /*
1733: * Check that the vp is still associated with
1734: * this filesystem. RACE: could have been
1735: * recycled onto the same filesystem.
1736: */
1737: if (vp->v_mount != mp) {
1738: simple_unlock(&mntvnode_slock);
1739: if (kinfo_vdebug)
1740: printf("kinfo: vp changed\n");
1741: bp = savebp;
1742: goto again;
1743: }
1744: nvp = vp->v_mntvnodes.le_next;
1745: if (bp + VPTRSZ + VNODESZ > ewhere) {
1746: simple_unlock(&mntvnode_slock);
1747: *sizep = bp - where;
1748: return (ENOMEM);
1749: }
1750: simple_unlock(&mntvnode_slock);
1751: if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
1752: (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
1753: return (error);
1754: bp += VPTRSZ + VNODESZ;
1755: simple_lock(&mntvnode_slock);
1756: }
1757: simple_unlock(&mntvnode_slock);
1758: simple_lock(&mountlist_slock);
1759: nmp = mp->mnt_list.cqe_next;
1760: vfs_unbusy(mp, p);
1761: }
1762: simple_unlock(&mountlist_slock);
1763:
1764: *sizep = bp - where;
1765: return (0);
1766: }
1767:
1768: /*
1769: * Check to see if a filesystem is mounted on a block device.
1770: */
1771: int
1772: vfs_mountedon(vp)
1773: struct vnode *vp;
1774: {
1775: struct vnode *vq;
1776: int error = 0;
1777:
1778: if (vp->v_specflags & SI_MOUNTEDON)
1779: return (EBUSY);
1780: if (vp->v_flag & VALIASED) {
1781: simple_lock(&spechash_slock);
1782: for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1783: if (vq->v_rdev != vp->v_rdev ||
1784: vq->v_type != vp->v_type)
1785: continue;
1786: if (vq->v_specflags & SI_MOUNTEDON) {
1787: error = EBUSY;
1788: break;
1789: }
1790: }
1791: simple_unlock(&spechash_slock);
1792: }
1793: return (error);
1794: }
1795:
1796: /*
1797: * Unmount all filesystems. The list is traversed in reverse order
1798: * of mounting to avoid dependencies.
1799: */
1800: void
1801: vfs_unmountall()
1802: {
1803: struct mount *mp, *nmp;
1804: struct proc *p = current_proc(); /* XXX */
1805:
1806: /*
1807: * Since this only runs when rebooting, it is not interlocked.
1808: */
1809: for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
1810: nmp = mp->mnt_list.cqe_prev;
1811: (void) dounmount(mp, MNT_FORCE, p);
1812: }
1813: }
1814:
1815: /*
1816: * Build hash lists of net addresses and hang them off the mount point.
1817: * Called by ufs_mount() to set up the lists of export addresses.
1818: */
1819: static int
1820: vfs_hang_addrlist(mp, nep, argp)
1821: struct mount *mp;
1822: struct netexport *nep;
1823: struct export_args *argp;
1824: {
1825: register struct netcred *np;
1826: register struct radix_node_head *rnh;
1827: register int i;
1828: struct radix_node *rn;
1829: struct sockaddr *saddr, *smask = 0;
1830: struct domain *dom;
1831: int error;
1832:
1833: if (argp->ex_addrlen == 0) {
1834: if (mp->mnt_flag & MNT_DEFEXPORTED)
1835: return (EPERM);
1836: np = &nep->ne_defexported;
1837: np->netc_exflags = argp->ex_flags;
1838: np->netc_anon = argp->ex_anon;
1839: np->netc_anon.cr_ref = 1;
1840: mp->mnt_flag |= MNT_DEFEXPORTED;
1841: return (0);
1842: }
1843: i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
1844: MALLOC(np, struct netcred *, i, M_NETADDR, M_WAITOK);
1845: bzero((caddr_t)np, i);
1846: saddr = (struct sockaddr *)(np + 1);
1847: if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
1848: goto out;
1849: if (saddr->sa_len > argp->ex_addrlen)
1850: saddr->sa_len = argp->ex_addrlen;
1851: if (argp->ex_masklen) {
1852: smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
1853: error = copyin(argp->ex_addr, (caddr_t)smask, argp->ex_masklen);
1854: if (error)
1855: goto out;
1856: if (smask->sa_len > argp->ex_masklen)
1857: smask->sa_len = argp->ex_masklen;
1858: }
1859: i = saddr->sa_family;
1860: if ((rnh = nep->ne_rtable[i]) == 0) {
1861: /*
1862: * Seems silly to initialize every AF when most are not
1863: * used, do so on demand here
1864: */
1865: for (dom = domains; dom; dom = dom->dom_next)
1866: if (dom->dom_family == i && dom->dom_rtattach) {
1867: dom->dom_rtattach((void **)&nep->ne_rtable[i],
1868: dom->dom_rtoffset);
1869: break;
1870: }
1871: if ((rnh = nep->ne_rtable[i]) == 0) {
1872: error = ENOBUFS;
1873: goto out;
1874: }
1875: }
1876: rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
1877: np->netc_rnodes);
1878: if (rn == 0) {
1879: /*
1880: * One of the reasons that rnh_addaddr may fail is that
1881: * the entry already exists. To check for this case, we
1882: * look up the entry to see if it is there. If so, we
1883: * do not need to make a new entry but do return success.
1884: */
1885: _FREE(np, M_NETADDR);
1886: rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh);
1887: if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 &&
1888: ((struct netcred *)rn)->netc_exflags == argp->ex_flags &&
1889: !bcmp((caddr_t)&((struct netcred *)rn)->netc_anon,
1890: (caddr_t)&argp->ex_anon, sizeof(struct ucred)))
1891: return (0);
1892: return (EPERM);
1893: }
1894: np->netc_exflags = argp->ex_flags;
1895: np->netc_anon = argp->ex_anon;
1896: np->netc_anon.cr_ref = 1;
1897: return (0);
1898: out:
1899: _FREE(np, M_NETADDR);
1900: return (error);
1901: }
1902:
1903: /* ARGSUSED */
1904: static int
1905: vfs_free_netcred(rn, w)
1906: struct radix_node *rn;
1907: caddr_t w;
1908: {
1909: register struct radix_node_head *rnh = (struct radix_node_head *)w;
1910:
1911: (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
1912: _FREE((caddr_t)rn, M_NETADDR);
1913: return (0);
1914: }
1915:
1916: /*
1917: * Free the net address hash lists that are hanging off the mount points.
1918: */
1919: static void
1920: vfs_free_addrlist(nep)
1921: struct netexport *nep;
1922: {
1923: register int i;
1924: register struct radix_node_head *rnh;
1925:
1926: for (i = 0; i <= AF_MAX; i++)
1927: if (rnh = nep->ne_rtable[i]) {
1928: (*rnh->rnh_walktree)(rnh, vfs_free_netcred,
1929: (caddr_t)rnh);
1930: _FREE((caddr_t)rnh, M_RTABLE);
1931: nep->ne_rtable[i] = 0;
1932: }
1933: }
1934:
1935: int
1936: vfs_export(mp, nep, argp)
1937: struct mount *mp;
1938: struct netexport *nep;
1939: struct export_args *argp;
1940: {
1941: int error;
1942:
1943: if (argp->ex_flags & MNT_DELEXPORT) {
1944: vfs_free_addrlist(nep);
1945: mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
1946: }
1947: if (argp->ex_flags & MNT_EXPORTED) {
1948: if (error = vfs_hang_addrlist(mp, nep, argp))
1949: return (error);
1950: mp->mnt_flag |= MNT_EXPORTED;
1951: }
1952: return (0);
1953: }
1954:
1955: struct netcred *
1956: vfs_export_lookup(mp, nep, nam)
1957: register struct mount *mp;
1958: struct netexport *nep;
1959: struct mbuf *nam;
1960: {
1961: register struct netcred *np;
1962: register struct radix_node_head *rnh;
1963: struct sockaddr *saddr;
1964:
1965: np = NULL;
1966: if (mp->mnt_flag & MNT_EXPORTED) {
1967: /*
1968: * Lookup in the export list first.
1969: */
1970: if (nam != NULL) {
1971: saddr = mtod(nam, struct sockaddr *);
1972: rnh = nep->ne_rtable[saddr->sa_family];
1973: if (rnh != NULL) {
1974: np = (struct netcred *)
1975: (*rnh->rnh_matchaddr)((caddr_t)saddr,
1976: rnh);
1977: if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
1978: np = NULL;
1979: }
1980: }
1981: /*
1982: * If no address match, use the default if it exists.
1983: */
1984: if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
1985: np = &nep->ne_defexported;
1986: }
1987: return (np);
1988: }
1989:
1990:
#if 0
/*
 * Disabled debugging aids: walk every vnode on the mount lists and on
 * the free list and report those whose use count has gone negative.
 */
int ram_debug=0;

/* Print the vnode pointer when ram_debug is enabled. */
call_ramdebug(struct vnode * vp)
{
	if(ram_debug) {
		printf("vp is %x\n",vp);
	}

}

void
walk_allvnodes()
{
	struct proc *p = current_proc();	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *vp;
	int cnt=0;

	/* Pass 1: vnodes hanging off each mount point. */
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		vp = mp->mnt_vnodelist.lh_first;
		while (vp != NULL) {
			if (vp->v_usecount < 0){
				call_ramdebug(vp);
			}
			vp = vp->v_mntvnodes.le_next;
		}
		nmp = mp->mnt_list.cqe_next;
	}

	/* Pass 2: vnodes on the free list. */
	cnt = 0;
	vp = vnode_free_list.tqh_first;
	while (vp != NULLVP) {
		if ((vp->v_usecount < 0) && ram_debug){
			call_ramdebug(vp);
		}
		cnt++;
		vp = vp->v_freelist.tqe_next;
	}
}
#endif /* 0 */
2028:
/*
 * vm_object_cache_steal(): hand a count to
 * vnode_pager_release_from_cache() by reference and return whatever
 * value it leaves there.
 *
 * NOTE(review): the meaning of the returned value depends on
 * vnode_pager_release_from_cache(), which is defined elsewhere.
 */
int vm_object_cache_steal(int count)
{
	int cnt;
	/* The callee is passed a pointer, so it must be declared with one. */
	void vnode_pager_release_from_cache(int *);

	cnt = count;
	vnode_pager_release_from_cache(&cnt);
	return (cnt);
}
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.