|
|
1.1 root 1: /*
2: * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3: *
4: * @APPLE_LICENSE_HEADER_START@
5: *
6: * The contents of this file constitute Original Code as defined in and
7: * are subject to the Apple Public Source License Version 1.1 (the
8: * "License"). You may not use this file except in compliance with the
9: * License. Please obtain a copy of the License at
10: * http://www.apple.com/publicsource and read it before using this file.
11: *
12: * This Original Code and all software distributed under the License are
13: * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14: * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15: * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16: * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17: * License for the specific language governing rights and limitations
18: * under the License.
19: *
20: * @APPLE_LICENSE_HEADER_END@
21: */
22: /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23: /*
24: * Copyright (c) 1989, 1993
25: * The Regents of the University of California. All rights reserved.
26: *
27: * This code is derived from software contributed to Berkeley by
28: * Rick Macklem at The University of Guelph.
29: *
30: * Redistribution and use in source and binary forms, with or without
31: * modification, are permitted provided that the following conditions
32: * are met:
33: * 1. Redistributions of source code must retain the above copyright
34: * notice, this list of conditions and the following disclaimer.
35: * 2. Redistributions in binary form must reproduce the above copyright
36: * notice, this list of conditions and the following disclaimer in the
37: * documentation and/or other materials provided with the distribution.
38: * 3. All advertising materials mentioning features or use of this software
39: * must display the following acknowledgement:
40: * This product includes software developed by the University of
41: * California, Berkeley and its contributors.
42: * 4. Neither the name of the University nor the names of its contributors
43: * may be used to endorse or promote products derived from this software
44: * without specific prior written permission.
45: *
46: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
47: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
50: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
56: * SUCH DAMAGE.
57: *
58: * @(#)nfs_bio.c 8.9 (Berkeley) 3/30/95
59: * FreeBSD-Id: nfs_bio.c,v 1.44 1997/09/10 19:52:25 phk Exp $
60: */
61:
62: #include <mach_nbc.h>
63: #include <sys/param.h>
64: #include <sys/systm.h>
65: #include <sys/resourcevar.h>
66: #include <sys/signalvar.h>
67: #include <sys/proc.h>
68: #include <sys/buf.h>
69: #include <sys/vnode.h>
70: #include <sys/mount.h>
71: #include <sys/kernel.h>
72: #include <sys/sysctl.h>
73:
74: #include <sys/vm.h>
75: #include <sys/vmparam.h>
76:
77: #include <nfs/rpcv2.h>
78: #include <nfs/nfsproto.h>
79: #include <nfs/nfs.h>
80: #include <nfs/nfsmount.h>
81: #include <nfs/nqnfs.h>
82: #include <nfs/nfsnode.h>
83:
84: #if MACH_NBC
85: #include <kern/mapfs.h>
86: #endif /* MACH_NBC */
87:
88: /* XXX CSM 11/25/97 Revisit when Ramesh merges vm with buffer cache
89: * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these
90: * calls are not in getblk() and brelse() so that they would not be necessary
91: * here.
92: */
93: #ifndef B_VMIO
94: #define vfs_busy_pages(bp, f)
95: #define vfs_unbusy_pages(bp)
96: #endif
97:
98: static struct buf *nfs_getcacheblk __P((struct vnode *vp, daddr_t bn, int size,
99: struct proc *p));
100: static struct buf *nfs_getwriteblk __P((struct vnode *vp, daddr_t bn,
101: int size, struct proc *p,
102: struct ucred *cred, int off, int len));
103:
104: extern int nfs_numasync;
105: extern struct nfsstats nfsstats;
106:
107: /* XXX CSM 12/3/97 Revisit when Ramesh merges vm with buffer cache */
108: #ifdef B_VMIO
109: /*
110: * Vnode op for VM getpages.
111: */
112: int
113: nfs_getpages(ap)
114: struct vop_getpages_args *ap;
115: {
116: int i, bsize;
117: vm_object_t obj;
118: int pcount;
119: struct uio auio;
120: struct iovec aiov;
121: int error;
122: vm_page_t m;
123:
124: if (!(ap->a_vp->v_flag & VVMIO)) {
125: printf("nfs_getpages: called with non-VMIO vnode??\n");
126: return EOPNOTSUPP;
127: }
128:
129: pcount = round_page(ap->a_count) / PAGE_SIZE;
130:
131: obj = ap->a_m[ap->a_reqpage]->object;
132: bsize = ap->a_vp->v_mount->mnt_stat.f_iosize;
133:
134: for (i = 0; i < pcount; i++) {
135: if (i != ap->a_reqpage) {
136: vnode_pager_freepage(ap->a_m[i]);
137: }
138: }
139: m = ap->a_m[ap->a_reqpage];
140:
141: m->busy++;
142: m->flags &= ~PG_BUSY;
143:
144: auio.uio_iov = &aiov;
145: auio.uio_iovcnt = 1;
146: aiov.iov_base = 0;
147: aiov.iov_len = PAGE_SIZE;
148: auio.uio_resid = PAGE_SIZE;
149: auio.uio_offset = IDX_TO_OFF(m->pindex);
150: auio.uio_segflg = UIO_NOCOPY;
151: auio.uio_rw = UIO_READ;
152: auio.uio_procp = curproc;
153: error = nfs_bioread(ap->a_vp, &auio, IO_NODELOCKED, curproc->p_ucred, 1);
154:
155: m->flags |= PG_BUSY;
156: m->busy--;
157:
158: if (error && (auio.uio_resid == PAGE_SIZE))
159: return VM_PAGER_ERROR;
160: return 0;
161: }
162: #endif
163:
164: /*
165: * Vnode op for read using bio
166: * Any similarity to readip() is purely coincidental
167: */
168: int
169: nfs_bioread(vp, uio, ioflag, cred, getpages)
170: register struct vnode *vp;
171: register struct uio *uio;
172: int ioflag;
173: struct ucred *cred;
174: int getpages;
175: {
176: register struct nfsnode *np = VTONFS(vp);
177: register int biosize, diff, i;
178: struct buf *bp = 0, *rabp;
179: struct vattr vattr;
180: struct proc *p;
181: struct nfsmount *nmp = VFSTONFS(vp->v_mount);
182: daddr_t lbn, rabn;
183: int bufsize;
184: int nra, error = 0, n = 0, on = 0, not_readin;
185:
186: #if DIAGNOSTIC
187: if (uio->uio_rw != UIO_READ)
188: panic("nfs_read mode");
189: #endif
190: if (uio->uio_resid == 0)
191: return (0);
192: if (uio->uio_offset < 0)
193: return (EINVAL);
194: p = uio->uio_procp;
195: if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
196: (void)nfs_fsinfo(nmp, vp, cred, p);
197: biosize = vp->v_mount->mnt_stat.f_iosize;
198: /*
199: * For nfs, cache consistency can only be maintained approximately.
200: * Although RFC1094 does not specify the criteria, the following is
201: * believed to be compatible with the reference port.
202: * For nqnfs, full cache consistency is maintained within the loop.
203: * For nfs:
204: * If the file's modify time on the server has changed since the
205: * last read rpc or you have written to the file,
206: * you may have lost data cache consistency with the
207: * server, so flush all of the file's data out of the cache.
208: * Then force a getattr rpc to ensure that you have up to date
209: * attributes.
210: * NB: This implies that cache data can be read when up to
211: * NFS_ATTRTIMEO seconds out of date. If you find that you need current
212: * attributes this could be forced by setting n_attrstamp to 0 before
213: * the VOP_GETATTR() call.
214: */
215: if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) {
216: if (np->n_flag & NMODIFIED) {
217: if (vp->v_type != VREG) {
218: if (vp->v_type != VDIR)
219: panic("nfs: bioread, not dir");
220: nfs_invaldir(vp);
221: error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
222: if (error)
223: return (error);
224: }
225: np->n_attrstamp = 0;
226: error = VOP_GETATTR(vp, &vattr, cred, p);
227: if (error)
228: return (error);
229: np->n_mtime = vattr.va_mtime.tv_sec;
230: } else {
231: error = VOP_GETATTR(vp, &vattr, cred, p);
232: if (error)
233: return (error);
234: if (np->n_mtime != vattr.va_mtime.tv_sec) {
235: if (vp->v_type == VDIR)
236: nfs_invaldir(vp);
237: error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
238: if (error)
239: return (error);
240: np->n_mtime = vattr.va_mtime.tv_sec;
241: }
242: }
243: }
244: do {
245:
246: /*
247: * Get a valid lease. If cached data is stale, flush it.
248: */
249: if (nmp->nm_flag & NFSMNT_NQNFS) {
250: if (NQNFS_CKINVALID(vp, np, ND_READ)) {
251: do {
252: error = nqnfs_getlease(vp, ND_READ, cred, p);
253: } while (error == NQNFS_EXPIRED);
254: if (error)
255: return (error);
256: if (np->n_lrev != np->n_brev ||
257: (np->n_flag & NQNFSNONCACHE) ||
258: ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
259: if (vp->v_type == VDIR)
260: nfs_invaldir(vp);
261: error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
262: if (error)
263: return (error);
264: np->n_brev = np->n_lrev;
265: }
266: } else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
267: nfs_invaldir(vp);
268: error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
269: if (error)
270: return (error);
271: }
272: }
273: if (np->n_flag & NQNFSNONCACHE) {
274: switch (vp->v_type) {
275: case VREG:
276: return (nfs_readrpc(vp, uio, cred));
277: case VLNK:
278: return (nfs_readlinkrpc(vp, uio, cred));
279: case VDIR:
280: break;
281: default:
282: printf(" NQNFSNONCACHE: type %x unexpected\n",
283: vp->v_type);
284: };
285: }
286: switch (vp->v_type) {
287: case VREG:
288: nfsstats.biocache_reads++;
289: lbn = uio->uio_offset / biosize;
290: on = uio->uio_offset & (biosize - 1);
291: not_readin = 1;
292:
293: /*
294: * Start the read ahead(s), as required.
295: */
296: if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
297: for (nra = 0; nra < nmp->nm_readahead &&
298: (off_t)(lbn + 1 + nra) * biosize < np->n_size; nra++) {
299: rabn = lbn + 1 + nra;
300: if (!incore(vp, rabn)) {
301: rabp = nfs_getcacheblk(vp, rabn, biosize, p);
302: if (!rabp)
303: return (EINTR);
304: if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
305: rabp->b_flags |= (B_READ | B_ASYNC);
306: vfs_busy_pages(rabp, 0);
307: if (nfs_asyncio(rabp, cred)) {
308: rabp->b_flags |= B_INVAL|B_ERROR;
309: vfs_unbusy_pages(rabp);
310: brelse(rabp);
311: }
312: } else
313: brelse(rabp);
314: }
315: }
316: }
317:
318: /*
319: * If the block is in the cache and has the required data
320: * in a valid region, just copy it out.
321: * Otherwise, get the block and write back/read in,
322: * as required.
323: */
324: again:
325: bufsize = biosize;
326: if ((off_t)(lbn + 1) * biosize > np->n_size &&
327: (off_t)(lbn + 1) * biosize - np->n_size < biosize) {
328: bufsize = np->n_size - lbn * biosize;
329: bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
330: }
331: bp = nfs_getcacheblk(vp, lbn, bufsize, p);
332: if (!bp)
333: return (EINTR);
334: /* XXX CSM 12/3/97 Revisit when Ramesh merges vm with buffer cache */
335: #ifdef B_VMIO
336: /*
337: * If we are being called from nfs_getpages, we must
338: * make sure the buffer is a vmio buffer. The vp will
339: * already be setup for vmio but there may be some old
340: * non-vmio buffers attached to it.
341: */
342: if (getpages && !(bp->b_flags & B_VMIO)) {
343: #if DIAGNOSTIC
344: printf("nfs_bioread: non vmio buf found, discarding\n");
345: #endif
346: bp->b_flags |= B_NOCACHE;
347: bp->b_flags |= B_INVAFTERWRITE;
348: if (bp->b_dirtyend > 0) {
349: if ((bp->b_flags & B_DELWRI) == 0)
350: panic("nfsbioread");
351: if (VOP_BWRITE(bp) == EINTR)
352: return (EINTR);
353: } else
354: brelse(bp);
355: goto again;
356: }
357: #endif /* B_VMIO */
358: if ((bp->b_flags & B_CACHE) == 0) {
359: bp->b_flags |= B_READ;
360: bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
361: not_readin = 0;
362: vfs_busy_pages(bp, 0);
363: error = nfs_doio(bp, cred, p);
364: if (error) {
365: brelse(bp);
366: return (error);
367: }
368: }
369: if (bufsize > on) {
370: n = min((unsigned)(bufsize - on), uio->uio_resid);
371: } else {
372: n = 0;
373: }
374: diff = np->n_size - uio->uio_offset;
375: if (diff < n)
376: n = diff;
377: if (not_readin && n > 0) {
378: if (on < bp->b_validoff || (on + n) > bp->b_validend) {
379: bp->b_flags |= B_NOCACHE;
380: bp->b_flags |= B_INVAFTERWRITE;
381: if (bp->b_dirtyend > 0) {
382: if ((bp->b_flags & B_DELWRI) == 0)
383: panic("nfsbioread");
384: if (VOP_BWRITE(bp) == EINTR)
385: return (EINTR);
386: } else
387: brelse(bp);
388: goto again;
389: }
390: }
391: vp->v_lastr = lbn;
392: diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
393: if (diff < n)
394: n = diff;
395: break;
396: case VLNK:
397: nfsstats.biocache_readlinks++;
398: bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
399: if (!bp)
400: return (EINTR);
401: if ((bp->b_flags & B_CACHE) == 0) {
402: bp->b_flags |= B_READ;
403: vfs_busy_pages(bp, 0);
404: error = nfs_doio(bp, cred, p);
405: if (error) {
406: bp->b_flags |= B_ERROR;
407: brelse(bp);
408: return (error);
409: }
410: }
411: n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
412: on = 0;
413: break;
414: case VDIR:
415: nfsstats.biocache_readdirs++;
416: if (np->n_direofoffset
417: && uio->uio_offset >= np->n_direofoffset) {
418: return (0);
419: }
420: lbn = uio->uio_offset / NFS_DIRBLKSIZ;
421: on = uio->uio_offset & (NFS_DIRBLKSIZ - 1);
422: bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, p);
423: if (!bp)
424: return (EINTR);
425: if ((bp->b_flags & B_CACHE) == 0) {
426: bp->b_flags |= B_READ;
427: vfs_busy_pages(bp, 0);
428: error = nfs_doio(bp, cred, p);
429: if (error) {
430: vfs_unbusy_pages(bp);
431: brelse(bp);
432: while (error == NFSERR_BAD_COOKIE) {
433: nfs_invaldir(vp);
434: error = nfs_vinvalbuf(vp, 0, cred, p, 1);
435: /*
436: * Yuck! The directory has been modified on the
437: * server. The only way to get the block is by
438: * reading from the beginning to get all the
439: * offset cookies.
440: */
441: for (i = 0; i <= lbn && !error; i++) {
442: if (np->n_direofoffset
443: && (i * NFS_DIRBLKSIZ) >= np->n_direofoffset)
444: return (0);
445: bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, p);
446: if (!bp)
447: return (EINTR);
448: if ((bp->b_flags & B_DONE) == 0) {
449: bp->b_flags |= B_READ;
450: vfs_busy_pages(bp, 0);
451: error = nfs_doio(bp, cred, p);
452: if (error) {
453: vfs_unbusy_pages(bp);
454: brelse(bp);
455: } else if (i < lbn)
456: brelse(bp);
457: }
458: }
459: }
460: if (error)
461: return (error);
462: }
463: }
464:
465: /*
466: * If not eof and read aheads are enabled, start one.
467: * (You need the current block first, so that you have the
468: * directory offset cookie of the next block.)
469: */
470: if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
471: (np->n_direofoffset == 0 ||
472: (lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) &&
473: !(np->n_flag & NQNFSNONCACHE) &&
474: !incore(vp, lbn + 1)) {
475: rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, p);
476: if (rabp) {
477: if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
478: rabp->b_flags |= (B_READ | B_ASYNC);
479: vfs_busy_pages(rabp, 0);
480: if (nfs_asyncio(rabp, cred)) {
481: rabp->b_flags |= B_INVAL|B_ERROR;
482: vfs_unbusy_pages(rabp);
483: brelse(rabp);
484: }
485: } else {
486: brelse(rabp);
487: }
488: }
489: }
490: /*
491: * Make sure we use a signed variant of min() since
492: * the second term may be negative.
493: */
494: n = lmin(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid - on);
495: break;
496: default:
497: printf(" nfs_bioread: type %x unexpected\n",vp->v_type);
498: break;
499: };
500:
501: if (n > 0) {
502: error = uiomove(bp->b_data + on, (int)n, uio);
503: }
504: switch (vp->v_type) {
505: case VREG:
506: break;
507: case VLNK:
508: n = 0;
509: break;
510: case VDIR:
511: if (np->n_flag & NQNFSNONCACHE)
512: bp->b_flags |= B_INVAL;
513: break;
514: default:
515: printf(" nfs_bioread: type %x unexpected\n",vp->v_type);
516: }
517: brelse(bp);
518: } while (error == 0 && uio->uio_resid > 0 && n > 0);
519: return (error);
520: }
521:
522: /*
523: * Vnode op for write using bio
524: */
525: int
526: nfs_write(ap)
527: struct vop_write_args /* {
528: struct vnode *a_vp;
529: struct uio *a_uio;
530: int a_ioflag;
531: struct ucred *a_cred;
532: } */ *ap;
533: {
534: register int biosize;
535: register struct uio *uio = ap->a_uio;
536: struct proc *p = uio->uio_procp;
537: register struct vnode *vp = ap->a_vp;
538: struct nfsnode *np = VTONFS(vp);
539: register struct ucred *cred = ap->a_cred;
540: int ioflag = ap->a_ioflag;
541: struct buf *bp;
542: struct vattr vattr;
543: struct nfsmount *nmp = VFSTONFS(vp->v_mount);
544: daddr_t lbn;
545: int bufsize;
546: int n, on, error = 0, iomode, must_commit;
547:
548: #if DIAGNOSTIC
549: if (uio->uio_rw != UIO_WRITE)
550: panic("nfs_write mode");
551: if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != current_proc())
552: panic("nfs_write proc");
553: #endif
554: if (vp->v_type != VREG)
555: return (EIO);
556: if (np->n_flag & NWRITEERR) {
557: np->n_flag &= ~NWRITEERR;
558: return (np->n_error);
559: }
560: if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
561: (void)nfs_fsinfo(nmp, vp, cred, p);
562: if (ioflag & (IO_APPEND | IO_SYNC)) {
563: if (np->n_flag & NMODIFIED) {
564: np->n_attrstamp = 0;
565: error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
566: if (error)
567: return (error);
568: }
569: if (ioflag & IO_APPEND) {
570: np->n_attrstamp = 0;
571: error = VOP_GETATTR(vp, &vattr, cred, p);
572: if (error)
573: return (error);
574: uio->uio_offset = np->n_size;
575: }
576: }
577: if (uio->uio_offset < 0)
578: return (EINVAL);
579: if (uio->uio_resid == 0)
580: return (0);
581: /*
582: * Maybe this should be above the vnode op call, but so long as
583: * file servers have no limits, i don't think it matters
584: */
585: if (p && uio->uio_offset + uio->uio_resid >
586: p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
587: psignal(p, SIGXFSZ);
588: return (EFBIG);
589: }
590: /*
591: * I use nm_rsize, not nm_wsize so that all buffer cache blocks
592: * will be the same size within a filesystem. nfs_writerpc will
593: * still use nm_wsize when sizing the rpc's.
594: */
595: biosize = vp->v_mount->mnt_stat.f_iosize;
596: do {
597: /*
598: * Check for a valid write lease.
599: */
600: if ((nmp->nm_flag & NFSMNT_NQNFS) &&
601: NQNFS_CKINVALID(vp, np, ND_WRITE)) {
602: do {
603: error = nqnfs_getlease(vp, ND_WRITE, cred, p);
604: } while (error == NQNFS_EXPIRED);
605: if (error)
606: return (error);
607: if (np->n_lrev != np->n_brev ||
608: (np->n_flag & NQNFSNONCACHE)) {
609: error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
610: if (error)
611: return (error);
612: np->n_brev = np->n_lrev;
613: }
614: }
615: if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) {
616: iomode = NFSV3WRITE_FILESYNC;
617: error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit);
618: if (must_commit)
619: nfs_clearcommit(vp->v_mount);
620: return (error);
621: }
622: nfsstats.biocache_writes++;
623: lbn = uio->uio_offset / biosize;
624: on = uio->uio_offset & (biosize-1);
625: n = min((unsigned)(biosize - on), uio->uio_resid);
626: again:
627: if (uio->uio_offset + n > np->n_size) {
628: np->n_size = uio->uio_offset + n;
629: np->n_flag |= NMODIFIED;
630: #if MACH_NBC
631: if ((vp->v_type == VREG) && (vp->v_vm_info && !(vp->v_vm_info->mapped))) {
632: #endif /* MACH_NBC */
633: vnode_pager_setsize(vp, (u_long)np->n_size);
634: #if MACH_NBC
635: }
636: #endif /* MACH_NBC */
637:
638: }
639: bufsize = biosize;
640: if ((lbn + 1) * biosize > np->n_size) {
641: bufsize = np->n_size - lbn * biosize;
642: bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
643: }
644: bp = nfs_getwriteblk(vp, lbn, bufsize, p, cred, on, n);
645: if (!bp)
646: return (EINTR);
647: if (bp->b_wcred == NOCRED) {
648: crhold(cred);
649: bp->b_wcred = cred;
650: }
651: np->n_flag |= NMODIFIED;
652:
653: /*
654: * Check for valid write lease and get one as required.
655: * In case getblk() and/or bwrite() delayed us.
656: */
657: if ((nmp->nm_flag & NFSMNT_NQNFS) &&
658: NQNFS_CKINVALID(vp, np, ND_WRITE)) {
659: do {
660: error = nqnfs_getlease(vp, ND_WRITE, cred, p);
661: } while (error == NQNFS_EXPIRED);
662: if (error) {
663: brelse(bp);
664: return (error);
665: }
666: if (np->n_lrev != np->n_brev ||
667: (np->n_flag & NQNFSNONCACHE)) {
668: brelse(bp);
669: error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
670: if (error)
671: return (error);
672: np->n_brev = np->n_lrev;
673: goto again;
674: }
675: }
676: error = uiomove((char *)bp->b_data + on, n, uio);
677: if (error) {
678: bp->b_flags |= B_ERROR;
679: brelse(bp);
680: return (error);
681: }
682: if (bp->b_dirtyend > 0) {
683: bp->b_dirtyoff = min(on, bp->b_dirtyoff);
684: bp->b_dirtyend = max((on + n), bp->b_dirtyend);
685: } else {
686: bp->b_dirtyoff = on;
687: bp->b_dirtyend = on + n;
688: }
689: if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
690: bp->b_validoff > bp->b_dirtyend) {
691: bp->b_validoff = bp->b_dirtyoff;
692: bp->b_validend = bp->b_dirtyend;
693: } else {
694: bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
695: bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
696: }
697:
698: /*
699: * Since this block is being modified, it must be written
700: * again and not just committed.
701: */
702: bp->b_flags &= ~B_NEEDCOMMIT;
703:
704: /*
705: * If the lease is non-cachable or IO_SYNC do bwrite().
706: */
707: if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
708: bp->b_proc = p;
709: error = VOP_BWRITE(bp);
710: if (error)
711: return (error);
712: if (np->n_flag & NQNFSNONCACHE) {
713: error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
714: if (error)
715: return (error);
716: }
717: } else if ((n + on) == biosize &&
718: (nmp->nm_flag & NFSMNT_NQNFS) == 0) {
719: bp->b_proc = (struct proc *)0;
720: bp->b_flags |= B_ASYNC;
721: (void)nfs_writebp(bp, 0);
722: } else
723: bdwrite(bp);
724: } while (uio->uio_resid > 0 && n > 0);
725: return (0);
726: }
727:
728: /*
729: * Get a cache block for writing. The range to be written is
730: * (off..off+len) within the block. This routine ensures that the
731: * block is either has no dirty region or that the given range is
732: * contiguous with the existing dirty region.
733: */
734: static struct buf *
735: nfs_getwriteblk(vp, bn, size, p, cred, off, len)
736: struct vnode *vp;
737: daddr_t bn;
738: int size;
739: struct proc *p;
740: struct ucred *cred;
741: int off, len;
742: {
743: struct nfsnode *np = VTONFS(vp);
744: struct buf *bp;
745: int error;
746:
747: again:
748: bp = nfs_getcacheblk(vp, bn, size, p);
749: if (!bp)
750: return (NULL);
751: if (bp->b_wcred == NOCRED) {
752: crhold(cred);
753: bp->b_wcred = cred;
754: }
755:
756: if ((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend > np->n_size) {
757: bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE);
758: }
759:
760: /*
761: * If the new write will leave a contiguous dirty
762: * area, just update the b_dirtyoff and b_dirtyend,
763: * otherwise try to extend the dirty region.
764: */
765: if (bp->b_dirtyend > 0 &&
766: (off > bp->b_dirtyend || (off + len) < bp->b_dirtyoff)) {
767: struct iovec iov;
768: struct uio uio;
769: off_t boff, start, end;
770:
771: boff = ((off_t)bp->b_blkno) * DEV_BSIZE;
772: if (off > bp->b_dirtyend) {
773: start = boff + bp->b_validend;
774: end = boff + off;
775: } else {
776: start = boff + off + len;
777: end = boff + bp->b_validoff;
778: }
779:
780: /*
781: * It may be that the valid region in the buffer
782: * covers the region we want, in which case just
783: * extend the dirty region. Otherwise we try to
784: * extend the valid region.
785: */
786: if (end > start) {
787: uio.uio_iov = &iov;
788: uio.uio_iovcnt = 1;
789: uio.uio_offset = start;
790: uio.uio_resid = end - start;
791: uio.uio_segflg = UIO_SYSSPACE;
792: uio.uio_rw = UIO_READ;
793: uio.uio_procp = p;
794: iov.iov_base = bp->b_data + (start - boff);
795: iov.iov_len = end - start;
796: error = nfs_readrpc(vp, &uio, cred);
797: if (error) {
798: /*
799: * If we couldn't read, fall back to writing
800: * out the old dirty region.
801: */
802: bp->b_proc = p;
803: if (VOP_BWRITE(bp) == EINTR)
804: return (NULL);
805: goto again;
806: } else {
807: /*
808: * The read worked.
809: */
810: if (uio.uio_resid > 0) {
811: /*
812: * If there was a short read,
813: * just zero fill.
814: */
815: bzero(iov.iov_base,
816: uio.uio_resid);
817: }
818: if (off > bp->b_dirtyend)
819: bp->b_validend = off;
820: else
821: bp->b_validoff = off + len;
822: }
823: }
824:
825: /*
826: * We now have a valid region which extends up to the
827: * dirty region which we want.
828: */
829: if (off > bp->b_dirtyend)
830: bp->b_dirtyend = off;
831: else
832: bp->b_dirtyoff = off + len;
833: }
834:
835: return bp;
836: }
837:
838: /*
839: * Get an nfs cache block.
840: * Allocate a new one if the block isn't currently in the cache
841: * and return the block marked busy. If the calling process is
842: * interrupted by a signal for an interruptible mount point, return
843: * NULL.
844: */
845: static struct buf *
846: nfs_getcacheblk(vp, bn, size, p)
847: struct vnode *vp;
848: daddr_t bn;
849: int size;
850: struct proc *p;
851: {
852: register struct buf *bp;
853: struct nfsmount *nmp = VFSTONFS(vp->v_mount);
854: int biosize = vp->v_mount->mnt_stat.f_iosize;
855:
856: if (nmp->nm_flag & NFSMNT_INT) {
857: bp = getblk(vp, bn, size, PCATCH, 0);
858: while (bp == (struct buf *)0) {
859: if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
860: return ((struct buf *)0);
861: bp = getblk(vp, bn, size, 0, 2 * hz);
862: }
863: } else
864: bp = getblk(vp, bn, size, 0, 0);
865:
866: if( vp->v_type == VREG)
867: bp->b_blkno = (bn * biosize) / DEV_BSIZE;
868:
869: return (bp);
870: }
871:
872: /*
873: * Flush and invalidate all dirty buffers. If another process is already
874: * doing the flush, just wait for completion.
875: */
876: int
877: nfs_vinvalbuf(vp, flags, cred, p, intrflg)
878: struct vnode *vp;
879: int flags;
880: struct ucred *cred;
881: struct proc *p;
882: int intrflg;
883: {
884: register struct nfsnode *np = VTONFS(vp);
885: struct nfsmount *nmp = VFSTONFS(vp->v_mount);
886: int error = 0, slpflag, slptimeo;
887:
888: if ((nmp->nm_flag & NFSMNT_INT) == 0)
889: intrflg = 0;
890: if (intrflg) {
891: slpflag = PCATCH;
892: slptimeo = 2 * hz;
893: } else {
894: slpflag = 0;
895: slptimeo = 0;
896: }
897: /*
898: * First wait for any other process doing a flush to complete.
899: */
900: while (np->n_flag & NFLUSHINPROG) {
901: np->n_flag |= NFLUSHWANT;
902: error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
903: slptimeo);
904: if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
905: return (EINTR);
906: }
907:
908: /*
909: * Now, flush as required.
910: */
911: np->n_flag |= NFLUSHINPROG;
912: error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
913: while (error) {
914: if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
915: np->n_flag &= ~NFLUSHINPROG;
916: if (np->n_flag & NFLUSHWANT) {
917: np->n_flag &= ~NFLUSHWANT;
918: wakeup((caddr_t)&np->n_flag);
919: }
920: return (EINTR);
921: }
922: error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
923: }
924: np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
925: if (np->n_flag & NFLUSHWANT) {
926: np->n_flag &= ~NFLUSHWANT;
927: wakeup((caddr_t)&np->n_flag);
928: }
929: return (0);
930: }
931:
932: /*
933: * Initiate asynchronous I/O. Return an error if no nfsiods are available.
934: * This is mainly to avoid queueing async I/O requests when the nfsiods
935: * are all hung on a dead server.
936: */
937: int
938: nfs_asyncio(bp, cred)
939: register struct buf *bp;
940: struct ucred *cred;
941: {
942: struct nfsmount *nmp;
943: int i;
944: int gotiod;
945: int slpflag = 0;
946: int slptimeo = 0;
947: int error;
948:
949: if (nfs_numasync == 0)
950: return (EIO);
951:
952: nmp = VFSTONFS(bp->b_vp->v_mount);
953: again:
954: if (nmp->nm_flag & NFSMNT_INT)
955: slpflag = PCATCH;
956: gotiod = FALSE;
957:
958: /*
959: * Find a free iod to process this request.
960: */
961: for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
962: if (nfs_iodwant[i]) {
963: /*
964: * Found one, so wake it up and tell it which
965: * mount to process.
966: */
967: NFS_DPF(ASYNCIO,
968: ("nfs_asyncio: waking iod %d for mount %p\n",
969: i, nmp));
970: nfs_iodwant[i] = (struct proc *)0;
971: nfs_iodmount[i] = nmp;
972: nmp->nm_bufqiods++;
973: wakeup((caddr_t)&nfs_iodwant[i]);
974: gotiod = TRUE;
975: break;
976: }
977:
978: /*
979: * If none are free, we may already have an iod working on this mount
980: * point. If so, it will process our request.
981: */
982: if (!gotiod) {
983: if (nmp->nm_bufqiods > 0) {
984: NFS_DPF(ASYNCIO,
985: ("nfs_asyncio: %d iods are already processing mount %p\n",
986: nmp->nm_bufqiods, nmp));
987: gotiod = TRUE;
988: }
989: }
990:
991: /*
992: * If we have an iod which can process the request, then queue
993: * the buffer.
994: */
995: if (gotiod) {
996: /*
997: * Ensure that the queue never grows too large.
998: */
999: while (nmp->nm_bufqlen >= 2*nfs_numasync) {
1000: NFS_DPF(ASYNCIO,
1001: ("nfs_asyncio: waiting for mount %p queue to drain\n", nmp));
1002: nmp->nm_bufqwant = TRUE;
1003: error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO,
1004: "nfsaio", slptimeo);
1005: if (error) {
1006: if (nfs_sigintr(nmp, NULL, bp->b_proc))
1007: return (EINTR);
1008: if (slpflag == PCATCH) {
1009: slpflag = 0;
1010: slptimeo = 2 * hz;
1011: }
1012: }
1013: /*
1014: * We might have lost our iod while sleeping,
1015: * so check and loop if nescessary.
1016: */
1017: if (nmp->nm_bufqiods == 0) {
1018: NFS_DPF(ASYNCIO,
1019: ("nfs_asyncio: no iods after mount %p queue was drained, looping\n", nmp));
1020: goto again;
1021: }
1022: }
1023:
1024: if (bp->b_flags & B_READ) {
1025: if (bp->b_rcred == NOCRED && cred != NOCRED) {
1026: crhold(cred);
1027: bp->b_rcred = cred;
1028: }
1029: } else {
1030: bp->b_flags |= B_WRITEINPROG;
1031: if (bp->b_wcred == NOCRED && cred != NOCRED) {
1032: crhold(cred);
1033: bp->b_wcred = cred;
1034: }
1035: }
1036:
1037: TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
1038: nmp->nm_bufqlen++;
1039: return (0);
1040: }
1041:
1042: /*
1043: * All the iods are busy on other mounts, so return EIO to
1044: * force the caller to process the i/o synchronously.
1045: */
1046: NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods available, i/o is synchronous\n"));
1047: return (EIO);
1048: }
1049:
1050: /*
1051: * Do an I/O operation to/from a cache block. This may be called
1052: * synchronously or from an nfsiod.
1053: */
1054: int
1055: nfs_doio(bp, cr, p)
1056: register struct buf *bp;
1057: struct ucred *cr;
1058: struct proc *p;
1059: {
1060: register struct uio *uiop;
1061: register struct vnode *vp;
1062: struct nfsnode *np;
1063: struct nfsmount *nmp;
1064: int error = 0, diff, len, iomode, must_commit = 0;
1065: struct uio uio;
1066: struct iovec io;
1067:
1068: vp = bp->b_vp;
1069: NFSTRACE(NFSTRC_DIO, vp);
1070: np = VTONFS(vp);
1071: nmp = VFSTONFS(vp->v_mount);
1072: uiop = &uio;
1073: uiop->uio_iov = &io;
1074: uiop->uio_iovcnt = 1;
1075: uiop->uio_segflg = UIO_SYSSPACE;
1076: uiop->uio_procp = p;
1077:
1078: /*
1079: * Historically, paging was done with physio, but no more.
1080: */
1081: if (bp->b_flags & B_PHYS) {
1082: /*
1083: * ...though reading /dev/drum still gets us here.
1084: */
1085: io.iov_len = uiop->uio_resid = bp->b_bcount;
1086: /* mapping was done by vmapbuf() */
1087: io.iov_base = bp->b_data;
1088: uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
1089: if (bp->b_flags & B_READ) {
1090: uiop->uio_rw = UIO_READ;
1091: nfsstats.read_physios++;
1092: error = nfs_readrpc(vp, uiop, cr);
1093: } else {
1094: int com;
1095:
1096: iomode = NFSV3WRITE_DATASYNC;
1097: uiop->uio_rw = UIO_WRITE;
1098: nfsstats.write_physios++;
1099: error = nfs_writerpc(vp, uiop, cr, &iomode, &com);
1100: }
1101: if (error) {
1102: bp->b_flags |= B_ERROR;
1103: bp->b_error = error;
1104: }
1105: } else if (bp->b_flags & B_READ) {
1106: io.iov_len = uiop->uio_resid = bp->b_bcount;
1107: io.iov_base = bp->b_data;
1108: uiop->uio_rw = UIO_READ;
1109: switch (vp->v_type) {
1110: case VREG:
1111: uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
1112: nfsstats.read_bios++;
1113: error = nfs_readrpc(vp, uiop, cr);
1114: if (!error) {
1115: bp->b_validoff = 0;
1116: if (uiop->uio_resid) {
1117: /*
1118: * If len > 0, there is a hole in the file and
1119: * no writes after the hole have been pushed to
1120: * the server yet.
1121: * Just zero fill the rest of the valid area.
1122: */
1123: diff = bp->b_bcount - uiop->uio_resid;
1124: len = np->n_size - (((u_quad_t)bp->b_blkno) * DEV_BSIZE
1125: + diff);
1126: if (len > 0) {
1127: len = min(len, uiop->uio_resid);
1128: bzero((char *)bp->b_data + diff, len);
1129: bp->b_validend = diff + len;
1130: } else
1131: bp->b_validend = diff;
1132: } else
1133: bp->b_validend = bp->b_bcount;
1134: }
1135: if (p && (vp->v_flag & VTEXT) &&
1136: (((nmp->nm_flag & NFSMNT_NQNFS) &&
1137: NQNFS_CKINVALID(vp, np, ND_READ) &&
1138: np->n_lrev != np->n_brev) ||
1139: (!(nmp->nm_flag & NFSMNT_NQNFS) &&
1140: np->n_mtime != np->n_vattr.va_mtime.tv_sec))) {
1141: uprintf("Process killed due to text file modification\n");
1142: psignal(p, SIGKILL);
1143: p->p_flag |= P_NOSWAP;
1144: }
1145: break;
1146: case VLNK:
1147: uiop->uio_offset = (off_t)0;
1148: nfsstats.readlink_bios++;
1149: error = nfs_readlinkrpc(vp, uiop, cr);
1150: break;
1151: case VDIR:
1152: nfsstats.readdir_bios++;
1153: uiop->uio_offset = ((u_quad_t)bp->b_lblkno) * NFS_DIRBLKSIZ;
1154: if (!(nmp->nm_flag & NFSMNT_NFSV3))
1155: nmp->nm_flag &= ~NFSMNT_RDIRPLUS; /* [email protected] */
1156: if (nmp->nm_flag & NFSMNT_RDIRPLUS) {
1157: error = nfs_readdirplusrpc(vp, uiop, cr);
1158: if (error == NFSERR_NOTSUPP)
1159: nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
1160: }
1161: if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
1162: error = nfs_readdirrpc(vp, uiop, cr);
1163: break;
1164: default:
1165: printf("nfs_doio: type %x unexpected\n",vp->v_type);
1166: break;
1167: };
1168: if (error) {
1169: bp->b_flags |= B_ERROR;
1170: bp->b_error = error;
1171: }
1172: } else {
1173: if (((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend) > np->n_size)
1174: bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE);
1175:
1176: if (bp->b_dirtyend > bp->b_dirtyoff) {
1177: io.iov_len = uiop->uio_resid = bp->b_dirtyend
1178: - bp->b_dirtyoff;
1179: uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE
1180: + bp->b_dirtyoff;
1181: io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
1182: uiop->uio_rw = UIO_WRITE;
1183: nfsstats.write_bios++;
1184: /* XXX CSM 12/3/97 Revisit when buffer cache upgraded */
1185: #ifdef notyet
1186: if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE | B_CLUSTER)) == B_ASYNC)
1187: #else
1188: if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE)) == B_ASYNC)
1189: #endif
1190: iomode = NFSV3WRITE_UNSTABLE;
1191: else
1192: iomode = NFSV3WRITE_FILESYNC;
1193: bp->b_flags |= B_WRITEINPROG;
1194: error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit);
1195: if (!error && iomode == NFSV3WRITE_UNSTABLE) {
1196: bp->b_flags |= B_NEEDCOMMIT;
1197: /* XXX CSM 12/3/97 Revisit when buffer cache upgraded */
1198: #ifdef notyet
1199: if (bp->b_dirtyoff == 0
1200: && bp->b_dirtyend == bp->b_bufsize)
1201: bp->b_flags |= B_CLUSTEROK;
1202: #endif
1203: } else
1204: bp->b_flags &= ~B_NEEDCOMMIT;
1205: bp->b_flags &= ~B_WRITEINPROG;
1206:
1207: /*
1208: * For an interrupted write, the buffer is still valid
1209: * and the write hasn't been pushed to the server yet,
1210: * so we can't set B_ERROR and report the interruption
1211: * by setting B_EINTR. For the B_ASYNC case, B_EINTR
1212: * is not relevant, so the rpc attempt is essentially
1213: * a noop. For the case of a V3 write rpc not being
1214: * committed to stable storage, the block is still
1215: * dirty and requires either a commit rpc or another
1216: * write rpc with iomode == NFSV3WRITE_FILESYNC before
1217: * the block is reused. This is indicated by setting
1218: * the B_DELWRI and B_NEEDCOMMIT flags.
1219: */
1220: if (error == EINTR
1221: || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
1222: int s;
1223:
1224: bp->b_flags &= ~(B_INVAL|B_NOCACHE);
1225: /* XXX CSM 12/3/97 Revisit when buffer cache upgraded */
1226: #ifdef notyet
1227: ++numdirtybuffers;
1228: #endif
1229: bp->b_flags |= B_DELWRI;
1230:
1231: /*
1232: * Since for the B_ASYNC case, nfs_bwrite() has reassigned the
1233: * buffer to the clean list, we have to reassign it back to the
1234: * dirty one. Ugh.
1235: */
1236: if (bp->b_flags & B_ASYNC) {
1237: s = splbio();
1238: reassignbuf(bp, vp);
1239: splx(s);
1240: } else
1241: bp->b_flags |= B_EINTR;
1242: } else {
1243: if (error) {
1244: bp->b_flags |= B_ERROR;
1245: bp->b_error = np->n_error = error;
1246: np->n_flag |= NWRITEERR;
1247: }
1248: bp->b_dirtyoff = bp->b_dirtyend = 0;
1249: }
1250: } else {
1251: bp->b_resid = 0;
1252: biodone(bp);
1253: NFSTRACE(NFSTRC_DIO_DONE, vp);
1254: return (0);
1255: }
1256: }
1257: bp->b_resid = uiop->uio_resid;
1258: if (must_commit)
1259: nfs_clearcommit(vp->v_mount);
1260: biodone(bp);
1261: NFSTRACE(NFSTRC_DIO_DONE, vp);
1262: return (error);
1263: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.