|
|
1.1 root 1: /*
2: * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3: *
4: * @APPLE_LICENSE_HEADER_START@
5: *
6: * The contents of this file constitute Original Code as defined in and
7: * are subject to the Apple Public Source License Version 1.1 (the
8: * "License"). You may not use this file except in compliance with the
9: * License. Please obtain a copy of the License at
10: * http://www.apple.com/publicsource and read it before using this file.
11: *
12: * This Original Code and all software distributed under the License are
13: * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14: * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15: * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16: * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17: * License for the specific language governing rights and limitations
18: * under the License.
19: *
20: * @APPLE_LICENSE_HEADER_END@
21: */
22: /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23: /*
24: * Copyright (c) 1989, 1991, 1993, 1995
25: * The Regents of the University of California. All rights reserved.
26: *
27: * This code is derived from software contributed to Berkeley by
28: * Rick Macklem at The University of Guelph.
29: *
30: * Redistribution and use in source and binary forms, with or without
31: * modification, are permitted provided that the following conditions
32: * are met:
33: * 1. Redistributions of source code must retain the above copyright
34: * notice, this list of conditions and the following disclaimer.
35: * 2. Redistributions in binary form must reproduce the above copyright
36: * notice, this list of conditions and the following disclaimer in the
37: * documentation and/or other materials provided with the distribution.
38: * 3. All advertising materials mentioning features or use of this software
39: * must display the following acknowledgement:
40: * This product includes software developed by the University of
41: * California, Berkeley and its contributors.
42: * 4. Neither the name of the University nor the names of its contributors
43: * may be used to endorse or promote products derived from this software
44: * without specific prior written permission.
45: *
46: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
47: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
50: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
56: * SUCH DAMAGE.
57: *
58: * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
59: * FreeBSD-Id: nfs_socket.c,v 1.30 1997/10/28 15:59:07 bde Exp $
60: */
61:
62: /*
63: * Socket operations for use by nfs
64: */
65:
66: #include <sys/param.h>
67: #include <sys/systm.h>
68: #include <sys/proc.h>
69: #include <sys/mount.h>
70: #include <sys/kernel.h>
71: #include <sys/mbuf.h>
72: #include <sys/malloc.h>
73: #include <sys/vnode.h>
74: #include <sys/domain.h>
75: #include <sys/protosw.h>
76: #include <sys/socket.h>
77: #include <sys/socketvar.h>
78: #include <sys/syslog.h>
79: #include <sys/tprintf.h>
80: #include <machine/spl.h>
81:
82: #include <netinet/in.h>
83: #include <netinet/tcp.h>
84:
85: #include <nfs/rpcv2.h>
86: #include <nfs/nfsproto.h>
87: #include <nfs/nfs.h>
88: #include <nfs/xdr_subs.h>
89: #include <nfs/nfsm_subs.h>
90: #include <nfs/nfsmount.h>
91: #include <nfs/nfsnode.h>
92: #include <nfs/nfsrtt.h>
93: #include <nfs/nqnfs.h>
94:
95: #define TRUE 1
96: #define FALSE 0
97:
/*
 * Estimate rto for an nfs rpc sent via an unreliable datagram.
 * Use the mean and mean deviation of rtt for the appropriate type of rpc
 * for the frequent rpcs and a default for the others.
 * The justification for doing "other" this way is that these rpcs
 * happen so infrequently that timer est. would probably be stale.
 * Also, since many of these rpcs are
 * non-idempotent, a conservative timeout is desired.
 * getattr, lookup - A+2D
 * read, write - A+4D
 * other - nm_timeo
 *
 * NFS_RTO(n, t):
 *	n - pointer to the mount structure (nm_timeo, nm_srtt[], nm_sdrtt[])
 *	t - timer class: 0 selects nm_timeo, otherwise slot (t - 1) of the
 *	    nm_srtt[] / nm_sdrtt[] arrays is used.
 * Every use of an argument is parenthesized so that an expression such as
 * "cond ? 2 : 1" can be passed for t without changing the array index.
 */
#define	NFS_RTO(n, t) \
	((t) == 0 ? (n)->nm_timeo : \
	 ((t) < 3 ? \
	  (((((n)->nm_srtt[(t) - 1] + 3) >> 2) + (n)->nm_sdrtt[(t) - 1] + 1) >> 1) : \
	  ((((n)->nm_srtt[(t) - 1] + 7) >> 3) + (n)->nm_sdrtt[(t) - 1] + 1)))

/*
 * Smoothed rtt / rtt deviation slot for the procedure of request r.
 * Both expand to lvalues.  They index slot (proct[procnum] - 1), so they
 * must only be used for procedures whose proct[] entry is non-zero.
 */
#define	NFS_SRTT(r)	((r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1])
#define	NFS_SDRTT(r)	((r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1])
117: /*
118: * External data, mostly RPC constants in XDR form
119: */
120: extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
121: rpc_msgaccepted, rpc_call, rpc_autherr,
122: rpc_auth_kerb;
123: extern u_long nfs_prog, nqnfs_prog;
124: extern time_t nqnfsstarttime;
125: extern struct nfsstats nfsstats;
126: extern int nfsv3_procid[NFS_NPROCS];
127: extern int nfs_ticks;
128:
129: /*
130: * Defines which timer to use for the procnum.
131: * 0 - default
132: * 1 - getattr
133: * 2 - lookup
134: * 3 - read
135: * 4 - write
136: */
137: static int proct[NFS_NPROCS] = {
138: 0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0,
139: 0, 0, 0,
140: };
141:
142: /*
143: * There is a congestion window for outstanding rpcs maintained per mount
144: * point. The cwnd size is adjusted in roughly the way that:
145: * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
146: * SIGCOMM '88". ACM, August 1988.
147: * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
148: * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
149: * of rpcs is in progress.
150: * (The sent count and cwnd are scaled for integer arith.)
151: * Variants of "slow start" were tried and were found to be too much of a
152: * performance hit (ave. rtt 3 times larger),
153: * I suspect due to the large rtt that nfs rpcs have.
154: */
155: #define NFS_CWNDSCALE 256
156: #define NFS_MAXCWND (NFS_CWNDSCALE * 32)
157: static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
158: int nfsrtton = 0;
159: struct nfsrtt nfsrtt;
160:
161: static int nfs_msg __P((struct proc *,char *,char *));
162: static int nfs_rcvlock __P((struct nfsreq *));
163: static void nfs_rcvunlock __P((int *flagp));
164: static int nfs_receive __P((struct nfsreq *rep, struct mbuf **aname,
165: struct mbuf **mp));
166: static int nfs_reconnect __P((struct nfsreq *rep));
167: #ifndef NFS_NOSERVER
168: static int nfsrv_getstream __P((struct nfssvc_sock *,int));
169:
170: int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *nd,
171: struct nfssvc_sock *slp,
172: struct proc *procp,
173: struct mbuf **mreqp)) = {
174: nfsrv_null,
175: nfsrv_getattr,
176: nfsrv_setattr,
177: nfsrv_lookup,
178: nfsrv3_access,
179: nfsrv_readlink,
180: nfsrv_read,
181: nfsrv_write,
182: nfsrv_create,
183: nfsrv_mkdir,
184: nfsrv_symlink,
185: nfsrv_mknod,
186: nfsrv_remove,
187: nfsrv_rmdir,
188: nfsrv_rename,
189: nfsrv_link,
190: nfsrv_readdir,
191: nfsrv_readdirplus,
192: nfsrv_statfs,
193: nfsrv_fsinfo,
194: nfsrv_pathconf,
195: nfsrv_commit,
196: nqnfsrv_getlease,
197: nqnfsrv_vacated,
198: nfsrv_noop,
199: nfsrv_noop
200: };
201: #endif /* NFS_NOSERVER */
202:
203: #if DIAGNOSTIC
204: uint nfstraceindx = 0;
205: struct nfstracerec nfstracebuf[NFSTBUFSIZ] = {{0,0,0,0}};
206: uint nfstracemask = 0; /* enables trace points 0 to 31 (right to left) */
207:
208: int nfsprnttimo = 1;
209:
210: int nfsodata[1024];
211: int nfsoprocnum, nfsolen;
212: int nfsbt[32], nfsbtlen;
213:
214:
215: #if defined(__ppc__)
216: int
217: backtrace(int *where, int size)
218: {
219: int register sp, *fp, numsaved;
220:
221: __asm__ volatile("mr %0,r1" : "=r" (sp));
222:
223: fp = (int *)*((int *)sp);
224: size /= sizeof(int);
225: for (numsaved = 0; numsaved < size; numsaved++) {
226: *where++ = fp[2];
227: if ((int)fp <= 0)
228: break;
229: fp = (int *)*fp;
230: }
231: return (numsaved);
232: }
233: #elif defined(__i386__)
234: int
235: backtrace()
236: {
237: return(0); /* Till someone implements a real routine */
238: }
239: #else
240: #error architecture not implemented.
241: #endif
242:
243: void
244: nfsdup(struct nfsreq *rep)
245: {
246: int *ip, i, first = 1, end;
247: char *s, b[240];
248: struct mbuf *mb;
249:
250: if ((nfs_debug & NFS_DEBUG_DUP) == 0)
251: return;
252: /* last mbuf in chain will be nfs content */
253: for (mb = rep->r_mreq; mb->m_next; mb = mb->m_next)
254: ;
255: if (rep->r_procnum == nfsoprocnum && mb->m_len == nfsolen &&
256: !bcmp((caddr_t)nfsodata, mb->m_data, nfsolen)) {
257: s = b + sprintf(b, "nfsdup x=%x p=%d h=", rep->r_xid,
258: rep->r_procnum);
259: end = (int)(VTONFS(rep->r_vp)->n_fhp);
260: ip = (int *)(end & ~3);
261: end += VTONFS(rep->r_vp)->n_fhsize;
262: while ((int)ip < end) {
263: i = *ip++;
264: if (first) { /* avoid leading zeroes */
265: if (i == 0)
266: continue;
267: first = 0;
268: s += sprintf(s, "%x", i);
269: } else
270: s += sprintf(s, "%08x", i);
271: }
272: if (first)
273: sprintf(s, "%x", 0);
274: else /* eliminate trailing zeroes */
275: while (*--s == '0')
276: *s = 0;
277: /*
278: * set a breakpoint here and you can view the
279: * current backtrace and the one saved in nfsbt
280: */
281: kprintf("%s\n", b);
282: }
283: nfsoprocnum = rep->r_procnum;
284: nfsolen = mb->m_len;
285: bcopy(mb->m_data, (caddr_t)nfsodata, mb->m_len);
286: nfsbtlen = backtrace(&nfsbt, sizeof(nfsbt));
287: }
288: #endif
289:
290: /*
291: * Initialize sockets and congestion for a new NFS connection.
292: * We do not free the sockaddr if error.
293: */
294: int
295: nfs_connect(nmp, rep)
296: register struct nfsmount *nmp;
297: struct nfsreq *rep;
298: {
299: register struct socket *so;
300: int s, error, rcvreserve, sndreserve;
301: struct sockaddr *saddr;
302: struct sockaddr_in sin;
303: u_short tport;
304:
305: nmp->nm_so = (struct socket *)0;
306: saddr = mtod(nmp->nm_nam, struct sockaddr *);
307: error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype,
308: nmp->nm_soproto);
309: if (error)
310: goto bad;
311: so = nmp->nm_so;
312: nmp->nm_soflags = so->so_proto->pr_flags;
313:
314: /*
315: * Some servers require that the client port be a reserved port number.
316: */
317: if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
318: sin.sin_len = sizeof (struct sockaddr_in);
319: sin.sin_family = AF_INET;
320: sin.sin_addr.s_addr = INADDR_ANY;
321: tport = IPPORT_RESERVED - 1;
322: sin.sin_port = htons(tport);
323: while ((error = sobind(so, (struct sockaddr *) &sin) == EADDRINUSE) &&
324: (--tport > IPPORT_RESERVED / 2))
325: sin.sin_port = htons(tport);
326:
327: if (error)
328: goto bad;
329: }
330:
331: /*
332: * Protocols that do not require connections may be optionally left
333: * unconnected for servers that reply from a port other than NFS_PORT.
334: */
335: if (nmp->nm_flag & NFSMNT_NOCONN) {
336: if (nmp->nm_soflags & PR_CONNREQUIRED) {
337: error = ENOTCONN;
338: goto bad;
339: }
340: } else {
341: error = soconnect(so, mtod(nmp->nm_nam, struct sockaddr *));
342: if (error)
343: goto bad;
344:
345: /*
346: * Wait for the connection to complete. Cribbed from the
347: * connect system call but with the wait timing out so
348: * that interruptible mounts don't hang here for a long time.
349: */
350: s = splnet();
351: while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
352: (void) tsleep((caddr_t)&so->so_timeo, PSOCK,
353: "nfscon", 2 * hz);
354: if ((so->so_state & SS_ISCONNECTING) &&
355: so->so_error == 0 && rep &&
356: (error = nfs_sigintr(nmp, rep, rep->r_procp))) {
357: so->so_state &= ~SS_ISCONNECTING;
358: splx(s);
359: goto bad;
360: }
361: }
362: if (so->so_error) {
363: error = so->so_error;
364: so->so_error = 0;
365: splx(s);
366: goto bad;
367: }
368: splx(s);
369: }
370: if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
371: so->so_rcv.sb_timeo = (5 * hz);
372: so->so_snd.sb_timeo = (5 * hz);
373: } else {
374: so->so_rcv.sb_timeo = 0;
375: so->so_snd.sb_timeo = 0;
376: }
377: if (nmp->nm_sotype == SOCK_DGRAM) {
378: sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
379: rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2;
380: } else if (nmp->nm_sotype == SOCK_SEQPACKET) {
381: sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
382: rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2;
383: } else {
384: if (nmp->nm_sotype != SOCK_STREAM)
385: panic("nfscon sotype");
386:
387: if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
388: struct sockopt sopt;
389: int val;
390:
391: bzero(&sopt, sizeof sopt);
392: sopt.sopt_level = SOL_SOCKET;
393: sopt.sopt_name = SO_KEEPALIVE;
394: sopt.sopt_val = &val;
395: sopt.sopt_valsize = sizeof val;
396: val = 1;
397: sosetopt(so, &sopt);
398: }
399: if (so->so_proto->pr_protocol == IPPROTO_TCP) {
400: struct sockopt sopt;
401: int val;
402:
403: bzero(&sopt, sizeof sopt);
404: sopt.sopt_level = IPPROTO_TCP;
405: sopt.sopt_name = TCP_NODELAY;
406: sopt.sopt_val = &val;
407: sopt.sopt_valsize = sizeof val;
408: val = 1;
409: sosetopt(so, &sopt);
410: }
411:
412: sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long))
413: * 2;
414: rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long))
415: * 2;
416: }
417: error = soreserve(so, sndreserve, rcvreserve);
418: if (error)
419: goto bad;
420: so->so_rcv.sb_flags |= SB_NOINTR;
421: so->so_snd.sb_flags |= SB_NOINTR;
422:
423: /* Initialize other non-zero congestion variables */
424: nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
425: nmp->nm_srtt[3] = (NFS_TIMEO << 3);
426: nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
427: nmp->nm_sdrtt[3] = 0;
428: nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */
429: nmp->nm_sent = 0;
430: nmp->nm_timeouts = 0;
431: return (0);
432:
433: bad:
434: nfs_disconnect(nmp);
435: return (error);
436: }
437:
438: /*
439: * Reconnect routine:
440: * Called when a connection is broken on a reliable protocol.
441: * - clean up the old socket
442: * - nfs_connect() again
443: * - set R_MUSTRESEND for all outstanding requests on mount point
444: * If this fails the mount point is DEAD!
445: * nb: Must be called with the nfs_sndlock() set on the mount point.
446: */
447: static int
448: nfs_reconnect(rep)
449: register struct nfsreq *rep;
450: {
451: register struct nfsreq *rp;
452: register struct nfsmount *nmp = rep->r_nmp;
453: int error;
454:
455: nfs_disconnect(nmp);
456: while ((error = nfs_connect(nmp, rep))) {
457: if (error == EINTR || error == ERESTART)
458: return (EINTR);
459: (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
460: }
461:
462: NFS_DPF(DUP, ("nfs_reconnect RESEND\n"));
463: /*
464: * Loop through outstanding request list and fix up all requests
465: * on old socket.
466: */
467: for (rp = nfs_reqq.tqh_first; rp != 0; rp = rp->r_chain.tqe_next) {
468: if (rp->r_nmp == nmp)
469: rp->r_flags |= R_MUSTRESEND;
470: }
471: return (0);
472: }
473:
474: /*
475: * NFS disconnect. Clean up and unlink.
476: */
477: void
478: nfs_disconnect(nmp)
479: register struct nfsmount *nmp;
480: {
481: register struct socket *so;
482:
483: if (nmp->nm_so) {
484: so = nmp->nm_so;
485: nmp->nm_so = (struct socket *)0;
486: soshutdown(so, 2);
487: soclose(so);
488: }
489: }
490:
491: /*
492: * This is the nfs send routine. For connection based socket types, it
493: * must be called with an nfs_sndlock() on the socket.
494: * "rep == NULL" indicates that it has been called from a server.
495: * For the client side:
496: * - return EINTR if the RPC is terminated, 0 otherwise
497: * - set R_MUSTRESEND if the send fails for any reason
498: * - do any cleanup required by recoverable socket errors (???)
499: * For the server side:
500: * - return EINTR or ERESTART if interrupted by a signal
501: * - return EPIPE if a connection is lost for connection based sockets (TCP...)
502: * - do any cleanup required by recoverable socket errors (???)
503: */
504: int
505: nfs_send(so, nam, top, rep)
506: register struct socket *so;
507: struct mbuf *nam;
508: register struct mbuf *top;
509: struct nfsreq *rep;
510: {
511: struct sockaddr *sendnam;
512: int error, soflags, flags;
513:
514: if (rep) {
515: if (rep->r_flags & R_SOFTTERM) {
516: m_freem(top);
517: return (EINTR);
518: }
519: if ((so = rep->r_nmp->nm_so) == NULL) {
520: rep->r_flags |= R_MUSTRESEND;
521: m_freem(top);
522: return (0);
523: }
524: rep->r_flags &= ~R_MUSTRESEND;
525: soflags = rep->r_nmp->nm_soflags;
526: } else
527: soflags = so->so_proto->pr_flags;
528: if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED) ||
529: (nam == 0))
530: sendnam = (struct sockaddr *)0;
531: else
532: sendnam = mtod(nam, struct sockaddr *);
533:
534: if (so->so_type == SOCK_SEQPACKET)
535: flags = MSG_EOR;
536: else
537: flags = 0;
538:
539: #if DIAGNOSTIC
540: if (rep)
541: nfsdup(rep);
542: #endif
543: error = sosend(so, sendnam, (struct uio *)0, top,
544: (struct mbuf *)0, flags);
545: if (error) {
546: if (rep) {
547: log(LOG_INFO, "nfs send error %d for server %s\n",
548: error,
549: rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
550: /*
551: * Deal with errors for the client side.
552: */
553: if (rep->r_flags & R_SOFTTERM)
554: error = EINTR;
555: else {
556: rep->r_flags |= R_MUSTRESEND;
557: NFS_DPF(DUP,
558: ("nfs_send RESEND error=%d\n",
559: error));
560: }
561: } else
562: log(LOG_INFO, "nfsd send error %d\n", error);
563:
564: /*
565: * Handle any recoverable (soft) socket errors here. (???)
566: */
567: if (error != EINTR && error != ERESTART &&
568: error != EWOULDBLOCK && error != EPIPE)
569: error = 0;
570: }
571: return (error);
572: }
573:
574: /*
575: * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
576: * done by soreceive(), but for SOCK_STREAM we must deal with the Record
577: * Mark and consolidate the data into a new mbuf list.
578: * nb: Sometimes TCP passes the data up to soreceive() in long lists of
579: * small mbufs.
580: * For SOCK_STREAM we must be very careful to read an entire record once
581: * we have read any of it, even if the system call has been interrupted.
582: */
583: static int
584: nfs_receive(rep, aname, mp)
585: register struct nfsreq *rep;
586: struct mbuf **aname;
587: struct mbuf **mp;
588: {
589: register struct socket *so;
590: struct uio auio;
591: struct iovec aio;
592: register struct mbuf *m;
593: struct mbuf *control;
594: u_long len;
595: struct sockaddr **getnam;
596: struct sockaddr *tmp_nam;
597: struct mbuf *mhck;
598: struct sockaddr_in *sin;
599: int error, sotype, rcvflg;
600: struct proc *p = current_proc(); /* XXX */
601:
602: /*
603: * Set up arguments for soreceive()
604: */
605: *mp = (struct mbuf *)0;
606: *aname = (struct mbuf *)0;
607: sotype = rep->r_nmp->nm_sotype;
608:
609: /*
610: * For reliable protocols, lock against other senders/receivers
611: * in case a reconnect is necessary.
612: * For SOCK_STREAM, first get the Record Mark to find out how much
613: * more there is to get.
614: * We must lock the socket against other receivers
615: * until we have an entire rpc request/reply.
616: */
617: if (sotype != SOCK_DGRAM) {
618: error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
619: if (error)
620: return (error);
621: tryagain:
622: /*
623: * Check for fatal errors and resending request.
624: */
625: /*
626: * Ugh: If a reconnect attempt just happened, nm_so
627: * would have changed. NULL indicates a failed
628: * attempt that has essentially shut down this
629: * mount point.
630: */
631: if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
632: nfs_sndunlock(&rep->r_nmp->nm_flag);
633: return (EINTR);
634: }
635: so = rep->r_nmp->nm_so;
636: if (!so) {
637: error = nfs_reconnect(rep);
638: if (error) {
639: nfs_sndunlock(&rep->r_nmp->nm_flag);
640: return (error);
641: }
642: goto tryagain;
643: }
644: while (rep->r_flags & R_MUSTRESEND) {
645: m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
646: nfsstats.rpcretries++;
647: NFS_DPF(DUP,
648: ("nfs_receive RESEND %s\n",
649: rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname));
650: error = nfs_send(so, rep->r_nmp->nm_nam, m, rep);
651: if (error) {
652: if (error == EINTR || error == ERESTART ||
653: (error = nfs_reconnect(rep))) {
654: nfs_sndunlock(&rep->r_nmp->nm_flag);
655: return (error);
656: }
657: goto tryagain;
658: }
659: }
660: nfs_sndunlock(&rep->r_nmp->nm_flag);
661: if (sotype == SOCK_STREAM) {
662: aio.iov_base = (caddr_t) &len;
663: aio.iov_len = sizeof(u_long);
664: auio.uio_iov = &aio;
665: auio.uio_iovcnt = 1;
666: auio.uio_segflg = UIO_SYSSPACE;
667: auio.uio_rw = UIO_READ;
668: auio.uio_offset = 0;
669: auio.uio_resid = sizeof(u_long);
670: auio.uio_procp = p;
671: do {
672: rcvflg = MSG_WAITALL;
673: error = soreceive(so, (struct sockaddr **)0, &auio,
674: (struct mbuf **)0, (struct mbuf **)0, &rcvflg);
675: if (!rep->r_nmp) /* if unmounted then bailout */
676: goto shutout;
677: if (error == EWOULDBLOCK && rep) {
678: if (rep->r_flags & R_SOFTTERM)
679: return (EINTR);
680: }
681: } while (error == EWOULDBLOCK);
682: if (!error && auio.uio_resid > 0) {
683: log(LOG_INFO,
684: "short receive (%d/%d) from nfs server %s\n",
685: sizeof(u_long) - auio.uio_resid,
686: sizeof(u_long),
687: rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
688: error = EPIPE;
689: }
690: if (error)
691: goto errout;
692: len = ntohl(len) & ~0x80000000;
693: /*
694: * This is SERIOUS! We are out of sync with the sender
695: * and forcing a disconnect/reconnect is all I can do.
696: */
697: if (len > NFS_MAXPACKET) {
698: log(LOG_ERR, "%s (%d) from nfs server %s\n",
699: "impossible packet length",
700: len,
701: rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
702: error = EFBIG;
703: goto errout;
704: }
705: auio.uio_resid = len;
706: do {
707: rcvflg = MSG_WAITALL;
708: error = soreceive(so, (struct sockaddr **)0,
709: &auio, mp, (struct mbuf **)0, &rcvflg);
710: if (!rep->r_nmp) /* if unmounted then bailout */
711: goto shutout;
712: } while (error == EWOULDBLOCK || error == EINTR ||
713: error == ERESTART);
714: if (!error && auio.uio_resid > 0) {
715: log(LOG_INFO,
716: "short receive (%d/%d) from nfs server %s\n",
717: len - auio.uio_resid, len,
718: rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
719: error = EPIPE;
720: }
721: } else {
722: /*
723: * NB: Since uio_resid is big, MSG_WAITALL is ignored
724: * and soreceive() will return when it has either a
725: * control msg or a data msg.
726: * We have no use for control msg., but must grab them
727: * and then throw them away so we know what is going
728: * on.
729: */
730: auio.uio_resid = len = 100000000; /* Anything Big */
731: auio.uio_procp = p;
732: do {
733: rcvflg = 0;
734: error = soreceive(so, (struct sockaddr **)0,
735: &auio, mp, &control, &rcvflg);
736: if (!rep->r_nmp) /* if unmounted then bailout */
737: goto shutout;
738: if (control)
739: m_freem(control);
740: if (error == EWOULDBLOCK && rep) {
741: if (rep->r_flags & R_SOFTTERM)
742: return (EINTR);
743: }
744: } while (error == EWOULDBLOCK ||
745: (!error && *mp == NULL && control));
746: if ((rcvflg & MSG_EOR) == 0)
747: printf("Egad!!\n");
748: if (!error && *mp == NULL)
749: error = EPIPE;
750: len -= auio.uio_resid;
751: }
752: errout:
753: if (error && error != EINTR && error != ERESTART) {
754: m_freem(*mp);
755: *mp = (struct mbuf *)0;
756: if (error != EPIPE)
757: log(LOG_INFO,
758: "receive error %d from nfs server %s\n",
759: error,
760: rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
761: error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
762: if (!error)
763: error = nfs_reconnect(rep);
764: if (!error)
765: goto tryagain;
766: }
767: } else {
768: if ((so = rep->r_nmp->nm_so) == NULL)
769: return (EACCES);
770: if (so->so_state & SS_ISCONNECTED)
771: getnam = (struct sockaddr **)0;
772: else
773: getnam = &tmp_nam;;
774: auio.uio_resid = len = 1000000;
775: auio.uio_procp = p;
776: do {
777: rcvflg = 0;
778: error = soreceive(so, getnam, &auio, mp,
779: (struct mbuf **)0, &rcvflg);
780:
781: if ((getnam) && (*getnam)) {
782: MGET(mhck, M_WAIT, MT_SONAME);
783: mhck->m_len = (*getnam)->sa_len;
784: sin = mtod(mhck, struct sockaddr_in *);
785: bcopy(*getnam, sin, sizeof(struct sockaddr_in));
786: mhck->m_hdr.mh_len = sizeof(struct sockaddr_in);
787: FREE(*getnam, M_SONAME);
788: *aname = mhck;
789: }
790: if (!rep->r_nmp) /* if unmounted then bailout */
791: goto shutout;
792:
793: if (error == EWOULDBLOCK &&
794: (rep->r_flags & R_SOFTTERM))
795: return (EINTR);
796: } while (error == EWOULDBLOCK);
797: len -= auio.uio_resid;
798: }
799: shutout:
800: if (error) {
801: m_freem(*mp);
802: *mp = (struct mbuf *)0;
803: }
804: return (error);
805: }
806:
807: /*
808: * Implement receipt of reply on a socket.
809: * We must search through the list of received datagrams matching them
810: * with outstanding requests using the xid, until ours is found.
811: */
812: /* ARGSUSED */
813: int
814: nfs_reply(myrep)
815: struct nfsreq *myrep;
816: {
817: register struct nfsreq *rep;
818: register struct nfsmount *nmp = myrep->r_nmp;
819: register long t1;
820: struct mbuf *mrep, *md;
821: struct mbuf *nam;
822: u_long rxid, *tl;
823: caddr_t dpos, cp2;
824: int error;
825:
826: /*
827: * Loop around until we get our own reply
828: */
829: for (;;) {
830: /*
831: * Lock against other receivers so that I don't get stuck in
832: * sbwait() after someone else has received my reply for me.
833: * Also necessary for connection based protocols to avoid
834: * race conditions during a reconnect.
835: * If nfs_rcvlock() returns EALREADY, that means that
836: * the reply has already been recieved by another
837: * process and we can return immediately. In this
838: * case, the lock is not taken to avoid races with
839: * other processes.
840: */
841: error = nfs_rcvlock(myrep);
842: if (error == EALREADY)
843: return (0);
844: if (error)
845: return (error);
846: /*
847: * Get the next Rpc reply off the socket
848: */
849: error = nfs_receive(myrep, &nam, &mrep);
850: /*
851: * Bailout asap if nfsmount struct gone (unmounted)
852: */
853: if (!myrep->r_nmp)
854: return(ECONNABORTED);
855: nfs_rcvunlock(&nmp->nm_flag);
856: if (error) {
857:
858: /*
859: * Ignore routing errors on connectionless protocols??
860: */
861: if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
862: nmp->nm_so->so_error = 0;
863: if (myrep->r_flags & R_GETONEREP)
864: return (0);
865: continue;
866: }
867: return (error);
868: }
869: if (nam)
870: m_freem(nam);
871:
872: /*
873: * Get the xid and check that it is an rpc reply
874: */
875: md = mrep;
876: dpos = mtod(md, caddr_t);
877: nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
878: rxid = *tl++;
879: if (*tl != rpc_reply) {
880: #ifndef NFS_NOSERVER
881: if (nmp->nm_flag & NFSMNT_NQNFS) {
882: if (nqnfs_callback(nmp, mrep, md, dpos))
883: nfsstats.rpcinvalid++;
884: } else {
885: nfsstats.rpcinvalid++;
886: m_freem(mrep);
887: }
888: #else
889: nfsstats.rpcinvalid++;
890: m_freem(mrep);
891: #endif
892: nfsmout:
893: if (myrep->r_flags & R_GETONEREP)
894: return (0);
895: continue;
896: }
897:
898: /*
899: * Loop through the request list to match up the reply
900: * Iff no match, just drop the datagram
901: */
902: for (rep = nfs_reqq.tqh_first; rep != 0;
903: rep = rep->r_chain.tqe_next) {
904: if (rep->r_mrep == NULL && rxid == rep->r_xid) {
905: /* Found it.. */
906: rep->r_mrep = mrep;
907: rep->r_md = md;
908: rep->r_dpos = dpos;
909: if (nfsrtton) {
910: struct rttl *rt;
911:
912: rt = &nfsrtt.rttl[nfsrtt.pos];
913: rt->proc = rep->r_procnum;
914: rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
915: rt->sent = nmp->nm_sent;
916: rt->cwnd = nmp->nm_cwnd;
917: if (proct[rep->r_procnum] == 0)
918: panic("nfs_reply: proct[%d] is zero", rep->r_procnum);
919: rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
920: rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
921: rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
922: rt->tstamp = time;
923: if (rep->r_flags & R_TIMING)
924: rt->rtt = rep->r_rtt;
925: else
926: rt->rtt = 1000000;
927: nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
928: }
929: /*
930: * Update congestion window.
931: * Do the additive increase of
932: * one rpc/rtt.
933: */
934: if (nmp->nm_cwnd <= nmp->nm_sent) {
935: nmp->nm_cwnd +=
936: (NFS_CWNDSCALE * NFS_CWNDSCALE +
937: (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
938: if (nmp->nm_cwnd > NFS_MAXCWND)
939: nmp->nm_cwnd = NFS_MAXCWND;
940: }
941: rep->r_flags &= ~R_SENT;
942: nmp->nm_sent -= NFS_CWNDSCALE;
943: /*
944: * Update rtt using a gain of 0.125 on the mean
945: * and a gain of 0.25 on the deviation.
946: */
947: if (rep->r_flags & R_TIMING) {
948: /*
949: * Since the timer resolution of
950: * NFS_HZ is so course, it can often
951: * result in r_rtt == 0. Since
952: * r_rtt == N means that the actual
953: * rtt is between N+dt and N+2-dt ticks,
954: * add 1.
955: */
956: if (proct[rep->r_procnum] == 0)
957: panic("nfs_reply: proct[%d] is zero", rep->r_procnum);
958: t1 = rep->r_rtt + 1;
959: t1 -= (NFS_SRTT(rep) >> 3);
960: NFS_SRTT(rep) += t1;
961: if (t1 < 0)
962: t1 = -t1;
963: t1 -= (NFS_SDRTT(rep) >> 2);
964: NFS_SDRTT(rep) += t1;
965: }
966: nmp->nm_timeouts = 0;
967: break;
968: }
969: }
970: /*
971: * If not matched to a request, drop it.
972: * If it's mine, get out.
973: */
974: if (rep == 0) {
975: nfsstats.rpcunexpected++;
976: m_freem(mrep);
977: } else if (rep == myrep) {
978: if (rep->r_mrep == NULL)
979: panic("nfsreply nil");
980: return (0);
981: }
982: if (myrep->r_flags & R_GETONEREP)
983: return (0);
984: }
985: }
986:
987: /*
988: * nfs_request - goes something like this
989: * - fill in request struct
990: * - links it into list
991: * - calls nfs_send() for first transmit
992: * - calls nfs_receive() to get reply
993: * - break down rpc header and return with nfs reply pointed to
994: * by mrep or error
995: * nb: always frees up mreq mbuf list
996: */
997: int
998: nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp)
999: struct vnode *vp;
1000: struct mbuf *mrest;
1001: int procnum;
1002: struct proc *procp;
1003: struct ucred *cred;
1004: struct mbuf **mrp;
1005: struct mbuf **mdp;
1006: caddr_t *dposp;
1007: {
1008: register struct mbuf *m, *mrep;
1009: register struct nfsreq *rep;
1010: register u_long *tl;
1011: register int i;
1012: struct nfsmount *nmp;
1013: struct mbuf *md, *mheadend;
1014: struct nfsnode *np;
1015: char nickv[RPCX_NICKVERF];
1016: time_t reqtime, waituntil;
1017: caddr_t dpos, cp2;
1018: int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type;
1019: int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0;
1020: int verf_len, verf_type;
1021: u_long xid;
1022: u_quad_t frev;
1023: char *auth_str, *verf_str;
1024: NFSKERBKEY_T key; /* save session key */
1025:
1026: NFSTRACE4(NFSTRC_REQ, vp, procnum, 0, 0);
1027: nmp = VFSTONFS(vp->v_mount);
1028:
1029: MALLOC_ZONE(rep, struct nfsreq *,
1030: sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
1031: rep->r_nmp = nmp;
1032: rep->r_vp = vp;
1033: rep->r_procp = procp;
1034: rep->r_procnum = procnum;
1035: i = 0;
1036: m = mrest;
1037: while (m) {
1038: i += m->m_len;
1039: m = m->m_next;
1040: }
1041: mrest_len = i;
1042:
1043: /*
1044: * Get the RPC header with authorization.
1045: */
1046: kerbauth:
1047: verf_str = auth_str = (char *)0;
1048: if (nmp->nm_flag & NFSMNT_KERB) {
1049: verf_str = nickv;
1050: verf_len = sizeof (nickv);
1051: auth_type = RPCAUTH_KERB4;
1052: bzero((caddr_t)key, sizeof (key));
1053: if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str,
1054: &auth_len, verf_str, verf_len)) {
1055: error = nfs_getauth(nmp, rep, cred, &auth_str,
1056: &auth_len, verf_str, &verf_len, key);
1057: if (error) {
1058: _FREE_ZONE((caddr_t)rep,
1059: sizeof (struct nfsreq), M_NFSREQ);
1060: m_freem(mrest);
1061: return (error);
1062: }
1063: }
1064: } else {
1065: auth_type = RPCAUTH_UNIX;
1066: if (cred->cr_ngroups < 1)
1067: panic("nfsreq nogrps");
1068: auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
1069: nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) +
1070: 5 * NFSX_UNSIGNED;
1071: }
1072: m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
1073: auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid);
1074: if (auth_str)
1075: _FREE(auth_str, M_TEMP);
1076:
1077: /*
1078: * For stream protocols, insert a Sun RPC Record Mark.
1079: */
1080: if (nmp->nm_sotype == SOCK_STREAM) {
1081: M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
1082: *mtod(m, u_long *) = htonl(0x80000000 |
1083: (m->m_pkthdr.len - NFSX_UNSIGNED));
1084: }
1085: rep->r_mreq = m;
1086: rep->r_xid = xid;
1087: tryagain:
1088: if (nmp->nm_flag & NFSMNT_SOFT)
1089: rep->r_retry = nmp->nm_retry;
1090: else
1091: rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */
1092: rep->r_rtt = rep->r_rexmit = 0;
1093: if (proct[procnum] > 0)
1094: rep->r_flags = R_TIMING;
1095: else
1096: rep->r_flags = 0;
1097: rep->r_mrep = NULL;
1098:
1099: /*
1100: * Do the client side RPC.
1101: */
1102: nfsstats.rpcrequests++;
1103: /*
1104: * Chain request into list of outstanding requests. Be sure
1105: * to put it LAST so timer finds oldest requests first.
1106: */
1107: s = splsoftclock();
1108: TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);
1109:
1110: /* Get send time for nqnfs */
1111: reqtime = time.tv_sec;
1112:
1113: /*
1114: * If backing off another request or avoiding congestion, don't
1115: * send this one now but let timer do it. If not timing a request,
1116: * do it now.
1117: */
1118: if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
1119: (nmp->nm_flag & NFSMNT_DUMBTIMR) ||
1120: nmp->nm_sent < nmp->nm_cwnd)) {
1121: splx(s);
1122: if (nmp->nm_soflags & PR_CONNREQUIRED)
1123: error = nfs_sndlock(&nmp->nm_flag, rep);
1124: if (!error) {
1125: m = m_copym(m, 0, M_COPYALL, M_WAIT);
1126: error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
1127: if (nmp->nm_soflags & PR_CONNREQUIRED)
1128: nfs_sndunlock(&nmp->nm_flag);
1129: }
1130: if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
1131: nmp->nm_sent += NFS_CWNDSCALE;
1132: rep->r_flags |= R_SENT;
1133: }
1134: } else {
1135: splx(s);
1136: rep->r_rtt = -1;
1137: }
1138:
1139: /*
1140: * Wait for the reply from our send or the timer's.
1141: */
1142: if (!error || error == EPIPE)
1143: error = nfs_reply(rep);
1144:
1145: /*
1146: * RPC done, unlink the request.
1147: */
1148: s = splsoftclock();
1149: TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
1150: splx(s);
1151:
1152: /*
1153: * Decrement the outstanding request count.
1154: */
1155: if (rep->r_flags & R_SENT) {
1156: rep->r_flags &= ~R_SENT; /* paranoia */
1157: nmp->nm_sent -= NFS_CWNDSCALE;
1158: }
1159:
1160: /*
1161: * If there was a successful reply and a tprintf msg.
1162: * tprintf a response.
1163: */
1164: if (!error && (rep->r_flags & R_TPRINTFMSG))
1165: nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
1166: "is alive again");
1167: mrep = rep->r_mrep;
1168: md = rep->r_md;
1169: dpos = rep->r_dpos;
1170: if (error) {
1171: m_freem(rep->r_mreq);
1172: _FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
1173: return (error);
1174: }
1175:
1176: /*
1177: * break down the rpc header and check if ok
1178: */
1179: nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
1180: if (*tl++ == rpc_msgdenied) {
1181: if (*tl == rpc_mismatch)
1182: error = EOPNOTSUPP;
1183: else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
1184: if (!failed_auth) {
1185: failed_auth++;
1186: mheadend->m_next = (struct mbuf *)0;
1187: m_freem(mrep);
1188: m_freem(rep->r_mreq);
1189: goto kerbauth;
1190: } else
1191: error = EAUTH;
1192: } else
1193: error = EACCES;
1194: m_freem(mrep);
1195: m_freem(rep->r_mreq);
1196: _FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
1197: return (error);
1198: }
1199:
1200: /*
1201: * Grab any Kerberos verifier, otherwise just throw it away.
1202: */
1203: verf_type = fxdr_unsigned(int, *tl++);
1204: i = fxdr_unsigned(int, *tl);
1205: if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
1206: error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep);
1207: if (error)
1208: goto nfsmout;
1209: } else if (i > 0)
1210: nfsm_adv(nfsm_rndup(i));
1211: nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
1212: /* 0 == ok */
1213: if (*tl == 0) {
1214: nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
1215: if (*tl != 0) {
1216: error = fxdr_unsigned(int, *tl);
1217: if ((nmp->nm_flag & NFSMNT_NFSV3) &&
1218: error == NFSERR_TRYLATER) {
1219: m_freem(mrep);
1220: error = 0;
1221: waituntil = time.tv_sec + trylater_delay;
1222: NFS_DPF(DUP,
1223: ("nfs_request %s flag=%x trylater_cnt=%x waituntil=%lx trylater_delay=%x\n",
1224: nmp->nm_mountp->mnt_stat.f_mntfromname,
1225: nmp->nm_flag, trylater_cnt, waituntil,
1226: trylater_delay));
1227: while (time.tv_sec < waituntil)
1228: (void) tsleep((caddr_t)&lbolt,
1229: PSOCK, "nqnfstry", 0);
1230: trylater_delay *= nfs_backoff[trylater_cnt];
1231: if (trylater_cnt < 7)
1232: trylater_cnt++;
1233: goto tryagain;
1234: }
1235:
1236: /*
1237: * If the File Handle was stale, invalidate the
1238: * lookup cache, just in case.
1239: */
1240: if (error == ESTALE)
1241: cache_purge(vp);
1242: if (nmp->nm_flag & NFSMNT_NFSV3) {
1243: *mrp = mrep;
1244: *mdp = md;
1245: *dposp = dpos;
1246: error |= NFSERR_RETERR;
1247: } else
1248: m_freem(mrep);
1249: m_freem(rep->r_mreq);
1250: _FREE_ZONE((caddr_t)rep,
1251: sizeof (struct nfsreq), M_NFSREQ);
1252: return (error);
1253: }
1254:
1255: /*
1256: * For nqnfs, get any lease in reply
1257: */
1258: if (nmp->nm_flag & NFSMNT_NQNFS) {
1259: nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
1260: if (*tl) {
1261: np = VTONFS(vp);
1262: nqlflag = fxdr_unsigned(int, *tl);
1263: nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED);
1264: cachable = fxdr_unsigned(int, *tl++);
1265: reqtime += fxdr_unsigned(int, *tl++);
1266: if (reqtime > time.tv_sec) {
1267: fxdr_hyper(tl, &frev);
1268: nqnfs_clientlease(nmp, np, nqlflag,
1269: cachable, reqtime, frev);
1270: }
1271: }
1272: }
1273: *mrp = mrep;
1274: *mdp = md;
1275: *dposp = dpos;
1276: m_freem(rep->r_mreq);
1277: FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
1278: return (0);
1279: }
1280: m_freem(mrep);
1281: error = EPROTONOSUPPORT;
1282: nfsmout:
1283: m_freem(rep->r_mreq);
1284: _FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ);
1285: return (error);
1286: }
1287:
1288: #ifndef NFS_NOSERVER
1289: /*
1290: * Generate the rpc reply header
1291: * siz arg. is used to decide if adding a cluster is worthwhile
1292: */
1293: int
1294: nfs_rephead(siz, nd, slp, err, cache, frev, mrq, mbp, bposp)
1295: int siz;
1296: struct nfsrv_descript *nd;
1297: struct nfssvc_sock *slp;
1298: int err;
1299: int cache;
1300: u_quad_t *frev;
1301: struct mbuf **mrq;
1302: struct mbuf **mbp;
1303: caddr_t *bposp;
1304: {
1305: register u_long *tl;
1306: register struct mbuf *mreq;
1307: caddr_t bpos;
1308: struct mbuf *mb, *mb2;
1309:
1310: MGETHDR(mreq, M_WAIT, MT_DATA);
1311: mb = mreq;
1312: /*
1313: * If this is a big reply, use a cluster else
1314: * try and leave leading space for the lower level headers.
1315: */
1316: siz += RPC_REPLYSIZ;
1317: if (siz >= MINCLSIZE) {
1318: MCLGET(mreq, M_WAIT);
1319: } else
1320: mreq->m_data += max_hdr;
1321: tl = mtod(mreq, u_long *);
1322: mreq->m_len = 6 * NFSX_UNSIGNED;
1323: bpos = ((caddr_t)tl) + mreq->m_len;
1324: *tl++ = txdr_unsigned(nd->nd_retxid);
1325: *tl++ = rpc_reply;
1326: if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
1327: *tl++ = rpc_msgdenied;
1328: if (err & NFSERR_AUTHERR) {
1329: *tl++ = rpc_autherr;
1330: *tl = txdr_unsigned(err & ~NFSERR_AUTHERR);
1331: mreq->m_len -= NFSX_UNSIGNED;
1332: bpos -= NFSX_UNSIGNED;
1333: } else {
1334: *tl++ = rpc_mismatch;
1335: *tl++ = txdr_unsigned(RPC_VER2);
1336: *tl = txdr_unsigned(RPC_VER2);
1337: }
1338: } else {
1339: *tl++ = rpc_msgaccepted;
1340:
1341: /*
1342: * For Kerberos authentication, we must send the nickname
1343: * verifier back, otherwise just RPCAUTH_NULL.
1344: */
1345: if (nd->nd_flag & ND_KERBFULL) {
1346: register struct nfsuid *nuidp;
1347: struct timeval ktvin, ktvout;
1348:
1349: for (nuidp = NUIDHASH(slp, nd->nd_cr.cr_uid)->lh_first;
1350: nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
1351: if (nuidp->nu_cr.cr_uid == nd->nd_cr.cr_uid &&
1352: (!nd->nd_nam2 || netaddr_match(NU_NETFAM(nuidp),
1353: &nuidp->nu_haddr, nd->nd_nam2)))
1354: break;
1355: }
1356: if (nuidp) {
1357: ktvin.tv_sec =
1358: txdr_unsigned(nuidp->nu_timestamp.tv_sec - 1);
1359: ktvin.tv_usec =
1360: txdr_unsigned(nuidp->nu_timestamp.tv_usec);
1361:
1362: /*
1363: * Encrypt the timestamp in ecb mode using the
1364: * session key.
1365: */
1366: #if NFSKERB
1367: XXX
1368: #endif
1369:
1370: *tl++ = rpc_auth_kerb;
1371: *tl++ = txdr_unsigned(3 * NFSX_UNSIGNED);
1372: *tl = ktvout.tv_sec;
1373: nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
1374: *tl++ = ktvout.tv_usec;
1375: *tl++ = txdr_unsigned(nuidp->nu_cr.cr_uid);
1376: } else {
1377: *tl++ = 0;
1378: *tl++ = 0;
1379: }
1380: } else {
1381: *tl++ = 0;
1382: *tl++ = 0;
1383: }
1384: switch (err) {
1385: case EPROGUNAVAIL:
1386: *tl = txdr_unsigned(RPC_PROGUNAVAIL);
1387: break;
1388: case EPROGMISMATCH:
1389: *tl = txdr_unsigned(RPC_PROGMISMATCH);
1390: nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
1391: if (nd->nd_flag & ND_NQNFS) {
1392: *tl++ = txdr_unsigned(3);
1393: *tl = txdr_unsigned(3);
1394: } else {
1395: *tl++ = txdr_unsigned(2);
1396: *tl = txdr_unsigned(3);
1397: }
1398: break;
1399: case EPROCUNAVAIL:
1400: *tl = txdr_unsigned(RPC_PROCUNAVAIL);
1401: break;
1402: case EBADRPC:
1403: *tl = txdr_unsigned(RPC_GARBAGE);
1404: break;
1405: default:
1406: *tl = 0;
1407: if (err != NFSERR_RETVOID) {
1408: nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1409: if (err)
1410: *tl = txdr_unsigned(nfsrv_errmap(nd, err));
1411: else
1412: *tl = 0;
1413: }
1414: break;
1415: };
1416: }
1417:
1418: /*
1419: * For nqnfs, piggyback lease as requested.
1420: */
1421: if ((nd->nd_flag & ND_NQNFS) && err == 0) {
1422: if (nd->nd_flag & ND_LEASE) {
1423: nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED);
1424: *tl++ = txdr_unsigned(nd->nd_flag & ND_LEASE);
1425: *tl++ = txdr_unsigned(cache);
1426: *tl++ = txdr_unsigned(nd->nd_duration);
1427: txdr_hyper(frev, tl);
1428: } else {
1429: nfsm_build(tl, u_long *, NFSX_UNSIGNED);
1430: *tl = 0;
1431: }
1432: }
1433: if (mrq != NULL)
1434: *mrq = mreq;
1435: *mbp = mb;
1436: *bposp = bpos;
1437: if (err != 0 && err != NFSERR_RETVOID)
1438: nfsstats.srvrpc_errs++;
1439: return (0);
1440: }
1441:
1442:
1443: #endif /* NFS_NOSERVER */
1444:
1445: /*
1446: * Nfs timer routine
1447: * Scan the nfsreq list and retranmit any requests that have timed out
1448: * To avoid retransmission attempts on STREAM sockets (in the future) make
1449: * sure to set the r_retry field to 0 (implies nm_retry == 0).
1450: */
1451: void
1452: nfs_timer(arg)
1453: void *arg; /* never used */
1454: {
1455: register struct nfsreq *rep;
1456: register struct mbuf *m;
1457: register struct socket *so;
1458: register struct nfsmount *nmp;
1459: register int timeo;
1460: int s, error;
1461: #ifndef NFS_NOSERVER
1462: static long lasttime = 0;
1463: register struct nfssvc_sock *slp;
1464: u_quad_t cur_usec;
1465: #endif /* NFS_NOSERVER */
1466: #if DIAGNOSTIC
1467: int rttdiag;
1468: #endif
1469:
1470: s = splnet();
1471: for (rep = nfs_reqq.tqh_first; rep != 0; rep = rep->r_chain.tqe_next) {
1472: nmp = rep->r_nmp;
1473: if (!nmp) /* unmounted */
1474: continue;
1475: if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
1476: continue;
1477: if (nfs_sigintr(nmp, rep, rep->r_procp)) {
1478: rep->r_flags |= R_SOFTTERM;
1479: continue;
1480: }
1481: if (rep->r_rtt >= 0) {
1482: rep->r_rtt++;
1483: if (nmp->nm_flag & NFSMNT_DUMBTIMR)
1484: timeo = nmp->nm_timeo;
1485: else
1486: timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
1487: /* ensure 62.5 ms floor */
1488: while (16 * timeo < hz)
1489: timeo *= 2;
1490: if (nmp->nm_timeouts > 0)
1491: timeo *= nfs_backoff[nmp->nm_timeouts - 1];
1492: if (rep->r_rtt <= timeo)
1493: continue;
1494: if (nmp->nm_timeouts < 8)
1495: nmp->nm_timeouts++;
1496: }
1497: /*
1498: * Check for server not responding
1499: */
1500: if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
1501: rep->r_rexmit > nmp->nm_deadthresh) {
1502: nfs_msg(rep->r_procp,
1503: nmp->nm_mountp->mnt_stat.f_mntfromname,
1504: "not responding");
1505: rep->r_flags |= R_TPRINTFMSG;
1506: }
1507: if (rep->r_rexmit >= rep->r_retry) { /* too many */
1508: nfsstats.rpctimeouts++;
1509: rep->r_flags |= R_SOFTTERM;
1510: continue;
1511: }
1512: if (nmp->nm_sotype != SOCK_DGRAM) {
1513: if (++rep->r_rexmit > NFS_MAXREXMIT)
1514: rep->r_rexmit = NFS_MAXREXMIT;
1515: continue;
1516: }
1517: if ((so = nmp->nm_so) == NULL)
1518: continue;
1519:
1520: /*
1521: * If there is enough space and the window allows..
1522: * Resend it
1523: * Set r_rtt to -1 in case we fail to send it now.
1524: */
1525: #if DIAGNOSTIC
1526: rttdiag = rep->r_rtt;
1527: #endif
1528: rep->r_rtt = -1;
1529: if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
1530: ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
1531: (rep->r_flags & R_SENT) ||
1532: nmp->nm_sent < nmp->nm_cwnd) &&
1533: (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
1534:
1535: struct proc *p = current_proc();
1536:
1537: #if DIAGNOSTIC
1538: if (rep->r_flags & R_SENT && nfsprnttimo &&
1539: nmp->nm_timeouts >= nfsprnttimo) {
1540: int t = proct[rep->r_procnum];
1541: if (t)
1542: NFS_DPF(DUP, ("nfs_timer %s nmtm=%d tms=%d rtt=%d tm=%d p=%d A=%d D=%d\n", nmp->nm_mountp->mnt_stat.f_mntfromname, nmp->nm_timeo, nmp->nm_timeouts, rttdiag, timeo, rep->r_procnum, nmp->nm_srtt[t-1], nmp->nm_sdrtt[t-1]));
1543: else
1544: NFS_DPF(DUP, ("nfs_timer %s nmtm=%d tms=%d rtt=%d tm=%d p=%d\n", nmp->nm_mountp->mnt_stat.f_mntfromname, nmp->nm_timeo, nmp->nm_timeouts, rttdiag, timeo, rep->r_procnum));
1545: }
1546: nfsdup(rep);
1547: #endif
1548: if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
1549: error = (*so->so_proto->pr_usrreqs->pru_send)
1550: (so, 0, m, 0, 0, p);
1551: else
1552: error = (*so->so_proto->pr_usrreqs->pru_send)
1553: (so, 0, m, mtod(nmp->nm_nam, struct sockaddr *), 0, p);
1554:
1555: if (error) {
1556: if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
1557: so->so_error = 0;
1558: } else {
1559: /*
1560: * Iff first send, start timing
1561: * else turn timing off, backoff timer
1562: * and divide congestion window by 2.
1563: */
1564: if (rep->r_flags & R_SENT) {
1565: rep->r_flags &= ~R_TIMING;
1566: if (++rep->r_rexmit > NFS_MAXREXMIT)
1567: rep->r_rexmit = NFS_MAXREXMIT;
1568: nmp->nm_cwnd >>= 1;
1569: if (nmp->nm_cwnd < NFS_CWNDSCALE)
1570: nmp->nm_cwnd = NFS_CWNDSCALE;
1571: nfsstats.rpcretries++;
1572: } else {
1573: rep->r_flags |= R_SENT;
1574: nmp->nm_sent += NFS_CWNDSCALE;
1575: }
1576: rep->r_rtt = 0;
1577: }
1578: }
1579: }
1580: #ifndef NFS_NOSERVER
1581: /*
1582: * Call the nqnfs server timer once a second to handle leases.
1583: */
1584: if (lasttime != time.tv_sec) {
1585: lasttime = time.tv_sec;
1586: nqnfs_serverd();
1587: }
1588:
1589: /*
1590: * Scan the write gathering queues for writes that need to be
1591: * completed now.
1592: */
1593: cur_usec = (u_quad_t)time.tv_sec * 1000000 + (u_quad_t)time.tv_usec;
1594: for (slp = nfssvc_sockhead.tqh_first; slp != 0;
1595: slp = slp->ns_chain.tqe_next) {
1596: if (slp->ns_tq.lh_first && slp->ns_tq.lh_first->nd_time<=cur_usec)
1597: nfsrv_wakenfsd(slp);
1598: }
1599: #endif /* NFS_NOSERVER */
1600: splx(s);
1601: timeout(nfs_timer, (void *)0, nfs_ticks);
1602: }
1603:
1604:
1605: /*
1606: * Test for a termination condition pending on the process.
1607: * This is used for NFSMNT_INT mounts.
1608: */
1609: int
1610: nfs_sigintr(nmp, rep, p)
1611: struct nfsmount *nmp;
1612: struct nfsreq *rep;
1613: register struct proc *p;
1614: {
1615:
1616: if (rep && (rep->r_flags & R_SOFTTERM))
1617: return (EINTR);
1618: if (!(nmp->nm_flag & NFSMNT_INT))
1619: return (0);
1620: if (p && p->p_siglist &&
1621: (((p->p_siglist & ~p->p_sigmask) & ~p->p_sigignore) &
1622: NFSINT_SIGMASK))
1623: return (EINTR);
1624: return (0);
1625: }
1626:
1627: /*
1628: * Lock a socket against others.
1629: * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
1630: * and also to avoid race conditions between the processes with nfs requests
1631: * in progress when a reconnect is necessary.
1632: */
1633: int
1634: nfs_sndlock(flagp, rep)
1635: register int *flagp;
1636: struct nfsreq *rep;
1637: {
1638: struct proc *p;
1639: int slpflag = 0, slptimeo = 0;
1640:
1641: if (rep) {
1642: p = rep->r_procp;
1643: if (rep->r_nmp->nm_flag & NFSMNT_INT)
1644: slpflag = PCATCH;
1645: } else
1646: p = (struct proc *)0;
1647: while (*flagp & NFSMNT_SNDLOCK) {
1648: if (nfs_sigintr(rep->r_nmp, rep, p))
1649: return (EINTR);
1650: *flagp |= NFSMNT_WANTSND;
1651: (void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsndlck",
1652: slptimeo);
1653: if (slpflag == PCATCH) {
1654: slpflag = 0;
1655: slptimeo = 2 * hz;
1656: }
1657: }
1658: *flagp |= NFSMNT_SNDLOCK;
1659: return (0);
1660: }
1661:
1662: /*
1663: * Unlock the stream socket for others.
1664: */
1665: void
1666: nfs_sndunlock(flagp)
1667: register int *flagp;
1668: {
1669:
1670: if ((*flagp & NFSMNT_SNDLOCK) == 0)
1671: panic("nfs sndunlock");
1672: *flagp &= ~NFSMNT_SNDLOCK;
1673: if (*flagp & NFSMNT_WANTSND) {
1674: *flagp &= ~NFSMNT_WANTSND;
1675: wakeup((caddr_t)flagp);
1676: }
1677: }
1678:
1679: static int
1680: nfs_rcvlock(rep)
1681: register struct nfsreq *rep;
1682: {
1683: register int *flagp = &rep->r_nmp->nm_flag;
1684: int slpflag, slptimeo = 0;
1685:
1686: if (*flagp & NFSMNT_INT)
1687: slpflag = PCATCH;
1688: else
1689: slpflag = 0;
1690: while (*flagp & NFSMNT_RCVLOCK) {
1691: NFSTRACE(NFSTRC_RCVLCKW, flagp);
1692: if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp))
1693: return (EINTR);
1694: *flagp |= NFSMNT_WANTRCV;
1695: (void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsrcvlk",
1696: slptimeo);
1697: /*
1698: * If our reply was recieved while we were sleeping,
1699: * then just return without taking the lock to avoid a
1700: * situation where a single iod could 'capture' the
1701: * recieve lock.
1702: */
1703: if (rep->r_mrep != NULL)
1704: return (EALREADY);
1705: if (slpflag == PCATCH) {
1706: slpflag = 0;
1707: slptimeo = 2 * hz;
1708: }
1709: }
1710: NFSTRACE(NFSTRC_RCVLCK, flagp);
1711: *flagp |= NFSMNT_RCVLOCK;
1712: return (0);
1713: }
1714:
1715: /*
1716: * Unlock the stream socket for others.
1717: */
1718: static void
1719: nfs_rcvunlock(flagp)
1720: register int *flagp;
1721: {
1722:
1723: if ((*flagp & NFSMNT_RCVLOCK) == 0)
1724: panic("nfs rcvunlock");
1725: *flagp &= ~NFSMNT_RCVLOCK;
1726: if (*flagp & NFSMNT_WANTRCV) {
1727: NFSTRACE(NFSTRC_RCVUNLW, flagp);
1728: *flagp &= ~NFSMNT_WANTRCV;
1729: wakeup((caddr_t)flagp);
1730: } else {
1731: NFSTRACE(NFSTRC_RCVUNL, flagp);
1732: }
1733: }
1734:
1735:
1736: #ifndef NFS_NOSERVER
1737: /*
1738: * Socket upcall routine for the nfsd sockets.
1739: * The caddr_t arg is a pointer to the "struct nfssvc_sock".
1740: * Essentially do as much as possible non-blocking, else punt and it will
1741: * be called with M_WAIT from an nfsd.
1742: */
1743: void
1744: nfsrv_rcv(so, arg, waitflag)
1745: struct socket *so;
1746: caddr_t arg;
1747: int waitflag;
1748: {
1749: register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
1750: register struct mbuf *m;
1751: struct mbuf *mp, *mhck;
1752: struct sockaddr *nam=0;
1753: struct uio auio;
1754: int flags, error;
1755: struct sockaddr_in *sin;
1756:
1757: if ((slp->ns_flag & SLP_VALID) == 0)
1758: return;
1759: #ifdef notdef
1760: /*
1761: * Define this to test for nfsds handling this under heavy load.
1762: */
1763: if (waitflag == M_DONTWAIT) {
1764: slp->ns_flag |= SLP_NEEDQ; goto dorecs;
1765: }
1766: #endif
1767: auio.uio_procp = NULL;
1768: if (so->so_type == SOCK_STREAM) {
1769: /*
1770: * If there are already records on the queue, defer soreceive()
1771: * to an nfsd so that there is feedback to the TCP layer that
1772: * the nfs servers are heavily loaded.
1773: */
1774: if (slp->ns_rec && waitflag == M_DONTWAIT) {
1775: slp->ns_flag |= SLP_NEEDQ;
1776: goto dorecs;
1777: }
1778:
1779: /*
1780: * Do soreceive().
1781: */
1782: auio.uio_resid = 1000000000;
1783: flags = MSG_DONTWAIT;
1784: error = soreceive(so, (struct sockaddr **) 0, &auio, &mp, (struct mbuf **)0, &flags);
1785: if (error || mp == (struct mbuf *)0) {
1786: if (error == EWOULDBLOCK)
1787: slp->ns_flag |= SLP_NEEDQ;
1788: else
1789: slp->ns_flag |= SLP_DISCONN;
1790: goto dorecs;
1791: }
1792: m = mp;
1793: if (slp->ns_rawend) {
1794: slp->ns_rawend->m_next = m;
1795: slp->ns_cc += 1000000000 - auio.uio_resid;
1796: } else {
1797: slp->ns_raw = m;
1798: slp->ns_cc = 1000000000 - auio.uio_resid;
1799: }
1800: while (m->m_next)
1801: m = m->m_next;
1802: slp->ns_rawend = m;
1803:
1804: /*
1805: * Now try and parse record(s) out of the raw stream data.
1806: */
1807: error = nfsrv_getstream(slp, waitflag);
1808: if (error) {
1809: if (error == EPERM)
1810: slp->ns_flag |= SLP_DISCONN;
1811: else
1812: slp->ns_flag |= SLP_NEEDQ;
1813: }
1814: } else {
1815: do {
1816: auio.uio_resid = 1000000000;
1817: flags = MSG_DONTWAIT;
1818: nam = 0;
1819: error = soreceive(so, &nam, &auio, &mp,
1820: (struct mbuf **)0, &flags);
1821: if (mp) {
1822: if (nam) {
1823: MGET(mhck, M_WAIT, MT_SONAME);
1824: mhck->m_len = nam->sa_len;
1825: sin = mtod(mhck, struct sockaddr_in *);
1826: bcopy(nam, sin, sizeof(struct sockaddr_in));
1827: mhck->m_hdr.mh_len = sizeof(struct sockaddr_in);
1828: FREE(nam, M_SONAME);
1829:
1830: m = mhck;
1831: m->m_next = mp;
1832: } else
1833: m = mp;
1834: if (slp->ns_recend)
1835: slp->ns_recend->m_nextpkt = m;
1836: else
1837: slp->ns_rec = m;
1838: slp->ns_recend = m;
1839: m->m_nextpkt = (struct mbuf *)0;
1840: }
1841: if (error) {
1842: if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
1843: && error != EWOULDBLOCK) {
1844: slp->ns_flag |= SLP_DISCONN;
1845: goto dorecs;
1846: }
1847: }
1848: } while (mp);
1849: }
1850:
1851: /*
1852: * Now try and process the request records, non-blocking.
1853: */
1854: dorecs:
1855: if (waitflag == M_DONTWAIT &&
1856: (slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))))
1857: nfsrv_wakenfsd(slp);
1858: }
1859:
1860: /*
1861: * Try and extract an RPC request from the mbuf data list received on a
1862: * stream socket. The "waitflag" argument indicates whether or not it
1863: * can sleep.
1864: */
1865: static int
1866: nfsrv_getstream(slp, waitflag)
1867: register struct nfssvc_sock *slp;
1868: int waitflag;
1869: {
1870: register struct mbuf *m, **mpp;
1871: register char *cp1, *cp2;
1872: register int len;
1873: struct mbuf *om, *m2, *recm = 0;
1874: u_long recmark;
1875:
1876: if (slp->ns_flag & SLP_GETSTREAM)
1877: panic("nfs getstream");
1878: slp->ns_flag |= SLP_GETSTREAM;
1879: for (;;) {
1880: if (slp->ns_reclen == 0) {
1881: if (slp->ns_cc < NFSX_UNSIGNED) {
1882: slp->ns_flag &= ~SLP_GETSTREAM;
1883: return (0);
1884: }
1885: m = slp->ns_raw;
1886: if (m->m_len >= NFSX_UNSIGNED) {
1887: bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED);
1888: m->m_data += NFSX_UNSIGNED;
1889: m->m_len -= NFSX_UNSIGNED;
1890: } else {
1891: cp1 = (caddr_t)&recmark;
1892: cp2 = mtod(m, caddr_t);
1893: while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
1894: while (m->m_len == 0) {
1895: m = m->m_next;
1896: cp2 = mtod(m, caddr_t);
1897: }
1898: *cp1++ = *cp2++;
1899: m->m_data++;
1900: m->m_len--;
1901: }
1902: }
1903: slp->ns_cc -= NFSX_UNSIGNED;
1904: recmark = ntohl(recmark);
1905: slp->ns_reclen = recmark & ~0x80000000;
1906: if (recmark & 0x80000000)
1907: slp->ns_flag |= SLP_LASTFRAG;
1908: else
1909: slp->ns_flag &= ~SLP_LASTFRAG;
1910: if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
1911: slp->ns_flag &= ~SLP_GETSTREAM;
1912: return (EPERM);
1913: }
1914: }
1915:
1916: /*
1917: * Now get the record part.
1918: */
1919: if (slp->ns_cc == slp->ns_reclen) {
1920: recm = slp->ns_raw;
1921: slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
1922: slp->ns_cc = slp->ns_reclen = 0;
1923: } else if (slp->ns_cc > slp->ns_reclen) {
1924: len = 0;
1925: m = slp->ns_raw;
1926: om = (struct mbuf *)0;
1927: while (len < slp->ns_reclen) {
1928: if ((len + m->m_len) > slp->ns_reclen) {
1929: m2 = m_copym(m, 0, slp->ns_reclen - len,
1930: waitflag);
1931: if (m2) {
1932: if (om) {
1933: om->m_next = m2;
1934: recm = slp->ns_raw;
1935: } else
1936: recm = m2;
1937: m->m_data += slp->ns_reclen - len;
1938: m->m_len -= slp->ns_reclen - len;
1939: len = slp->ns_reclen;
1940: } else {
1941: slp->ns_flag &= ~SLP_GETSTREAM;
1942: return (EWOULDBLOCK);
1943: }
1944: } else if ((len + m->m_len) == slp->ns_reclen) {
1945: om = m;
1946: len += m->m_len;
1947: m = m->m_next;
1948: recm = slp->ns_raw;
1949: om->m_next = (struct mbuf *)0;
1950: } else {
1951: om = m;
1952: len += m->m_len;
1953: m = m->m_next;
1954: }
1955: }
1956: slp->ns_raw = m;
1957: slp->ns_cc -= len;
1958: slp->ns_reclen = 0;
1959: } else {
1960: slp->ns_flag &= ~SLP_GETSTREAM;
1961: return (0);
1962: }
1963:
1964: /*
1965: * Accumulate the fragments into a record.
1966: */
1967: mpp = &slp->ns_frag;
1968: while (*mpp)
1969: mpp = &((*mpp)->m_next);
1970: *mpp = recm;
1971: if (slp->ns_flag & SLP_LASTFRAG) {
1972: if (slp->ns_recend)
1973: slp->ns_recend->m_nextpkt = slp->ns_frag;
1974: else
1975: slp->ns_rec = slp->ns_frag;
1976: slp->ns_recend = slp->ns_frag;
1977: slp->ns_frag = (struct mbuf *)0;
1978: }
1979: }
1980: }
1981:
1982: /*
1983: * Parse an RPC header.
1984: */
1985: int
1986: nfsrv_dorec(slp, nfsd, ndp)
1987: register struct nfssvc_sock *slp;
1988: struct nfsd *nfsd;
1989: struct nfsrv_descript **ndp;
1990: {
1991: register struct mbuf *m;
1992: register struct mbuf *nam;
1993: register struct nfsrv_descript *nd;
1994: int error;
1995:
1996: *ndp = NULL;
1997: if ((slp->ns_flag & SLP_VALID) == 0 ||
1998: (m = slp->ns_rec) == (struct mbuf *)0)
1999: return (ENOBUFS);
2000: slp->ns_rec = m->m_nextpkt;
2001: if (slp->ns_rec)
2002: m->m_nextpkt = (struct mbuf *)0;
2003: else
2004: slp->ns_recend = (struct mbuf *)0;
2005: if (m->m_type == MT_SONAME) {
2006: nam = m;
2007: m = m->m_next;
2008: nam->m_next = NULL;
2009: } else
2010: nam = NULL;
2011: MALLOC_ZONE(nd, struct nfsrv_descript *,
2012: sizeof (struct nfsrv_descript), M_NFSRVDESC, M_WAITOK);
2013: nd->nd_md = nd->nd_mrep = m;
2014: nd->nd_nam2 = nam;
2015: nd->nd_dpos = mtod(m, caddr_t);
2016: error = nfs_getreq(nd, nfsd, TRUE);
2017: if (error) {
2018: m_freem(nam);
2019: _FREE_ZONE((caddr_t)nd, sizeof *nd, M_NFSRVDESC);
2020: return (error);
2021: }
2022: *ndp = nd;
2023: nfsd->nfsd_nd = nd;
2024: return (0);
2025: }
2026:
2027: /*
2028: * Parse an RPC request
2029: * - verify it
2030: * - fill in the cred struct.
2031: */
2032: int
2033: nfs_getreq(nd, nfsd, has_header)
2034: register struct nfsrv_descript *nd;
2035: struct nfsd *nfsd;
2036: int has_header;
2037: {
2038: register int len, i;
2039: register u_long *tl;
2040: register long t1;
2041: struct uio uio;
2042: struct iovec iov;
2043: caddr_t dpos, cp2, cp;
2044: u_long nfsvers, auth_type;
2045: uid_t nickuid;
2046: int error = 0, nqnfs = 0, ticklen;
2047: struct mbuf *mrep, *md;
2048: register struct nfsuid *nuidp;
2049: struct timeval tvin, tvout;
2050: #if 0 /* until encrypted keys are implemented */
2051: NFSKERBKEYSCHED_T keys; /* stores key schedule */
2052: #endif
2053:
2054: mrep = nd->nd_mrep;
2055: md = nd->nd_md;
2056: dpos = nd->nd_dpos;
2057: if (has_header) {
2058: nfsm_dissect(tl, u_long *, 10 * NFSX_UNSIGNED);
2059: nd->nd_retxid = fxdr_unsigned(u_long, *tl++);
2060: if (*tl++ != rpc_call) {
2061: m_freem(mrep);
2062: return (EBADRPC);
2063: }
2064: } else
2065: nfsm_dissect(tl, u_long *, 8 * NFSX_UNSIGNED);
2066: nd->nd_repstat = 0;
2067: nd->nd_flag = 0;
2068: if (*tl++ != rpc_vers) {
2069: nd->nd_repstat = ERPCMISMATCH;
2070: nd->nd_procnum = NFSPROC_NOOP;
2071: return (0);
2072: }
2073: if (*tl != nfs_prog) {
2074: if (*tl == nqnfs_prog)
2075: nqnfs++;
2076: else {
2077: nd->nd_repstat = EPROGUNAVAIL;
2078: nd->nd_procnum = NFSPROC_NOOP;
2079: return (0);
2080: }
2081: }
2082: tl++;
2083: nfsvers = fxdr_unsigned(u_long, *tl++);
2084: if (((nfsvers < NFS_VER2 || nfsvers > NFS_VER3) && !nqnfs) ||
2085: (nfsvers != NQNFS_VER3 && nqnfs)) {
2086: nd->nd_repstat = EPROGMISMATCH;
2087: nd->nd_procnum = NFSPROC_NOOP;
2088: return (0);
2089: }
2090: if (nqnfs)
2091: nd->nd_flag = (ND_NFSV3 | ND_NQNFS);
2092: else if (nfsvers == NFS_VER3)
2093: nd->nd_flag = ND_NFSV3;
2094: nd->nd_procnum = fxdr_unsigned(u_long, *tl++);
2095: if (nd->nd_procnum == NFSPROC_NULL)
2096: return (0);
2097: if (nd->nd_procnum >= NFS_NPROCS ||
2098: (!nqnfs && nd->nd_procnum >= NQNFSPROC_GETLEASE) ||
2099: (!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) {
2100: nd->nd_repstat = EPROCUNAVAIL;
2101: nd->nd_procnum = NFSPROC_NOOP;
2102: return (0);
2103: }
2104: if ((nd->nd_flag & ND_NFSV3) == 0)
2105: nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
2106: auth_type = *tl++;
2107: len = fxdr_unsigned(int, *tl++);
2108: if (len < 0 || len > RPCAUTH_MAXSIZ) {
2109: m_freem(mrep);
2110: return (EBADRPC);
2111: }
2112:
2113: nd->nd_flag &= ~ND_KERBAUTH;
2114: /*
2115: * Handle auth_unix or auth_kerb.
2116: */
2117: if (auth_type == rpc_auth_unix) {
2118: len = fxdr_unsigned(int, *++tl);
2119: if (len < 0 || len > NFS_MAXNAMLEN) {
2120: m_freem(mrep);
2121: return (EBADRPC);
2122: }
2123: nfsm_adv(nfsm_rndup(len));
2124: nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
2125: bzero((caddr_t)&nd->nd_cr, sizeof (struct ucred));
2126: nd->nd_cr.cr_ref = 1;
2127: nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
2128: nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
2129: len = fxdr_unsigned(int, *tl);
2130: if (len < 0 || len > RPCAUTH_UNIXGIDS) {
2131: m_freem(mrep);
2132: return (EBADRPC);
2133: }
2134: nfsm_dissect(tl, u_long *, (len + 2) * NFSX_UNSIGNED);
2135: for (i = 1; i <= len; i++)
2136: if (i < NGROUPS)
2137: nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
2138: else
2139: tl++;
2140: nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
2141: if (nd->nd_cr.cr_ngroups > 1)
2142: nfsrvw_sort(nd->nd_cr.cr_groups, nd->nd_cr.cr_ngroups);
2143: len = fxdr_unsigned(int, *++tl);
2144: if (len < 0 || len > RPCAUTH_MAXSIZ) {
2145: m_freem(mrep);
2146: return (EBADRPC);
2147: }
2148: if (len > 0)
2149: nfsm_adv(nfsm_rndup(len));
2150: } else if (auth_type == rpc_auth_kerb) {
2151: switch (fxdr_unsigned(int, *tl++)) {
2152: case RPCAKN_FULLNAME:
2153: ticklen = fxdr_unsigned(int, *tl);
2154: *((u_long *)nfsd->nfsd_authstr) = *tl;
2155: uio.uio_resid = nfsm_rndup(ticklen) + NFSX_UNSIGNED;
2156: nfsd->nfsd_authlen = uio.uio_resid + NFSX_UNSIGNED;
2157: if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) {
2158: m_freem(mrep);
2159: return (EBADRPC);
2160: }
2161: uio.uio_offset = 0;
2162: uio.uio_iov = &iov;
2163: uio.uio_iovcnt = 1;
2164: uio.uio_segflg = UIO_SYSSPACE;
2165: iov.iov_base = (caddr_t)&nfsd->nfsd_authstr[4];
2166: iov.iov_len = RPCAUTH_MAXSIZ - 4;
2167: nfsm_mtouio(&uio, uio.uio_resid);
2168: nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
2169: if (*tl++ != rpc_auth_kerb ||
2170: fxdr_unsigned(int, *tl) != 4 * NFSX_UNSIGNED) {
2171: printf("Bad kerb verifier\n");
2172: nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
2173: nd->nd_procnum = NFSPROC_NOOP;
2174: return (0);
2175: }
2176: nfsm_dissect(cp, caddr_t, 4 * NFSX_UNSIGNED);
2177: tl = (u_long *)cp;
2178: if (fxdr_unsigned(int, *tl) != RPCAKN_FULLNAME) {
2179: printf("Not fullname kerb verifier\n");
2180: nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
2181: nd->nd_procnum = NFSPROC_NOOP;
2182: return (0);
2183: }
2184: cp += NFSX_UNSIGNED;
2185: bcopy(cp, nfsd->nfsd_verfstr, 3 * NFSX_UNSIGNED);
2186: nfsd->nfsd_verflen = 3 * NFSX_UNSIGNED;
2187: nd->nd_flag |= ND_KERBFULL;
2188: nfsd->nfsd_flag |= NFSD_NEEDAUTH;
2189: break;
2190: case RPCAKN_NICKNAME:
2191: if (len != 2 * NFSX_UNSIGNED) {
2192: printf("Kerb nickname short\n");
2193: nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADCRED);
2194: nd->nd_procnum = NFSPROC_NOOP;
2195: return (0);
2196: }
2197: nickuid = fxdr_unsigned(uid_t, *tl);
2198: nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
2199: if (*tl++ != rpc_auth_kerb ||
2200: fxdr_unsigned(int, *tl) != 3 * NFSX_UNSIGNED) {
2201: printf("Kerb nick verifier bad\n");
2202: nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
2203: nd->nd_procnum = NFSPROC_NOOP;
2204: return (0);
2205: }
2206: nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
2207: tvin.tv_sec = *tl++;
2208: tvin.tv_usec = *tl;
2209:
2210: for (nuidp = NUIDHASH(nfsd->nfsd_slp,nickuid)->lh_first;
2211: nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
2212: if (nuidp->nu_cr.cr_uid == nickuid &&
2213: (!nd->nd_nam2 ||
2214: netaddr_match(NU_NETFAM(nuidp),
2215: &nuidp->nu_haddr, nd->nd_nam2)))
2216: break;
2217: }
2218: if (!nuidp) {
2219: nd->nd_repstat =
2220: (NFSERR_AUTHERR|AUTH_REJECTCRED);
2221: nd->nd_procnum = NFSPROC_NOOP;
2222: return (0);
2223: }
2224:
2225: /*
2226: * Now, decrypt the timestamp using the session key
2227: * and validate it.
2228: */
2229: #if NFSKERB
2230: XXX
2231: #endif
2232:
2233: tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec);
2234: tvout.tv_usec = fxdr_unsigned(long, tvout.tv_usec);
2235: if (nuidp->nu_expire < time.tv_sec ||
2236: nuidp->nu_timestamp.tv_sec > tvout.tv_sec ||
2237: (nuidp->nu_timestamp.tv_sec == tvout.tv_sec &&
2238: nuidp->nu_timestamp.tv_usec > tvout.tv_usec)) {
2239: nuidp->nu_expire = 0;
2240: nd->nd_repstat =
2241: (NFSERR_AUTHERR|AUTH_REJECTVERF);
2242: nd->nd_procnum = NFSPROC_NOOP;
2243: return (0);
2244: }
2245: nfsrv_setcred(&nuidp->nu_cr, &nd->nd_cr);
2246: nd->nd_flag |= ND_KERBNICK;
2247: };
2248: } else {
2249: nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
2250: nd->nd_procnum = NFSPROC_NOOP;
2251: return (0);
2252: }
2253:
2254: /*
2255: * For nqnfs, get piggybacked lease request.
2256: */
2257: if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) {
2258: nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
2259: nd->nd_flag |= fxdr_unsigned(int, *tl);
2260: if (nd->nd_flag & ND_LEASE) {
2261: nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
2262: nd->nd_duration = fxdr_unsigned(int, *tl);
2263: } else
2264: nd->nd_duration = NQ_MINLEASE;
2265: } else
2266: nd->nd_duration = NQ_MINLEASE;
2267: nd->nd_md = md;
2268: nd->nd_dpos = dpos;
2269: return (0);
2270: nfsmout:
2271: return (error);
2272: }
2273:
2274: /*
2275: * Search for a sleeping nfsd and wake it up.
2276: * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
2277: * running nfsds will go look for the work in the nfssvc_sock list.
2278: */
2279: void
2280: nfsrv_wakenfsd(slp)
2281: struct nfssvc_sock *slp;
2282: {
2283: register struct nfsd *nd;
2284:
2285: if ((slp->ns_flag & SLP_VALID) == 0)
2286: return;
2287: for (nd = nfsd_head.tqh_first; nd != 0; nd = nd->nfsd_chain.tqe_next) {
2288: if (nd->nfsd_flag & NFSD_WAITING) {
2289: nd->nfsd_flag &= ~NFSD_WAITING;
2290: if (nd->nfsd_slp)
2291: panic("nfsd wakeup");
2292: slp->ns_sref++;
2293: nd->nfsd_slp = slp;
2294: wakeup((caddr_t)nd);
2295: return;
2296: }
2297: }
2298: slp->ns_flag |= SLP_DOREC;
2299: nfsd_head_flag |= NFSD_CHECKSLP;
2300: }
2301: #endif /* NFS_NOSERVER */
2302:
2303: static int
2304: nfs_msg(p, server, msg)
2305: struct proc *p;
2306: char *server, *msg;
2307: {
2308: tpr_t tpr;
2309:
2310: if (p)
2311: tpr = tprintf_open(p);
2312: else
2313: tpr = NULL;
2314: tprintf(tpr, "nfs server %s: %s\n", server, msg);
2315: tprintf_close(tpr);
2316: return (0);
2317: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.