|
|
1.1 root 1: /*
2: * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3: *
4: * @APPLE_LICENSE_HEADER_START@
5: *
6: * The contents of this file constitute Original Code as defined in and
7: * are subject to the Apple Public Source License Version 1.1 (the
8: * "License"). You may not use this file except in compliance with the
9: * License. Please obtain a copy of the License at
10: * http://www.apple.com/publicsource and read it before using this file.
11: *
12: * This Original Code and all software distributed under the License are
13: * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14: * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15: * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16: * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17: * License for the specific language governing rights and limitations
18: * under the License.
19: *
20: * @APPLE_LICENSE_HEADER_END@
21: */
22: /*
23: * Copyright (c) 1982, 1986, 1989, 1990, 1993
24: * The Regents of the University of California. All rights reserved.
25: *
26: * sendfile(2) and related extensions:
27: * Copyright (c) 1998, David Greenman. All rights reserved.
28: *
29: * Redistribution and use in source and binary forms, with or without
30: * modification, are permitted provided that the following conditions
31: * are met:
32: * 1. Redistributions of source code must retain the above copyright
33: * notice, this list of conditions and the following disclaimer.
34: * 2. Redistributions in binary form must reproduce the above copyright
35: * notice, this list of conditions and the following disclaimer in the
36: * documentation and/or other materials provided with the distribution.
37: * 3. All advertising materials mentioning features or use of this software
38: * must display the following acknowledgement:
39: * This product includes software developed by the University of
40: * California, Berkeley and its contributors.
41: * 4. Neither the name of the University nor the names of its contributors
42: * may be used to endorse or promote products derived from this software
43: * without specific prior written permission.
44: *
45: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
46: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
49: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55: * SUCH DAMAGE.
56: *
57: * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
58: */
59:
60:
61:
62: #include <sys/param.h>
63: #include <sys/systm.h>
64: #include <sys/filedesc.h>
65: #include <sys/proc.h>
66: #include <sys/file.h>
67: #include <sys/buf.h>
68: #include <sys/malloc.h>
69: #include <sys/mbuf.h>
70: #include <sys/protosw.h>
71: #include <sys/socket.h>
72: #include <sys/socketvar.h>
73: #if KTRACE
74: #include <sys/ktrace.h>
75: #endif
76: #include <sys/kernel.h>
77:
78: #if NEXT
79: #import <sys/kdebug.h>
80:
81: #if KDEBUG
82:
83: #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0)
84: #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2)
85: #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1)
86: #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3)
87: #define DBG_FNC_SENDMSG NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1)
88: #define DBG_FNC_SENDTO NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1)
89: #define DBG_FNC_SENDIT NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1)
90: #define DBG_FNC_RECVFROM NETDBG_CODE(DBG_NETSOCK, (5 << 8))
91: #define DBG_FNC_RECVMSG NETDBG_CODE(DBG_NETSOCK, (6 << 8))
92: #define DBG_FNC_RECVIT NETDBG_CODE(DBG_NETSOCK, (7 << 8))
93:
94:
95: #endif
96:
97: #endif
98:
99: struct getsockname_args {
100: int fdes;
101: caddr_t asa;
102: int *alen;
103: };
104:
105: struct getsockopt_args {
106: int s;
107: int level;
108: int name;
109: caddr_t val;
110: int *avalsize;
111: } ;
112:
113: struct accept_args {
114: int s;
115: caddr_t name;
116: int *anamelen;
117: };
118:
119: struct getpeername_args {
120: int fdes;
121: caddr_t asa;
122: int *alen;
123: };
124:
125:
126: /* ARGSUSED */
127:
128: #if SENDFILE
129: static void sf_buf_init(void *arg);
130: SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
131: static struct sf_buf *sf_buf_alloc(void);
132: static void sf_buf_ref(caddr_t addr, u_int size);
133: static void sf_buf_free(caddr_t addr, u_int size);
134:
135: static SLIST_HEAD(, sf_buf) sf_freelist;
136: static vm_offset_t sf_base;
137: static struct sf_buf *sf_bufs;
138: static int sf_buf_alloc_want;
139: #endif
140:
141: static int sendit __P((struct proc *p, int s, struct msghdr *mp, int flags, register_t *retval));
142: static int recvit __P((struct proc *p, int s, struct msghdr *mp,
143: caddr_t namelenp, register_t *retval));
144:
145: static int accept1 __P((struct proc *p, struct accept_args *uap, register_t *retval, int compat));
146: static int getsockname1 __P((struct proc *p, struct getsockname_args *uap,
147: register_t *retval, int compat));
148: static int getpeername1 __P((struct proc *p, struct getpeername_args *uap,
149: register_t *retval, int compat));
150:
151: /*
152: * System call interface to the socket abstraction.
153: */
154: #if COMPAT_43 || defined(COMPAT_SUNOS)
155: #define COMPAT_OLDSOCK
156: #endif
157:
158: extern struct fileops socketops;
159:
160: struct socket_args {
161: int domain;
162: int type;
163: int protocol;
164: };
165: int
166: socket(p, uap, retval)
167: struct proc *p;
168: register struct socket_args *uap;
169: register_t *retval;
170: {
171: struct filedesc *fdp = p->p_fd;
172: struct socket *so;
173: struct file *fp;
174: int fd, error;
175:
176: error = falloc(p, &fp, &fd);
177: if (error)
178: return (error);
179: fp->f_flag = FREAD|FWRITE;
180: fp->f_type = DTYPE_SOCKET;
181: fp->f_ops = &socketops;
182: if (error = socreate(uap->domain, &so, uap->type,
183: uap->protocol)) {
184: fdrelse(p, fd);
185: ffree(fp);
186: } else {
187: fp->f_data = (caddr_t)so;
188: *fdflags(p, fd) &= ~UF_RESERVED;
189: *retval = fd;
190: }
191: return (error);
192: }
193:
194: struct bind_args {
195: int s;
196: caddr_t name;
197: int namelen;
198: };
199:
200: /* ARGSUSED */
201: int
202: bind(p, uap, retval)
203: struct proc *p;
204: register struct bind_args *uap;
205: register_t *retval;
206: {
207: struct file *fp;
208: struct sockaddr *sa;
209: int error;
210:
211: error = getsock(p->p_fd, uap->s, &fp);
212: if (error)
213: return (error);
214: error = getsockaddr(&sa, uap->name, uap->namelen);
215: if (error)
216: return (error);
217: error = sobind((struct socket *)fp->f_data, sa);
218: FREE(sa, M_SONAME);
219: return (error);
220: }
221:
222: struct listen_args {
223: int s;
224: int backlog;
225: };
226:
227:
228:
229: int
230: listen(p, uap, retval)
231: struct proc *p;
232: register struct listen_args *uap;
233: register_t *retval;
234: {
235: struct file *fp;
236: int error;
237:
238: error = getsock(p->p_fd, uap->s, &fp);
239: if (error)
240: return (error);
241: return (solisten((struct socket *)fp->f_data, uap->backlog));
242: }
243:
244: #ifndef COMPAT_OLDSOCK
245: #define accept1 accept
246: #endif
247:
248:
249:
250: int
251: accept1(p, uap, retval, compat)
252: struct proc *p;
253: register struct accept_args *uap;
254: register_t *retval;
255: int compat;
256: {
257: struct file *fp;
258: struct sockaddr *sa;
259: int namelen, error, s;
260: struct socket *head, *so;
261: int fd;
262: short fflag; /* type must match fp->f_flag */
263: int tmpfd;
264:
265: if (uap->name) {
266: error = copyin((caddr_t)uap->anamelen, (caddr_t)&namelen,
267: sizeof (namelen));
268: if(error)
269: return (error);
270: }
271: error = getsock(p->p_fd, uap->s, &fp);
272: if (error)
273: return (error);
274: s = splnet();
275: head = (struct socket *)fp->f_data;
276: if ((head->so_options & SO_ACCEPTCONN) == 0) {
277: splx(s);
278: return (EINVAL);
279: }
280: if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
281: splx(s);
282: return (EWOULDBLOCK);
283: }
284: while (head->so_comp.tqh_first == NULL && head->so_error == 0) {
285: if (head->so_state & SS_CANTRCVMORE) {
286: head->so_error = ECONNABORTED;
287: break;
288: }
289: error = tsleep((caddr_t)&head->so_timeo, PSOCK | PCATCH,
290: "accept", 0);
291: if (error) {
292: splx(s);
293: return (error);
294: }
295: }
296: if (head->so_error) {
297: error = head->so_error;
298: head->so_error = 0;
299: splx(s);
300: return (error);
301: }
302:
303:
304: /*
305: * At this point we know that there is at least one connection
306: * ready to be accepted. Remove it from the queue prior to
307: * allocating the file descriptor for it since falloc() may
308: * block allowing another process to accept the connection
309: * instead.
310: */
311: so = head->so_comp.tqh_first;
312: TAILQ_REMOVE(&head->so_comp, so, so_list);
313: head->so_qlen--;
314:
315: fflag = fp->f_flag;
316: error = falloc(p, &fp, &fd);
317: if (error) {
318: /*
319: * Probably ran out of file descriptors. Put the
320: * unaccepted connection back onto the queue and
321: * do another wakeup so some other process might
322: * have a chance at it.
323: */
324: TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
325: head->so_qlen++;
326: wakeup_one(&head->so_timeo);
327: splx(s);
328: return (error);
329: } else {
330: *fdflags(p, fd) &= ~UF_RESERVED;
331: *retval = fd;
332: }
333:
334: so->so_state &= ~SS_COMP;
335: so->so_head = NULL;
336: #if ISFB31
337: if (head->so_pgid != NULL)
338: fsetown(fgetown(head->so_pgid), &so->so_pgid);
339: #endif
340: fp->f_type = DTYPE_SOCKET;
341: fp->f_flag = fflag;
342: fp->f_ops = &socketops;
343: fp->f_data = (caddr_t)so;
344: sa = 0;
345: (void) soaccept(so, &sa);
346: if (sa == 0) {
347: namelen = 0;
348: if (uap->name)
349: goto gotnoname;
350: return 0;
351: }
352: if (uap->name) {
353: /* check sa_len before it is destroyed */
354: if (namelen > sa->sa_len)
355: namelen = sa->sa_len;
356: #ifdef COMPAT_OLDSOCK
357: if (compat)
358: ((struct osockaddr *)sa)->sa_family =
359: sa->sa_family;
360: #endif
361: error = copyout(sa, (caddr_t)uap->name, (u_int)namelen);
362: if (!error)
363: gotnoname:
364: error = copyout((caddr_t)&namelen,
365: (caddr_t)uap->anamelen, sizeof (*uap->anamelen));
366: }
367: FREE(sa, M_SONAME);
368: splx(s);
369: return (error);
370: }
371:
#ifdef COMPAT_OLDSOCK
/*
 * accept system call: thin wrapper that runs accept1() with compat == 0.
 *
 * Only compiled when COMPAT_OLDSOCK is defined.  Without it, accept1 is
 * #defined to accept earlier in this file, so the accept1() definition
 * is itself the accept entry point and a second definition here would
 * be a duplicate that calls itself.
 */
int
accept(p, uap, retval)
	struct proc *p;
	struct accept_args *uap;
	register_t *retval;
{

	return (accept1(p, uap, retval, 0));
}
#endif /* COMPAT_OLDSOCK */
381:
#ifdef COMPAT_OLDSOCK
/*
 * Old-style accept entry point: identical to accept() except that
 * accept1() is asked (compat == 1) to return the peer address in the
 * old sockaddr layout.
 */
int
oaccept(p, uap, retval)
	struct proc *p;
	struct accept_args *uap;
	register_t *retval;
{
	int rc;

	rc = accept1(p, uap, retval, 1);
	return (rc);
}
#endif /* COMPAT_OLDSOCK */
393:
394: struct connect_args {
395: int s;
396: caddr_t name;
397: int namelen;
398: };
399: /* ARGSUSED */
400: int
401: connect(p, uap, retval)
402: struct proc *p;
403: register struct connect_args *uap;
404: register_t *retval;
405: {
406: struct file *fp;
407: register struct socket *so;
408: struct sockaddr *sa;
409: int error, s;
410:
411: error = getsock(p->p_fd, uap->s, &fp);
412: if (error)
413: return (error);
414: so = (struct socket *)fp->f_data;
415: if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING))
416: return (EALREADY);
417: error = getsockaddr(&sa, uap->name, uap->namelen);
418: if (error)
419: return (error);
420: error = soconnect(so, sa);
421: if (error)
422: goto bad;
423: if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
424: FREE(sa, M_SONAME);
425: return (EINPROGRESS);
426: }
427: s = splnet();
428: while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
429: error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH,
430: "connec", 0);
431: if (error)
432: break;
433: }
434: if (error == 0) {
435: error = so->so_error;
436: so->so_error = 0;
437: }
438: splx(s);
439: bad:
440: so->so_state &= ~SS_ISCONNECTING;
441: FREE(sa, M_SONAME);
442: if (error == ERESTART)
443: error = EINTR;
444: return (error);
445: }
446:
447: struct socketpair_args {
448: int domain;
449: int type;
450: int protocol;
451: int *rsv;
452: };
453: int
454: socketpair(p, uap, retval)
455: struct proc *p;
456: register struct socketpair_args *uap;
457: register_t *retval;
458: {
459: register struct filedesc *fdp = p->p_fd;
460: struct file *fp1, *fp2;
461: struct socket *so1, *so2;
462: int fd, error, sv[2];
463:
464: error = socreate(uap->domain, &so1, uap->type, uap->protocol);
465: if (error)
466: return (error);
467: error = socreate(uap->domain, &so2, uap->type, uap->protocol);
468: if (error)
469: goto free1;
470: error = falloc(p, &fp1, &fd);
471: if (error)
472: goto free2;
473: sv[0] = fd;
474: fp1->f_flag = FREAD|FWRITE;
475: fp1->f_type = DTYPE_SOCKET;
476: fp1->f_ops = &socketops;
477: fp1->f_data = (caddr_t)so1;
478: error = falloc(p, &fp2, &fd);
479: if (error)
480: goto free3;
481: fp2->f_flag = FREAD|FWRITE;
482: fp2->f_type = DTYPE_SOCKET;
483: fp2->f_ops = &socketops;
484: fp2->f_data = (caddr_t)so2;
485: sv[1] = fd;
486: error = soconnect2(so1, so2);
487: if (error)
488: goto free4;
489: if (uap->type == SOCK_DGRAM) {
490: /*
491: * Datagram socket connection is asymmetric.
492: */
493: error = soconnect2(so2, so1);
494: if (error)
495: goto free4;
496: }
497: *fdflags(p, sv[0]) &= ~UF_RESERVED;
498: *fdflags(p, sv[1]) &= ~UF_RESERVED;
499: error = copyout((caddr_t)sv, (caddr_t)uap->rsv,
500: 2 * sizeof (int));
501: #if 0 /* old pipe(2) syscall compatability, unused these days */
502: retval[0] = sv[0]; /* XXX ??? */
503: retval[1] = sv[1]; /* XXX ??? */
504: #endif /* 0 */
505: return (error);
506: free4:
507: fdrelse(p, sv[1]);
508: ffree(fp2);
509: free3:
510: fdrelse(p, sv[0]);
511: ffree(fp1);
512: free2:
513: (void)soclose(so2);
514: free1:
515: (void)soclose(so1);
516: return (error);
517: }
518:
519: static int
520: sendit(p, s, mp, flags, retsize)
521: register struct proc *p;
522: int s;
523: register struct msghdr *mp;
524: int flags;
525: register_t *retsize;
526: {
527: struct file *fp;
528: struct uio auio;
529: register struct iovec *iov;
530: register int i;
531: struct mbuf *control;
532: struct sockaddr *to;
533: int len, error;
534: struct socket *so;
535: #if KTRACE
536: struct iovec *ktriov = NULL;
537: #endif
538:
539: KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0,0,0,0,0);
540:
541: if (error = getsock(p->p_fd, s, &fp))
542: {
543: KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error,0,0,0,0);
544: return (error);
545: }
546:
547: auio.uio_iov = mp->msg_iov;
548: auio.uio_iovcnt = mp->msg_iovlen;
549: auio.uio_segflg = UIO_USERSPACE;
550: auio.uio_rw = UIO_WRITE;
551: auio.uio_procp = p;
552: auio.uio_offset = 0; /* XXX */
553: auio.uio_resid = 0;
554: iov = mp->msg_iov;
555: for (i = 0; i < mp->msg_iovlen; i++, iov++) {
556: if (iov->iov_len < 0)
557: {
558: KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, EINVAL,0,0,0,0);
559: return (EINVAL);
560: }
561:
562: if ((auio.uio_resid += iov->iov_len) < 0)
563: {
564: KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, EINVAL,0,0,0,0);
565: return (EINVAL);
566: }
567: }
568: if (mp->msg_name) {
569: error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
570: if (error)
571: return (error);
572: } else
573: to = 0;
574: if (mp->msg_control) {
575: if (mp->msg_controllen < sizeof(struct cmsghdr)
576: #ifdef COMPAT_OLDSOCK
577: && mp->msg_flags != MSG_COMPAT
578: #endif
579: ) {
580: error = EINVAL;
581: goto bad;
582: }
583: error = sockargs(&control, mp->msg_control,
584: mp->msg_controllen, MT_CONTROL);
585: if (error)
586: goto bad;
587: #ifdef COMPAT_OLDSOCK
588: if (mp->msg_flags == MSG_COMPAT) {
589: register struct cmsghdr *cm;
590:
591: M_PREPEND(control, sizeof(*cm), M_WAIT);
592: if (control == 0) {
593: error = ENOBUFS;
594: goto bad;
595: } else {
596: cm = mtod(control, struct cmsghdr *);
597: cm->cmsg_len = control->m_len;
598: cm->cmsg_level = SOL_SOCKET;
599: cm->cmsg_type = SCM_RIGHTS;
600: }
601: }
602: #endif
603: } else
604: control = 0;
605:
606: len = auio.uio_resid;
607: so = (struct socket *)fp->f_data;
608: error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control,
609: flags);
610: if (error) {
611: if (auio.uio_resid != len && (error == ERESTART ||
612: error == EINTR || error == EWOULDBLOCK))
613: error = 0;
614: if (error == EPIPE)
615: psignal(p, SIGPIPE);
616: }
617: if (error == 0)
618: *retsize = len - auio.uio_resid;
619: #if KTRACE
620: if (ktriov != NULL) {
621: if (error == 0)
622: ktrgenio(p->p_tracep, s, UIO_WRITE,
623: ktriov, *retsize, error);
624: FREE(ktriov, M_TEMP);
625: }
626: #endif
627: bad:
628: if (to)
629: FREE(to, M_SONAME);
630: return (error);
631: }
632:
633:
634: struct sendto_args {
635: int s;
636: caddr_t buf;
637: size_t len;
638: int flags;
639: caddr_t to;
640: int tolen;
641: };
642:
643: int
644: sendto(p, uap, retval)
645: struct proc *p;
646: register struct sendto_args /* {
647: int s;
648: caddr_t buf;
649: size_t len;
650: int flags;
651: caddr_t to;
652: int tolen;
653: } */ *uap;
654: register_t *retval;
655:
656: {
657: struct msghdr msg;
658: struct iovec aiov;
659:
660: msg.msg_name = uap->to;
661: msg.msg_namelen = uap->tolen;
662: msg.msg_iov = &aiov;
663: msg.msg_iovlen = 1;
664: msg.msg_control = 0;
665: #ifdef COMPAT_OLDSOCK
666: msg.msg_flags = 0;
667: #endif
668: aiov.iov_base = uap->buf;
669: aiov.iov_len = uap->len;
670: KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, stat, *retval,0,0,0);
671: return (sendit(p, uap->s, &msg, uap->flags, retval));
672: }
673:
674: #ifdef COMPAT_OLDSOCK
675: struct osend_args {
676: int s;
677: caddr_t buf;
678: int len;
679: int flags;
680: };
681:
682: int
683: osend(p, uap, retval)
684: struct proc *p;
685: register struct osend_args /* {
686: int s;
687: caddr_t buf;
688: int len;
689: int flags;
690: } */ *uap;
691: register_t *retval;
692:
693: {
694: struct msghdr msg;
695: struct iovec aiov;
696:
697: msg.msg_name = 0;
698: msg.msg_namelen = 0;
699: msg.msg_iov = &aiov;
700: msg.msg_iovlen = 1;
701: aiov.iov_base = uap->buf;
702: aiov.iov_len = uap->len;
703: msg.msg_control = 0;
704: msg.msg_flags = 0;
705: return (sendit(p, uap->s, &msg, uap->flags, retval));
706: }
707: struct osendmsg_args {
708: int s;
709: caddr_t msg;
710: int flags;
711: };
712:
713: int
714: osendmsg(p, uap, retval)
715: struct proc *p;
716: register struct osendmsg_args /* {
717: int s;
718: caddr_t msg;
719: int flags;
720: } */ *uap;
721: register_t *retval;
722:
723: {
724: struct msghdr msg;
725: struct iovec aiov[UIO_SMALLIOV], *iov;
726: int error;
727:
728: error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr));
729: if (error)
730: return (error);
731: if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
732: if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
733: return (EMSGSIZE);
734: MALLOC(iov, struct iovec *,
735: sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
736: M_WAITOK);
737: } else
738: iov = aiov;
739: error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
740: (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
741: if (error)
742: goto done;
743: msg.msg_flags = MSG_COMPAT;
744: msg.msg_iov = iov;
745: error = sendit(p, uap->s, &msg, uap->flags, retval);
746: done:
747: if (iov != aiov)
748: FREE(iov, M_IOV);
749: return (error);
750: }
751: #endif
752:
753: struct sendmsg_args {
754: int s;
755: caddr_t msg;
756: int flags;
757: };
758:
759: int
760: sendmsg(p, uap, retval)
761: struct proc *p;
762: register struct sendmsg_args *uap;
763: register_t *retval;
764: {
765: struct msghdr msg;
766: struct iovec aiov[UIO_SMALLIOV], *iov;
767: int error;
768:
769: error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg));
770: if (error)
771: return (error);
772: if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
773: if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
774: return (EMSGSIZE);
775: MALLOC(iov, struct iovec *,
776: sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
777: M_WAITOK);
778: } else
779: iov = aiov;
780: if (msg.msg_iovlen &&
781: (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
782: (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
783: goto done;
784: msg.msg_iov = iov;
785: #ifdef COMPAT_OLDSOCK
786: msg.msg_flags = 0;
787: #endif
788: error = sendit(p, uap->s, &msg, uap->flags, retval);
789: done:
790: if (iov != aiov)
791: FREE(iov, M_IOV);
792: return (error);
793: }
794:
795: static int
796: recvit(p, s, mp, namelenp, retval)
797: register struct proc *p;
798: int s;
799: register struct msghdr *mp;
800: caddr_t namelenp;
801: register_t *retval;
802: {
803: struct file *fp;
804: struct uio auio;
805: register struct iovec *iov;
806: register int i;
807: int len, error;
808: struct mbuf *m, *control = 0;
809: caddr_t ctlbuf;
810: struct socket *so;
811: struct sockaddr *fromsa = 0;
812: #if KTRACE
813: struct iovec *ktriov = NULL;
814: #endif
815:
816: error = getsock(p->p_fd, s, &fp);
817: if (error)
818: return (error);
819: auio.uio_iov = mp->msg_iov;
820: auio.uio_iovcnt = mp->msg_iovlen;
821: auio.uio_segflg = UIO_USERSPACE;
822: auio.uio_rw = UIO_READ;
823: auio.uio_procp = p;
824: auio.uio_offset = 0; /* XXX */
825: auio.uio_resid = 0;
826: iov = mp->msg_iov;
827: for (i = 0; i < mp->msg_iovlen; i++, iov++) {
828: if ((auio.uio_resid += iov->iov_len) < 0)
829: return (EINVAL);
830: }
831: #if KTRACE
832: if (KTRPOINT(p, KTR_GENIO)) {
833: int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
834:
835: MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
836: bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
837: }
838: #endif
839: len = auio.uio_resid;
840: so = (struct socket *)fp->f_data;
841: error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
842: (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
843: &mp->msg_flags);
844: if (error) {
845: if (auio.uio_resid != len && (error == ERESTART ||
846: error == EINTR || error == EWOULDBLOCK))
847: error = 0;
848: }
849: #if KTRACE
850: if (ktriov != NULL) {
851: if (error == 0)
852: ktrgenio(p->p_tracep, s, UIO_WRITE,
853: ktriov, len - auio.uio_resid, error);
854: FREE(ktriov, M_TEMP);
855: }
856: #endif
857: if (error)
858: goto out;
859: *retval = len - auio.uio_resid;
860: if (mp->msg_name) {
861: len = mp->msg_namelen;
862: if (len <= 0 || fromsa == 0)
863: len = 0;
864: else {
865: #ifndef MIN
866: #define MIN(a,b) ((a)>(b)?(b):(a))
867: #endif
868: /* save sa_len before it is destroyed by MSG_COMPAT */
869: len = MIN(len, fromsa->sa_len);
870: #ifdef COMPAT_OLDSOCK
871: if (mp->msg_flags & MSG_COMPAT)
872: ((struct osockaddr *)fromsa)->sa_family =
873: fromsa->sa_family;
874: #endif
875: error = copyout(fromsa,
876: (caddr_t)mp->msg_name, (unsigned)len);
877: if (error)
878: goto out;
879: }
880: mp->msg_namelen = len;
881: if (namelenp &&
882: (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) {
883: #ifdef COMPAT_OLDSOCK
884: if (mp->msg_flags & MSG_COMPAT)
885: error = 0; /* old recvfrom didn't check */
886: else
887: #endif
888: goto out;
889: }
890: }
891: if (mp->msg_control) {
892: #ifdef COMPAT_OLDSOCK
893: /*
894: * We assume that old recvmsg calls won't receive access
895: * rights and other control info, esp. as control info
896: * is always optional and those options didn't exist in 4.3.
897: * If we receive rights, trim the cmsghdr; anything else
898: * is tossed.
899: */
900: if (control && mp->msg_flags & MSG_COMPAT) {
901: if (mtod(control, struct cmsghdr *)->cmsg_level !=
902: SOL_SOCKET ||
903: mtod(control, struct cmsghdr *)->cmsg_type !=
904: SCM_RIGHTS) {
905: mp->msg_controllen = 0;
906: goto out;
907: }
908: control->m_len -= sizeof (struct cmsghdr);
909: control->m_data += sizeof (struct cmsghdr);
910: }
911: #endif
912: len = mp->msg_controllen;
913: m = control;
914: mp->msg_controllen = 0;
915: ctlbuf = (caddr_t) mp->msg_control;
916:
917: while (m && len > 0) {
918: unsigned int tocopy;
919:
920: if (len >= m->m_len)
921: tocopy = m->m_len;
922: else {
923: mp->msg_flags |= MSG_CTRUNC;
924: tocopy = len;
925: }
926:
927: if (error = copyout((caddr_t)mtod(m, caddr_t),
928: ctlbuf, tocopy))
929: goto out;
930:
931: ctlbuf += tocopy;
932: len -= tocopy;
933: m = m->m_next;
934: }
935: mp->msg_controllen = ctlbuf - mp->msg_control;
936: }
937: out:
938: if (fromsa)
939: FREE(fromsa, M_SONAME);
940: if (control)
941: m_freem(control);
942: return (error);
943: }
944:
945:
946: struct recvfrom_args {
947: int s;
948: caddr_t buf;
949: size_t len;
950: int flags;
951: caddr_t from;
952: int *fromlenaddr;
953: };
954:
955: int
956: recvfrom(p, uap, retval)
957: struct proc *p;
958: register struct recvfrom_args /* {
959: int s;
960: caddr_t buf;
961: size_t len;
962: int flags;
963: caddr_t from;
964: int *fromlenaddr;
965: } */ *uap;
966: register_t *retval;
967: {
968: struct msghdr msg;
969: struct iovec aiov;
970: int error;
971:
972: if (uap->fromlenaddr) {
973: error = copyin((caddr_t)uap->fromlenaddr,
974: (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
975: if (error)
976: return (error);
977: } else
978: msg.msg_namelen = 0;
979: msg.msg_name = uap->from;
980: msg.msg_iov = &aiov;
981: msg.msg_iovlen = 1;
982: aiov.iov_base = uap->buf;
983: aiov.iov_len = uap->len;
984: msg.msg_control = 0;
985: msg.msg_flags = uap->flags;
986: return (recvit(p, uap->s, &msg, (caddr_t)uap->fromlenaddr, retval));
987: }
988:
#ifdef COMPAT_OLDSOCK
/*
 * Old recvfrom entry point: sets MSG_COMPAT in the caller's flags and
 * forwards to recvfrom().  retval must be passed along, because
 * recvfrom() hands it to recvit(), which stores the byte count through it.
 */
int
orecvfrom(p, uap, retval)
	struct proc *p;
	struct recvfrom_args *uap;
	register_t *retval;
{

	uap->flags |= MSG_COMPAT;
	return (recvfrom(p, uap, retval));
}
#endif
1001:
1002:
1003: #ifdef COMPAT_OLDSOCK
1004: int
1005: orecv(p, uap, retval)
1006: struct proc *p;
1007: register struct orecv_args {
1008: int s;
1009: caddr_t buf;
1010: int len;
1011: int flags;
1012: } *uap;
1013: register_t *retval;
1014: {
1015: struct msghdr msg;
1016: struct iovec aiov;
1017:
1018: msg.msg_name = 0;
1019: msg.msg_namelen = 0;
1020: msg.msg_iov = &aiov;
1021: msg.msg_iovlen = 1;
1022: aiov.iov_base = uap->buf;
1023: aiov.iov_len = uap->len;
1024: msg.msg_control = 0;
1025: msg.msg_flags = uap->flags;
1026: return (recvit(p, uap->s, &msg, (caddr_t)0, retval));
1027: }
1028:
1029: /*
1030: * Old recvmsg. This code takes advantage of the fact that the old msghdr
1031: * overlays the new one, missing only the flags, and with the (old) access
1032: * rights where the control fields are now.
1033: */
1034: int
1035: orecvmsg(p, uap, retval)
1036: struct proc *p;
1037: register struct orecvmsg_args {
1038: int s;
1039: struct omsghdr *msg;
1040: int flags;
1041: } *uap;
1042: register_t *retval;
1043: {
1044: struct msghdr msg;
1045: struct iovec aiov[UIO_SMALLIOV], *iov;
1046: int error;
1047:
1048: error = copyin((caddr_t)uap->msg, (caddr_t)&msg,
1049: sizeof (struct omsghdr));
1050: if (error)
1051: return (error);
1052: if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1053: if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
1054: return (EMSGSIZE);
1055: MALLOC(iov, struct iovec *,
1056: sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1057: M_WAITOK);
1058: } else
1059: iov = aiov;
1060: msg.msg_flags = uap->flags | MSG_COMPAT;
1061: error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
1062: (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1063: if (error)
1064: goto done;
1065: msg.msg_iov = iov;
1066: error = recvit(p, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen, retval);
1067:
1068: if (msg.msg_controllen && error == 0)
1069: error = copyout((caddr_t)&msg.msg_controllen,
1070: (caddr_t)&uap->msg->msg_accrightslen, sizeof (int));
1071: done:
1072: if (iov != aiov)
1073: FREE(iov, M_IOV);
1074: return (error);
1075: }
1076: #endif
1077:
1078: int
1079: recvmsg(p, uap, retval)
1080: struct proc *p;
1081: register struct recvmsg_args {
1082: int s;
1083: struct msghdr *msg;
1084: int flags;
1085: } *uap;
1086: register_t *retval;
1087: {
1088: struct msghdr msg;
1089: struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
1090: register int error;
1091:
1092: error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (msg));
1093: if (error)
1094: return (error);
1095: if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1096: if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
1097: return (EMSGSIZE);
1098: MALLOC(iov, struct iovec *,
1099: sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1100: M_WAITOK);
1101: } else
1102: iov = aiov;
1103: #ifdef COMPAT_OLDSOCK
1104: msg.msg_flags = uap->flags &~ MSG_COMPAT;
1105: #else
1106: msg.msg_flags = uap->flags;
1107: #endif
1108: uiov = msg.msg_iov;
1109: msg.msg_iov = iov;
1110: error = copyin((caddr_t)uiov, (caddr_t)iov,
1111: (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1112: if (error)
1113: goto done;
1114: error = recvit(p, uap->s, &msg, (caddr_t)0, retval);
1115: if (!error) {
1116: msg.msg_iov = uiov;
1117: error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg));
1118: }
1119: done:
1120: if (iov != aiov)
1121: FREE(iov, M_IOV);
1122: return (error);
1123: }
1124:
1125: /* ARGSUSED */
1126: int
1127: shutdown(p, uap, retval)
1128: struct proc *p;
1129: register struct shutdown_args {
1130: int s;
1131: int how;
1132: } *uap;
1133: register_t *retval;
1134: {
1135: struct file *fp;
1136: int error;
1137:
1138: error = getsock(p->p_fd, uap->s, &fp);
1139: if (error)
1140: return (error);
1141: return (soshutdown((struct socket *)fp->f_data, uap->how));
1142: }
1143:
1144:
1145:
1146:
1147:
1148: /* ARGSUSED */
1149: int
1150: setsockopt(p, uap, retval)
1151: struct proc *p;
1152: register struct setsockopt_args {
1153: int s;
1154: int level;
1155: int name;
1156: caddr_t val;
1157: int valsize;
1158: } *uap;
1159: register_t *retval;
1160: {
1161: struct file *fp;
1162: struct sockopt sopt;
1163: int error;
1164:
1165: if (uap->val == 0 && uap->valsize != 0)
1166: return (EFAULT);
1167: if (uap->valsize < 0)
1168: return (EINVAL);
1169:
1170: error = getsock(p->p_fd, uap->s, &fp);
1171: if (error)
1172: return (error);
1173:
1174: sopt.sopt_dir = SOPT_SET;
1175: sopt.sopt_level = uap->level;
1176: sopt.sopt_name = uap->name;
1177: sopt.sopt_val = uap->val;
1178: sopt.sopt_valsize = uap->valsize;
1179: sopt.sopt_p = p;
1180:
1181: return (sosetopt((struct socket *)fp->f_data, &sopt));
1182: }
1183:
1184:
1185:
1186: int
1187: getsockopt(p, uap, retval)
1188: struct proc *p;
1189: struct getsockopt_args *uap;
1190: register_t *retval;
1191: {
1192: int valsize, error;
1193: struct file *fp;
1194: struct sockopt sopt;
1195:
1196: error = getsock(p->p_fd, uap->s, &fp);
1197: if (error)
1198: return (error);
1199: if (uap->val) {
1200: error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize,
1201: sizeof (valsize));
1202: if (error)
1203: return (error);
1204: if (valsize < 0)
1205: return (EINVAL);
1206: } else
1207: valsize = 0;
1208:
1209: sopt.sopt_dir = SOPT_GET;
1210: sopt.sopt_level = uap->level;
1211: sopt.sopt_name = uap->name;
1212: sopt.sopt_val = uap->val;
1213: sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1214: sopt.sopt_p = p;
1215:
1216: error = sogetopt((struct socket *)fp->f_data, &sopt);
1217: if (error == 0) {
1218: valsize = sopt.sopt_valsize;
1219: error = copyout((caddr_t)&valsize,
1220: (caddr_t)uap->avalsize, sizeof (valsize));
1221: }
1222: return (error);
1223: }
1224:
1225:
1226:
1227: struct pipe_args {
1228: int dummy;
1229: };
1230: /* ARGSUSED */
1231: int
1232: pipe(p, uap, retval)
1233: struct proc *p;
1234: struct pipe_args *uap;
1235: register_t *retval;
1236: {
1237: struct file *rf, *wf;
1238: struct socket *rso, *wso;
1239: int fd, error;
1240:
1241: if (error = socreate(AF_UNIX, &rso, SOCK_STREAM, 0))
1242: return (error);
1243: if (error = socreate(AF_UNIX, &wso, SOCK_STREAM, 0))
1244: goto free1;
1245: if (error = falloc(p, &rf, &fd))
1246: goto free2;
1247: retval[0] = fd;
1248: rf->f_flag = FREAD;
1249: rf->f_type = DTYPE_SOCKET;
1250: rf->f_ops = &socketops;
1251: rf->f_data = (caddr_t)rso;
1252: if (error = falloc(p, &wf, &fd))
1253: goto free3;
1254: wf->f_flag = FWRITE;
1255: wf->f_type = DTYPE_SOCKET;
1256: wf->f_ops = &socketops;
1257: wf->f_data = (caddr_t)wso;
1258: retval[1] = fd;
1259: if (error = unp_connect2(wso, rso))
1260: goto free4;
1261: *fdflags(p, retval[0]) &= ~UF_RESERVED;
1262: *fdflags(p, retval[1]) &= ~UF_RESERVED;
1263: return (0);
1264: free4:
1265: fdrelse(p, retval[1]);
1266: ffree(wf);
1267: free3:
1268: fdrelse(p, retval[0]);
1269: ffree(rf);
1270: free2:
1271: (void)soclose(wso);
1272: free1:
1273: (void)soclose(rso);
1274: return (error);
1275: }
1276:
1277:
1278: /*
1279: * Get socket name.
1280: */
1281: /* ARGSUSED */
1282: static int
1283: getsockname1(p, uap, retval, compat)
1284: struct proc *p;
1285: register struct getsockname_args *uap;
1286: register_t *retval;
1287: int compat;
1288: {
1289: struct file *fp;
1290: register struct socket *so;
1291: struct sockaddr *sa;
1292: int len, error;
1293:
1294: error = getsock(p->p_fd, uap->fdes, &fp);
1295: if (error)
1296: return (error);
1297: error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
1298: if (error)
1299: return (error);
1300: so = (struct socket *)fp->f_data;
1301: sa = 0;
1302: error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1303: if (error)
1304: goto bad;
1305: if (sa == 0) {
1306: len = 0;
1307: goto gotnothing;
1308: }
1309:
1310: len = MIN(len, sa->sa_len);
1311: #ifdef COMPAT_OLDSOCK
1312: if (compat)
1313: ((struct osockaddr *)sa)->sa_family = sa->sa_family;
1314: #endif
1315: error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
1316: if (error == 0)
1317: gotnothing:
1318: error = copyout((caddr_t)&len, (caddr_t)uap->alen,
1319: sizeof (len));
1320: bad:
1321: if (sa)
1322: FREE(sa, M_SONAME);
1323: return (error);
1324: }
1325:
1326: int
1327: getsockname(p, uap, retval)
1328: struct proc *p;
1329: struct getsockname_args *uap;
1330: register_t *retval;
1331: {
1332:
1333: return (getsockname1(p, uap, retval, 0));
1334: }
1335:
#ifdef COMPAT_OLDSOCK
/*
 * Old-style getsockname entry point: getsockname1() with the compat
 * flag on.
 */
int
ogetsockname(p, uap, retval)
	struct proc *p;
	struct getsockname_args *uap;
	register_t *retval;
{
	int status;

	status = getsockname1(p, uap, retval, 1);
	return (status);
}
#endif /* COMPAT_OLDSOCK */
1347:
1348: /*
1349: * Get name of peer for connected socket.
1350: */
1351: /* ARGSUSED */
1352: int
1353: getpeername1(p, uap, retval, compat)
1354: struct proc *p;
1355: register struct getpeername_args *uap;
1356: register_t *retval;
1357: int compat;
1358: {
1359: struct file *fp;
1360: register struct socket *so;
1361: struct sockaddr *sa;
1362: int len, error;
1363:
1364: error = getsock(p->p_fd, uap->fdes, &fp);
1365: if (error)
1366: return (error);
1367: so = (struct socket *)fp->f_data;
1368: if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0)
1369: return (ENOTCONN);
1370: error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
1371: if (error)
1372: return (error);
1373: sa = 0;
1374: error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1375: if (error)
1376: goto bad;
1377: if (sa == 0) {
1378: len = 0;
1379: goto gotnothing;
1380: }
1381: len = MIN(len, sa->sa_len);
1382: #ifdef COMPAT_OLDSOCK
1383: if (compat)
1384: ((struct osockaddr *)sa)->sa_family =
1385: sa->sa_family;
1386: #endif
1387: error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
1388: if (error)
1389: goto bad;
1390: gotnothing:
1391: error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len));
1392: bad:
1393: if (sa) FREE(sa, M_SONAME);
1394: return (error);
1395: }
1396:
1397: int
1398: getpeername(p, uap, retval)
1399: struct proc *p;
1400: struct getpeername_args *uap;
1401: register_t *retval;
1402: {
1403:
1404: return (getpeername1(p, uap, retval, 0));
1405: }
1406:
#ifdef COMPAT_OLDSOCK
/*
 * Old-style getpeername entry point: getpeername1() with the compat
 * flag on.
 */
int
ogetpeername(p, uap, retval)
	struct proc *p;
	struct ogetpeername_args *uap;
	register_t *retval;
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return (getpeername1(p, args, retval, 1));
}
#endif /* COMPAT_OLDSOCK */
1419:
1420: int
1421: sockargs(mp, buf, buflen, type)
1422: struct mbuf **mp;
1423: caddr_t buf;
1424: int buflen, type;
1425: {
1426: register struct sockaddr *sa;
1427: register struct mbuf *m;
1428: int error;
1429:
1430: if ((u_int)buflen > MLEN) {
1431: #ifdef COMPAT_OLDSOCK
1432: if (type == MT_SONAME && (u_int)buflen <= 112)
1433: buflen = MLEN; /* unix domain compat. hack */
1434: else
1435: #endif
1436: return (EINVAL);
1437: }
1438: m = m_get(M_WAIT, type);
1439: if (m == NULL)
1440: return (ENOBUFS);
1441: m->m_len = buflen;
1442: error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1443: if (error)
1444: (void) m_free(m);
1445: else {
1446: *mp = m;
1447: if (type == MT_SONAME) {
1448: sa = mtod(m, struct sockaddr *);
1449:
1450: #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1451: if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1452: sa->sa_family = sa->sa_len;
1453: #endif
1454: sa->sa_len = buflen;
1455: }
1456: }
1457: return (error);
1458: }
1459:
1460: int
1461: getsockaddr(namp, uaddr, len)
1462: struct sockaddr **namp;
1463: caddr_t uaddr;
1464: size_t len;
1465: {
1466: struct sockaddr *sa;
1467: int error;
1468:
1469: if (len > SOCK_MAXADDRLEN)
1470: return ENAMETOOLONG;
1471: MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1472: error = copyin(uaddr, sa, len);
1473: if (error) {
1474: FREE(sa, M_SONAME);
1475: } else {
1476: #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1477: if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1478: sa->sa_family = sa->sa_len;
1479: #endif
1480: sa->sa_len = len;
1481: *namp = sa;
1482: }
1483: return error;
1484: }
1485:
1486: int
1487: getsock(fdp, fdes, fpp)
1488: struct filedesc *fdp;
1489: int fdes;
1490: struct file **fpp;
1491: {
1492: register struct file *fp;
1493:
1494: if ((unsigned)fdes >= fdp->fd_nfiles ||
1495: (fp = fdp->fd_ofiles[fdes]) == NULL ||
1496: (fdp->fd_ofileflags[fdes] & UF_RESERVED))
1497: return (EBADF);
1498: if (fp->f_type != DTYPE_SOCKET)
1499: return (ENOTSOCK);
1500: *fpp = fp;
1501: return (0);
1502: }
1503:
1504: #if SENDFILE
1505: /*
1506: * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
1507: * XXX - The sf_buf functions are currently private to sendfile(2), so have
1508: * been made static, but may be useful in the future for doing zero-copy in
1509: * other parts of the networking code.
1510: */
1511: static void
1512: sf_buf_init(void *arg)
1513: {
1514: int i;
1515:
1516: SLIST_INIT(&sf_freelist);
1517: sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE);
1518: sf_bufs = _MALLOC(nsfbufs * sizeof(struct sf_buf), M_TEMP, M_NOWAIT);
1519: bzero(sf_bufs, nsfbufs * sizeof(struct sf_buf));
1520: for (i = 0; i < nsfbufs; i++) {
1521: sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
1522: SLIST_INSERT_HEAD(&sf_freelist, &sf_bufs[i], free_list);
1523: }
1524: }
1525:
1526: /*
1527: * Get an sf_buf from the freelist. Will block if none are available.
1528: */
1529: static struct sf_buf *
1530: sf_buf_alloc()
1531: {
1532: struct sf_buf *sf;
1533: int s;
1534:
1535: s = splimp();
1536: while ((sf = SLIST_FIRST(&sf_freelist)) == NULL) {
1537: sf_buf_alloc_want = 1;
1538: tsleep(&sf_freelist, PVM, "sfbufa", 0);
1539: }
1540: SLIST_REMOVE_HEAD(&sf_freelist, free_list);
1541: splx(s);
1542: sf->refcnt = 1;
1543: return (sf);
1544: }
1545:
1546: #define dtosf(x) (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT])
1547: static void
1548: sf_buf_ref(caddr_t addr, u_int size)
1549: {
1550: struct sf_buf *sf;
1551:
1552: sf = dtosf(addr);
1553: if (sf->refcnt == 0)
1554: panic("sf_buf_ref: referencing a free sf_buf");
1555: sf->refcnt++;
1556: }
1557:
1558: /*
1559: * Lose a reference to an sf_buf. When none left, detach mapped page
1560: * and release resources back to the system.
1561: *
1562: * Must be called at splimp.
1563: */
1564: static void
1565: sf_buf_free(caddr_t addr, u_int size)
1566: {
1567: struct sf_buf *sf;
1568: struct vm_page *m;
1569: int s;
1570:
1571: sf = dtosf(addr);
1572: if (sf->refcnt == 0)
1573: panic("sf_buf_free: freeing free sf_buf");
1574: sf->refcnt--;
1575: if (sf->refcnt == 0) {
1576: pmap_qremove((vm_offset_t)addr, 1);
1577: m = sf->m;
1578: s = splvm();
1579: vm_page_unwire(m, 0);
1580: /*
1581: * Check for the object going away on us. This can
1582: * happen since we don't hold a reference to it.
1583: * If so, we're responsible for freeing the page.
1584: */
1585: if (m->wire_count == 0 && m->object == NULL)
1586: vm_page_free(m);
1587: splx(s);
1588: sf->m = NULL;
1589: SLIST_INSERT_HEAD(&sf_freelist, sf, free_list);
1590: if (sf_buf_alloc_want) {
1591: sf_buf_alloc_want = 0;
1592: wakeup(&sf_freelist);
1593: }
1594: }
1595: }
1596:
1597: /*
1598: * sendfile(2).
1599: * int sendfile(int fd, int s, off_t offset, size_t nbytes,
1600: * struct sf_hdtr *hdtr, off_t *sbytes, int flags)
1601: *
1602: * Send a file specified by 'fd' and starting at 'offset' to a socket
1603: * specified by 's'. Send only 'nbytes' of the file or until EOF if
1604: * nbytes == 0. Optionally add a header and/or trailer to the socket
1605: * output. If specified, write the total number of bytes sent into *sbytes.
1606: */
1607: int
1608: sendfile(struct proc *p, struct sendfile_args *uap)
1609: {
1610: struct file *fp;
1611: struct filedesc *fdp = p->p_fd;
1612: struct vnode *vp;
1613: struct vm_object *obj;
1614: struct socket *so;
1615: struct mbuf *m;
1616: struct sf_buf *sf;
1617: struct vm_page *pg;
1618: struct writev_args nuap;
1619: struct sf_hdtr hdtr;
1620: off_t off, xfsize, sbytes = 0;
1621: int error = 0, s;
1622:
1623: /*
1624: * Do argument checking. Must be a regular file in, stream
1625: * type and connected socket out, positive offset.
1626: */
1627: if (((u_int)uap->fd) >= fdp->fd_nfiles ||
1628: (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
1629: (fp->f_flag & FREAD) == 0) {
1630: error = EBADF;
1631: goto done;
1632: }
1633: if (fp->f_type != DTYPE_VNODE) {
1634: error = EINVAL;
1635: goto done;
1636: }
1637: vp = (struct vnode *)fp->f_data;
1638: obj = vp->v_object;
1639: if (vp->v_type != VREG || obj == NULL) {
1640: error = EINVAL;
1641: goto done;
1642: }
1643: error = getsock(p->p_fd, uap->s, &fp);
1644: if (error)
1645: goto done;
1646: so = (struct socket *)fp->f_data;
1647: if (so->so_type != SOCK_STREAM) {
1648: error = EINVAL;
1649: goto done;
1650: }
1651: if ((so->so_state & SS_ISCONNECTED) == 0) {
1652: error = ENOTCONN;
1653: goto done;
1654: }
1655: if (uap->offset < 0) {
1656: error = EINVAL;
1657: goto done;
1658: }
1659:
1660: /*
1661: * If specified, get the pointer to the sf_hdtr struct for
1662: * any headers/trailers.
1663: */
1664: if (uap->hdtr != NULL) {
1665: error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1666: if (error)
1667: goto done;
1668: /*
1669: * Send any headers. Wimp out and use writev(2).
1670: */
1671: if (hdtr.headers != NULL) {
1672: nuap.fd = uap->s;
1673: nuap.iovp = hdtr.headers;
1674: nuap.iovcnt = hdtr.hdr_cnt;
1675: error = writev(p, &nuap);
1676: if (error)
1677: goto done;
1678: sbytes += p->p_retval[0];
1679: }
1680: }
1681:
1682: /*
1683: * Protect against multiple writers to the socket.
1684: */
1685: (void) sblock(&so->so_snd, M_WAIT);
1686:
1687: /*
1688: * Loop through the pages in the file, starting with the requested
1689: * offset. Get a file page (do I/O if necessary), map the file page
1690: * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1691: * it on the socket.
1692: */
1693: for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1694: vm_pindex_t pindex;
1695: vm_offset_t pgoff;
1696:
1697: pindex = OFF_TO_IDX(off);
1698: retry_lookup:
1699: /*
1700: * Calculate the amount to transfer. Not to exceed a page,
1701: * the EOF, or the passed in nbytes.
1702: */
1703: xfsize = obj->un_pager.vnp.vnp_size - off;
1704: if (xfsize > PAGE_SIZE)
1705: xfsize = PAGE_SIZE;
1706: pgoff = (vm_offset_t)(off & PAGE_MASK);
1707: if (PAGE_SIZE - pgoff < xfsize)
1708: xfsize = PAGE_SIZE - pgoff;
1709: if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1710: xfsize = uap->nbytes - sbytes;
1711: if (xfsize <= 0)
1712: break;
1713: /*
1714: * Optimize the non-blocking case by looking at the socket space
1715: * before going to the extra work of constituting the sf_buf.
1716: */
1717: if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1718: if (so->so_state & SS_CANTSENDMORE)
1719: error = EPIPE;
1720: else
1721: error = EAGAIN;
1722: sbunlock(&so->so_snd);
1723: goto done;
1724: }
1725: /*
1726: * Attempt to look up the page. If the page doesn't exist or the
1727: * part we're interested in isn't valid, then read it from disk.
1728: * If some other part of the kernel has this page (i.e. it's busy),
1729: * then disk I/O may be occuring on it, so wait and retry.
1730: */
1731: pg = vm_page_lookup(obj, pindex);
1732: if (pg == NULL || (!(pg->flags & PG_BUSY) && !pg->busy &&
1733: !vm_page_is_valid(pg, pgoff, xfsize))) {
1734: struct uio auio;
1735: struct iovec aiov;
1736: int bsize;
1737:
1738: if (pg == NULL) {
1739: pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL);
1740: if (pg == NULL) {
1741: VM_WAIT;
1742: goto retry_lookup;
1743: }
1744: /*
1745: * don't just clear PG_BUSY manually -
1746: * vm_page_alloc() should be considered opaque,
1747: * use the VM routine provided to clear
1748: * PG_BUSY.
1749: */
1750: vm_page_wakeup(pg);
1751:
1752: }
1753: /*
1754: * Ensure that our page is still around when the I/O completes.
1755: */
1756: vm_page_io_start(pg);
1757: vm_page_wire(pg);
1758: /*
1759: * Get the page from backing store.
1760: */
1761: bsize = vp->v_mount->mnt_stat.f_iosize;
1762: auio.uio_iov = &aiov;
1763: auio.uio_iovcnt = 1;
1764: aiov.iov_base = 0;
1765: aiov.iov_len = MAXBSIZE;
1766: auio.uio_resid = MAXBSIZE;
1767: auio.uio_offset = trunc_page(off);
1768: auio.uio_segflg = UIO_NOCOPY;
1769: auio.uio_rw = UIO_READ;
1770: auio.uio_procp = p;
1771: vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, p);
1772: error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16),
1773: p->p_ucred);
1774: VOP_UNLOCK(vp, 0, p);
1775: vm_page_flag_clear(pg, PG_ZERO);
1776: vm_page_io_finish(pg);
1777: if (error) {
1778: vm_page_unwire(pg, 0);
1779: /*
1780: * See if anyone else might know about this page.
1781: * If not and it is not valid, then free it.
1782: */
1783: if (pg->wire_count == 0 && pg->valid == 0 &&
1784: pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1785: pg->hold_count == 0)
1786: vm_page_free(pg);
1787: sbunlock(&so->so_snd);
1788: goto done;
1789: }
1790: } else {
1791: if ((pg->flags & PG_BUSY) || pg->busy) {
1792: s = splvm();
1793: if ((pg->flags & PG_BUSY) || pg->busy) {
1794: /*
1795: * Page is busy. Wait and retry.
1796: */
1797: vm_page_flag_set(pg, PG_WANTED);
1798: tsleep(pg, PVM, "sfpbsy", 0);
1799: splx(s);
1800: goto retry_lookup;
1801: }
1802: splx(s);
1803: }
1804: /*
1805: * Protect from having the page ripped out from beneath us.
1806: */
1807: vm_page_wire(pg);
1808: }
1809: /*
1810: * Allocate a kernel virtual page and insert the physical page
1811: * into it.
1812: */
1813: sf = sf_buf_alloc();
1814: sf->m = pg;
1815: pmap_qenter(sf->kva, &pg, 1);
1816: /*
1817: * Get an mbuf header and set it up as having external storage.
1818: */
1819: MGETHDR(m, M_WAIT, MT_DATA);
1820: m->m_ext.ext_free = sf_buf_free;
1821: m->m_ext.ext_ref = sf_buf_ref;
1822: m->m_ext.ext_buf = (void *)sf->kva;
1823: m->m_ext.ext_size = PAGE_SIZE;
1824: m->m_data = (char *) sf->kva + pgoff;
1825: m->m_flags |= M_EXT;
1826: m->m_pkthdr.len = m->m_len = xfsize;
1827: /*
1828: * Add the buffer to the socket buffer chain.
1829: */
1830: s = splnet();
1831: retry_space:
1832: /*
1833: * Make sure that the socket is still able to take more data.
1834: * CANTSENDMORE being true usually means that the connection
1835: * was closed. so_error is true when an error was sensed after
1836: * a previous send.
1837: * The state is checked after the page mapping and buffer
1838: * allocation above since those operations may block and make
1839: * any socket checks stale. From this point forward, nothing
1840: * blocks before the pru_send (or more accurately, any blocking
1841: * results in a loop back to here to re-check).
1842: */
1843: if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
1844: if (so->so_state & SS_CANTSENDMORE) {
1845: error = EPIPE;
1846: } else {
1847: error = so->so_error;
1848: so->so_error = 0;
1849: }
1850: m_freem(m);
1851: sbunlock(&so->so_snd);
1852: splx(s);
1853: goto done;
1854: }
1855: /*
1856: * Wait for socket space to become available. We do this just
1857: * after checking the connection state above in order to avoid
1858: * a race condition with sbwait().
1859: */
1860: if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
1861: if (so->so_state & SS_NBIO) {
1862: m_freem(m);
1863: sbunlock(&so->so_snd);
1864: splx(s);
1865: error = EAGAIN;
1866: goto done;
1867: }
1868: error = sbwait(&so->so_snd);
1869: /*
1870: * An error from sbwait usually indicates that we've
1871: * been interrupted by a signal. If we've sent anything
1872: * then return bytes sent, otherwise return the error.
1873: */
1874: if (error) {
1875: m_freem(m);
1876: sbunlock(&so->so_snd);
1877: splx(s);
1878: goto done;
1879: }
1880: goto retry_space;
1881: }
1882: error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, p);
1883: splx(s);
1884: if (error) {
1885: sbunlock(&so->so_snd);
1886: goto done;
1887: }
1888: }
1889: sbunlock(&so->so_snd);
1890:
1891: /*
1892: * Send trailers. Wimp out and use writev(2).
1893: */
1894: if (uap->hdtr != NULL && hdtr.trailers != NULL) {
1895: nuap.fd = uap->s;
1896: nuap.iovp = hdtr.trailers;
1897: nuap.iovcnt = hdtr.trl_cnt;
1898: error = writev(p, &nuap);
1899: if (error)
1900: goto done;
1901: sbytes += p->p_retval[0];
1902: }
1903:
1904: done:
1905: if (uap->sbytes != NULL) {
1906: copyout(&sbytes, uap->sbytes, sizeof(off_t));
1907: }
1908: return (error);
1909: }
1910:
1911: #endif
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.