|
|
1.1 root 1: /*
2: * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3: *
4: * @APPLE_LICENSE_HEADER_START@
5: *
6: * The contents of this file constitute Original Code as defined in and
7: * are subject to the Apple Public Source License Version 1.1 (the
8: * "License"). You may not use this file except in compliance with the
9: * License. Please obtain a copy of the License at
10: * http://www.apple.com/publicsource and read it before using this file.
11: *
12: * This Original Code and all software distributed under the License are
13: * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14: * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15: * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16: * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17: * License for the specific language governing rights and limitations
18: * under the License.
19: *
20: * @APPLE_LICENSE_HEADER_END@
21: */
22: /* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
23: /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
24: /*
25: * Copyright (c) 1982, 1986, 1988, 1990, 1993
26: * The Regents of the University of California. All rights reserved.
27: *
28: * Redistribution and use in source and binary forms, with or without
29: * modification, are permitted provided that the following conditions
30: * are met:
31: * 1. Redistributions of source code must retain the above copyright
32: * notice, this list of conditions and the following disclaimer.
33: * 2. Redistributions in binary form must reproduce the above copyright
34: * notice, this list of conditions and the following disclaimer in the
35: * documentation and/or other materials provided with the distribution.
36: * 3. All advertising materials mentioning features or use of this software
37: * must display the following acknowledgement:
38: * This product includes software developed by the University of
39: * California, Berkeley and its contributors.
40: * 4. Neither the name of the University nor the names of its contributors
41: * may be used to endorse or promote products derived from this software
42: * without specific prior written permission.
43: *
44: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
45: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
46: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
47: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
48: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
49: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
50: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
51: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
52: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
53: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54: * SUCH DAMAGE.
55: *
56: * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93
57: */
58:
59: #include <sys/param.h>
60: #include <sys/systm.h>
61: #include <sys/domain.h>
62: #include <sys/kernel.h>
63: #include <sys/proc.h>
64: #include <sys/malloc.h>
65: #include <sys/mbuf.h>
66: #include <sys/protosw.h>
67: #include <sys/stat.h>
68: #include <sys/socket.h>
69: #include <sys/socketvar.h>
70: #include <sys/signalvar.h>
71: #include <sys/sysctl.h>
72: #include <sys/ev.h>
73:
74: /*
75: * Primitive routines for operating on sockets and socket buffers
76: */
77:
/*
 * Ceiling used by sbreserve(): it bounds both the byte reservation that
 * will be accepted and the resulting sb_mbmax.
 */
u_long sb_max = SB_MAX; /* XXX should be static */

/*
 * Factor by which sbreserve() multiplies the requested byte count to
 * derive sb_mbmax (before clamping to sb_max).
 */
static u_long sb_efficiency = 8; /* parameter for sbreserve() */

/*
 * NOTE(review): not referenced anywhere in this file; presumably a
 * label shared with other files -- confirm before changing.
 */
char netcon[] = "netcon";
83:
84: /*
85: * Procedures to manipulate state flags of socket
86: * and do appropriate wakeups. Normal sequence from the
87: * active (originating) side is that soisconnecting() is
88: * called during processing of connect() call,
89: * resulting in an eventual call to soisconnected() if/when the
90: * connection is established. When the connection is torn down
91: * soisdisconnecting() is called during processing of disconnect() call,
92: * and soisdisconnected() is called when the connection to the peer
93: * is totally severed. The semantics of these routines are such that
94: * connectionless protocols can call soisconnected() and soisdisconnected()
95: * only, bypassing the in-progress calls when setting up a ``connection''
96: * takes no time.
97: *
98: * From the passive side, a socket is created with
99: * two queues of sockets: so_q0 for connections in progress
100: * and so_q for connections already made and awaiting user acceptance.
101: * As a protocol is preparing incoming connections, it creates a socket
102: * structure queued on so_q0 by calling sonewconn(). When the connection
103: * is established, soisconnected() is called, and transfers the
104: * socket structure to so_q, making it available to accept().
105: *
106: * If a socket is closed with sockets on either
107: * so_q0 or so_q, these sockets are dropped.
108: *
109: * If higher level protocols are implemented in
110: * the kernel, the wakeups done here will sometimes
111: * cause software-interrupt process scheduling.
112: */
113:
114: void
115: soisconnecting(so)
116: register struct socket *so;
117: {
118:
119: so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
120: so->so_state |= SS_ISCONNECTING;
121: }
122:
123: void
124: soisconnected(so)
125: register struct socket *so;
126: { register struct kextcb *kp;
127: register struct socket *head = so->so_head;
128:
129: kp = sotokextcb(so);
130: while (kp)
131: { if (kp->e_soif && kp->e_soif->sf_soisconnected)
132: { if ((*kp->e_soif->sf_soisconnected)(so, kp))
133: return;
134: }
135: kp = kp->e_next;
136: }
137:
138: so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
139: so->so_state |= SS_ISCONNECTED;
140: if (head && (so->so_state & SS_INCOMP)) {
141: postevent(head,0,EV_RCONN);
142: TAILQ_REMOVE(&head->so_incomp, so, so_list);
143: head->so_incqlen--;
144: so->so_state &= ~SS_INCOMP;
145: TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
146: so->so_state |= SS_COMP;
147: sorwakeup(head);
148: wakeup((caddr_t)&head->so_timeo);
149: } else {
150: postevent(so,0,EV_WCONN);
151: wakeup((caddr_t)&so->so_timeo);
152: sorwakeup(so);
153: sowwakeup(so);
154: }
155: }
156:
157: void
158: soisdisconnecting(so)
159: register struct socket *so;
160: { register struct kextcb *kp;
161:
162: kp = sotokextcb(so);
163: while (kp)
164: { if (kp->e_soif && kp->e_soif->sf_soisdisconnecting)
165: { if ((*kp->e_soif->sf_soisdisconnecting)(so, kp))
166: return;
167: }
168: kp = kp->e_next;
169: }
170:
171: so->so_state &= ~SS_ISCONNECTING;
172: so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
173: wakeup((caddr_t)&so->so_timeo);
174: sowwakeup(so);
175: sorwakeup(so);
176: }
177:
178: void
179: soisdisconnected(so)
180: register struct socket *so;
181: { register struct kextcb *kp;
182:
183: kp = sotokextcb(so);
184: while (kp)
185: { if (kp->e_soif && kp->e_soif->sf_soisdisconnected)
186: { if ((*kp->e_soif->sf_soisdisconnected)(so, kp))
187: return;
188: }
189: kp = kp->e_next;
190: }
191:
192: so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
193: so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
194: wakeup((caddr_t)&so->so_timeo);
195: sowwakeup(so);
196: sorwakeup(so);
197: }
198:
/*
 * Return a random connection that hasn't been serviced yet and
 * is eligible for discard.  There is a one in qlen chance that
 * we will return a null, saying that there are no droppable
 * requests.  In this case, the protocol specific code should drop
 * the new request.  This ensures fairness.
 *
 * This may be used in conjunction with protocol specific queue
 * congestion routines.
 */
209: struct socket *
210: sodropablereq(head)
211: register struct socket *head;
212: {
213: register struct socket *so;
214: unsigned int i, j, qlen;
215: static int rnd;
216: static struct timeval old_runtime;
217: static unsigned int cur_cnt, old_cnt;
218: struct timeval tv;
219:
220: microtime(&tv);
221: if ((i = (tv.tv_sec - old_runtime.tv_sec)) != 0) {
222: old_runtime = tv;
223: old_cnt = cur_cnt / i;
224: cur_cnt = 0;
225: }
226:
227: so = TAILQ_FIRST(&head->so_incomp);
228: if (!so)
229: return (so);
230:
231: qlen = head->so_incqlen;
232: if (++cur_cnt > qlen || old_cnt > qlen) {
233: rnd = (314159 * rnd + 66329) & 0xffff;
234: j = ((qlen + 1) * rnd) >> 16;
235:
236: while (j-- && so)
237: so = TAILQ_NEXT(so, so_list);
238: }
239:
240: return (so);
241: }
242:
/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED.
 */
251: struct socket *
252: sonewconn(head, connstatus)
253: register struct socket *head;
254: int connstatus;
255: { int error = 0;
256: register struct socket *so;
257:
258: if (head->so_qlen > 3 * head->so_qlimit / 2)
259: return ((struct socket *)0);
260: so = soalloc(0);
261: if (so == NULL)
262: return ((struct socket *)0);
263: so->so_head = head;
264: so->so_type = head->so_type;
265: so->so_options = head->so_options &~ SO_ACCEPTCONN;
266: so->so_linger = head->so_linger;
267: so->so_state = head->so_state | SS_NOFDREF;
268: so->so_proto = head->so_proto;
269: so->so_timeo = head->so_timeo;
270: so->so_pgid = head->so_pgid;
271: so->so_uid = head->so_uid;
272: so->so_rcv.sb_flags |= SB_RECV; /* XXX */
273: (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
274:
275: if (so->so_proto->pr_sfilter.tqh_first)
276: error = sfilter_init(so);
277: if (error == 0 && (*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
278: sfilter_term(so);
279: sodealloc(so);
280: return ((struct socket *)0);
281: }
282:
283: if (connstatus) {
284: TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
285: so->so_state |= SS_COMP;
286: } else {
287: TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
288: so->so_state |= SS_INCOMP;
289: head->so_incqlen++;
290: }
291: head->so_qlen++;
292: if (connstatus) {
293: sorwakeup(head);
294: wakeup((caddr_t)&head->so_timeo);
295: so->so_state |= connstatus;
296: }
297: so->so_rcv.sb_so = so->so_snd.sb_so = so;
298: TAILQ_INIT(&so->so_evlist);
299: return (so);
300: }
301:
302: /*
303: * Socantsendmore indicates that no more data will be sent on the
304: * socket; it would normally be applied to a socket when the user
305: * informs the system that no more data is to be sent, by the protocol
306: * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data
307: * will be received, and will normally be applied to the socket by a
308: * protocol when it detects that the peer will send no more data.
309: * Data queued for reading in the socket may yet be read.
310: */
311:
312: void
313: socantsendmore(so)
314: struct socket *so;
315: { register struct kextcb *kp;
316:
317: kp = sotokextcb(so);
318: while (kp)
319: { if (kp->e_soif && kp->e_soif->sf_socantsendmore)
320: { if ((*kp->e_soif->sf_socantsendmore)(so, kp))
321: return;
322: }
323: kp = kp->e_next;
324: }
325:
326:
327: so->so_state |= SS_CANTSENDMORE;
328: sowwakeup(so);
329: }
330:
331: void
332: socantrcvmore(so)
333: struct socket *so;
334: { register struct kextcb *kp;
335:
336: kp = sotokextcb(so);
337: while (kp)
338: { if (kp->e_soif && kp->e_soif->sf_socantrcvmore)
339: { if ((*kp->e_soif->sf_socantrcvmore)(so, kp))
340: return;
341: }
342: kp = kp->e_next;
343: }
344:
345:
346: so->so_state |= SS_CANTRCVMORE;
347: sorwakeup(so);
348: }
349:
350: /*
351: * Wait for data to arrive at/drain from a socket buffer.
352: */
353: int
354: sbwait(sb)
355: struct sockbuf *sb;
356: {
357:
358: sb->sb_flags |= SB_WAIT;
359: return (tsleep((caddr_t)&sb->sb_cc,
360: (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
361: sb->sb_timeo));
362: }
363:
364: /*
365: * Lock a sockbuf already known to be locked;
366: * return any error returned from sleep (EINTR).
367: */
368: int
369: sb_lock(sb)
370: register struct sockbuf *sb;
371: {
372: int error;
373:
374: while (sb->sb_flags & SB_LOCK) {
375: sb->sb_flags |= SB_WANT;
376: error = tsleep((caddr_t)&sb->sb_flags,
377: (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH,
378: "sblock", 0);
379: if (error)
380: return (error);
381: }
382: sb->sb_flags |= SB_LOCK;
383: return (0);
384: }
385:
386: /*
387: * Wakeup processes waiting on a socket buffer.
388: * Do asynchronous notification via SIGIO
389: * if the socket has the SS_ASYNC flag set.
390: */
391: void
392: sowakeup(so, sb)
393: register struct socket *so;
394: register struct sockbuf *sb;
395: {
396: struct proc *p = current_proc();
397:
398:
399: selwakeup(&sb->sb_sel);
400: sb->sb_flags &= ~SB_SEL;
401: if (sb->sb_flags & SB_WAIT) {
402: sb->sb_flags &= ~SB_WAIT;
403: wakeup((caddr_t)&sb->sb_cc);
404: }
405: if (so->so_state & SS_ASYNC) {
406: if (so->so_pgid < 0)
407: gsignal(-so->so_pgid, SIGIO);
408: else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
409: psignal(p, SIGIO);
410: }
411:
412: if (sb->sb_flags & SB_UPCALL)
413: (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
414: }
415:
416: /*
417: * Socket buffer (struct sockbuf) utility routines.
418: *
419: * Each socket contains two socket buffers: one for sending data and
420: * one for receiving data. Each buffer contains a queue of mbufs,
421: * information about the number of mbufs and amount of data in the
422: * queue, and other fields allowing select() statements and notification
423: * on data availability to be implemented.
424: *
425: * Data stored in a socket buffer is maintained as a list of records.
426: * Each record is a list of mbufs chained together with the m_next
427: * field. Records are chained together with the m_nextpkt field. The upper
428: * level routine soreceive() expects the following conventions to be
429: * observed when placing information in the receive buffer:
430: *
431: * 1. If the protocol requires each message be preceded by the sender's
432: * name, then a record containing that name must be present before
433: * any associated data (mbuf's must be of type MT_SONAME).
434: * 2. If the protocol supports the exchange of ``access rights'' (really
435: * just additional data associated with the message), and there are
436: * ``rights'' to be received, then a record containing this data
437: * should be present (mbuf's must be of type MT_RIGHTS).
438: * 3. If a name or rights record exists, then it must be followed by
439: * a data record, perhaps of zero length.
440: *
441: * Before using a new socket structure it is first necessary to reserve
442: * buffer space to the socket, by calling sbreserve(). This should commit
443: * some of the available buffer space in the system buffer pool for the
444: * socket (currently, it does nothing but enforce limits). The space
445: * should be released by calling sbrelease() when the socket is destroyed.
446: */
447:
448: int
449: soreserve(so, sndcc, rcvcc)
450: register struct socket *so;
451: u_long sndcc, rcvcc;
452: {
453:
454: if (sbreserve(&so->so_snd, sndcc) == 0)
455: goto bad;
456: if (sbreserve(&so->so_rcv, rcvcc) == 0)
457: goto bad2;
458: if (so->so_rcv.sb_lowat == 0)
459: so->so_rcv.sb_lowat = 1;
460: if (so->so_snd.sb_lowat == 0)
461: so->so_snd.sb_lowat = MCLBYTES;
462: if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
463: so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
464: return (0);
465: bad2:
466: sbrelease(&so->so_snd);
467: bad:
468: return (ENOBUFS);
469: }
470:
471: /*
472: * Allot mbufs to a sockbuf.
473: * Attempt to scale mbmax so that mbcnt doesn't become limiting
474: * if buffering efficiency is near the normal case.
475: */
476: int
477: sbreserve(sb, cc)
478: struct sockbuf *sb;
479: u_long cc;
480: {
481: if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES))
482: return (0);
483: sb->sb_hiwat = cc;
484: sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
485: if (sb->sb_lowat > sb->sb_hiwat)
486: sb->sb_lowat = sb->sb_hiwat;
487: return (1);
488: }
489:
490: /*
491: * Free mbufs held by a socket, and reserved mbuf space.
492: */
493: void
494: sbrelease(sb)
495: struct sockbuf *sb;
496: {
497:
498: sbflush(sb);
499: sb->sb_hiwat = sb->sb_mbmax = 0;
500:
501: {
502: int oldpri = splimp();
503: selthreadclear(&sb->sb_sel);
504: splx(oldpri);
505: }
506: }
507:
508: /*
509: * Routines to add and remove
510: * data from an mbuf queue.
511: *
512: * The routines sbappend() or sbappendrecord() are normally called to
513: * append new mbufs to a socket buffer, after checking that adequate
514: * space is available, comparing the function sbspace() with the amount
515: * of data to be added. sbappendrecord() differs from sbappend() in
516: * that data supplied is treated as the beginning of a new record.
517: * To place a sender's address, optional access rights, and data in a
518: * socket receive buffer, sbappendaddr() should be used. To place
519: * access rights and data in a socket receive buffer, sbappendrights()
520: * should be used. In either case, the new data begins a new record.
521: * Note that unlike sbappend() and sbappendrecord(), these routines check
522: * for the caller that there will be enough space to store the data.
523: * Each fails if there is not enough space, or if it cannot find mbufs
524: * to store additional information in.
525: *
526: * Reliable protocols may use the socket send buffer to hold data
527: * awaiting acknowledgement. Data is normally copied from a socket
528: * send buffer in a protocol with m_copy for output to a peer,
529: * and then removing the data from the socket buffer with sbdrop()
530: * or sbdroprecord() when the data is acknowledged by the peer.
531: */
532:
/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated with
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
539: void
540: sbappend(sb, m)
541: struct sockbuf *sb;
542: struct mbuf *m;
543: { register struct kextcb *kp;
544: register struct mbuf *n;
545:
546: if (m == 0)
547: return;
548: kp = sotokextcb(sbtoso(sb));
549: while (kp)
550: { if (kp->e_sout && kp->e_sout->su_sbappend)
551: { if ((*kp->e_sout->su_sbappend)(sb, m, kp))
552: return;
553: }
554: kp = kp->e_next;
555: }
556:
557: if (n = sb->sb_mb) {
558: while (n->m_nextpkt)
559: n = n->m_nextpkt;
560: do {
561: if (n->m_flags & M_EOR) {
562: sbappendrecord(sb, m); /* XXXXXX!!!! */
563: return;
564: }
565: } while (n->m_next && (n = n->m_next));
566: }
567: sbcompress(sb, m, n);
568: }
569:
#ifdef SOCKBUF_DEBUG
/*
 * Debug-only consistency check: recompute the byte count and mbuf
 * storage total of `sb' from its mbufs and panic if they disagree with
 * the cached sb_cc / sb_mbcnt.
 *
 * Corrected relative to the earlier text, which lacked a closing brace
 * (so it could not compile), compared the running totals inside the
 * record loop before every record had been counted, and panicked on
 * the first mbuf of any record that legitimately had a successor.
 */
void
sbcheck(struct sockbuf *sb)
{
	struct mbuf *rec, *nextrec, *m;
	u_long len = 0, mbcnt = 0;

	for (rec = sb->sb_mb; rec != NULL; rec = nextrec) {
		nextrec = rec->m_nextpkt;
		for (m = rec; m != NULL; m = m->m_next) {
			len += m->m_len;
			mbcnt += MSIZE;
			if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
				mbcnt += m->m_ext.ext_size;
			/* Only the head of a record may link to another record. */
			if (m != rec && m->m_nextpkt != NULL)
				panic("sbcheck nextpkt");
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		printf("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
		panic("sbcheck");
	}
}
#endif
596:
597: /*
598: * As above, except the mbuf chain
599: * begins a new record.
600: */
601: void
602: sbappendrecord(sb, m0)
603: register struct sockbuf *sb;
604: register struct mbuf *m0;
605: {
606: register struct mbuf *m;
607:
608: if (m0 == 0)
609: return;
610: m = sb->sb_mb;
611: if (m)
612: while (m->m_nextpkt)
613: m = m->m_nextpkt;
614: /*
615: * Put the first mbuf on the queue.
616: * Note this permits zero length records.
617: */
618: sballoc(sb, m0);
619: if (m)
620: m->m_nextpkt = m0;
621: else
622: sb->sb_mb = m0;
623: m = m0->m_next;
624: m0->m_next = 0;
625: if (m && (m0->m_flags & M_EOR)) {
626: m0->m_flags &= ~M_EOR;
627: m->m_flags |= M_EOR;
628: }
629: sbcompress(sb, m, m0);
630: }
631:
632: /*
633: * As above except that OOB data
634: * is inserted at the beginning of the sockbuf,
635: * but after any other OOB data.
636: */
637: void
638: sbinsertoob(sb, m0)
639: register struct sockbuf *sb;
640: register struct mbuf *m0;
641: {
642: register struct mbuf *m;
643: register struct mbuf **mp;
644:
645: if (m0 == 0)
646: return;
647: for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) {
648: m = *mp;
649: again:
650: switch (m->m_type) {
651:
652: case MT_OOBDATA:
653: continue; /* WANT next train */
654:
655: case MT_CONTROL:
656: m = m->m_next;
657: if (m)
658: goto again; /* inspect THIS train further */
659: }
660: break;
661: }
662: /*
663: * Put the first mbuf on the queue.
664: * Note this permits zero length records.
665: */
666: sballoc(sb, m0);
667: m0->m_nextpkt = *mp;
668: *mp = m0;
669: m = m0->m_next;
670: m0->m_next = 0;
671: if (m && (m0->m_flags & M_EOR)) {
672: m0->m_flags &= ~M_EOR;
673: m->m_flags |= M_EOR;
674: }
675: sbcompress(sb, m, m0);
676: }
677:
678: /*
679: * Append address and data, and optionally, control (ancillary) data
680: * to the receive queue of a socket. If present,
681: * m0 must include a packet header with total length.
682: * Returns 0 if no space in sockbuf or insufficient mbufs.
683: */
684: int
685: sbappendaddr(sb, asa, m0, control)
686: register struct sockbuf *sb;
687: struct sockaddr *asa;
688: struct mbuf *m0, *control;
689: {
690: register struct mbuf *m, *n;
691: int space = asa->sa_len;
692:
693: if (m0 && (m0->m_flags & M_PKTHDR) == 0)
694: panic("sbappendaddr");
695: if (m0)
696: space += m0->m_pkthdr.len;
697: for (n = control; n; n = n->m_next) {
698: space += n->m_len;
699: if (n->m_next == 0) /* keep pointer to last control buf */
700: break;
701: }
702: if (space > sbspace(sb))
703: return (0);
704: if (asa->sa_len > MLEN)
705: return (0);
706: MGET(m, M_DONTWAIT, MT_SONAME);
707: if (m == 0)
708: return (0);
709: m->m_len = asa->sa_len;
710: bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
711: if (n)
712: n->m_next = m0; /* concatenate data to control */
713: else
714: control = m0;
715: m->m_next = control;
716: for (n = m; n; n = n->m_next)
717: sballoc(sb, n);
718: n = sb->sb_mb;
719: if (n) {
720: while (n->m_nextpkt)
721: n = n->m_nextpkt;
722: n->m_nextpkt = m;
723: } else
724: sb->sb_mb = m;
725: postevent(0,sb,EV_RWBYTES);
726: return (1);
727: }
728:
729: int
730: sbappendcontrol(sb, m0, control)
731: struct sockbuf *sb;
732: struct mbuf *control, *m0;
733: {
734: register struct mbuf *m, *n;
735: int space = 0;
736:
737: if (control == 0)
738: panic("sbappendcontrol");
739: for (m = control; ; m = m->m_next) {
740: space += m->m_len;
741: if (m->m_next == 0)
742: break;
743: }
744: n = m; /* save pointer to last control buffer */
745: for (m = m0; m; m = m->m_next)
746: space += m->m_len;
747: if (space > sbspace(sb))
748: return (0);
749: n->m_next = m0; /* concatenate data to control */
750: for (m = control; m; m = m->m_next)
751: sballoc(sb, m);
752: n = sb->sb_mb;
753: if (n) {
754: while (n->m_nextpkt)
755: n = n->m_nextpkt;
756: n->m_nextpkt = control;
757: } else
758: sb->sb_mb = control;
759: postevent(0,sb,EV_RWBYTES);
760: return (1);
761: }
762:
763: /*
764: * Compress mbuf chain m into the socket
765: * buffer sb following mbuf n. If n
766: * is null, the buffer is presumed empty.
767: */
768: void
769: sbcompress(sb, m, n)
770: register struct sockbuf *sb;
771: register struct mbuf *m, *n;
772: {
773: register int eor = 0;
774: register struct mbuf *o;
775:
776: while (m) {
777: eor |= m->m_flags & M_EOR;
778: if (m->m_len == 0 &&
779: (eor == 0 ||
780: (((o = m->m_next) || (o = n)) &&
781: o->m_type == m->m_type))) {
782: m = m_free(m);
783: continue;
784: }
785: if (n && (n->m_flags & (M_EXT | M_EOR)) == 0 &&
786: (n->m_data + n->m_len + m->m_len) < &n->m_dat[MLEN] &&
787: n->m_type == m->m_type) {
788: bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
789: (unsigned)m->m_len);
790: n->m_len += m->m_len;
791: sb->sb_cc += m->m_len;
792: m = m_free(m);
793: continue;
794: }
795: if (n)
796: n->m_next = m;
797: else
798: sb->sb_mb = m;
799: sballoc(sb, m);
800: n = m;
801: m->m_flags &= ~M_EOR;
802: m = m->m_next;
803: n->m_next = 0;
804: }
805: if (eor) {
806: if (n)
807: n->m_flags |= eor;
808: else
809: printf("semi-panic: sbcompress\n");
810: }
811: postevent(0,sb, EV_RWBYTES);
812: }
813:
814: /*
815: * Free all mbufs in a sockbuf.
816: * Check that all resources are reclaimed.
817: */
818: void
819: sbflush(sb)
820: register struct sockbuf *sb;
821: {
822:
823: if (sb->sb_flags & SB_LOCK)
824: panic("sbflush: locked");
825: while (sb->sb_mbcnt && sb->sb_cc)
826: sbdrop(sb, (int)sb->sb_cc);
827: if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt)
828: panic("sbflush: cc %ld || mb %p || mbcnt %ld", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt);
829: postevent(0, sb, EV_RWBYTES);
830: }
831:
832: /*
833: * Drop data from (the front of) a sockbuf.
834: */
835: void
836: sbdrop(sb, len)
837: register struct sockbuf *sb;
838: register int len;
839: {
840: register struct mbuf *m, *mn;
841: struct mbuf *next;
842:
843: next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
844: while (len > 0) {
845: if (m == 0) {
846: if (next == 0)
847: panic("sbdrop");
848: m = next;
849: next = m->m_nextpkt;
850: continue;
851: }
852: if (m->m_len > len) {
853: m->m_len -= len;
854: m->m_data += len;
855: sb->sb_cc -= len;
856: break;
857: }
858: len -= m->m_len;
859: sbfree(sb, m);
860: MFREE(m, mn);
861: m = mn;
862: }
863: while (m && m->m_len == 0) {
864: sbfree(sb, m);
865: MFREE(m, mn);
866: m = mn;
867: }
868: if (m) {
869: sb->sb_mb = m;
870: m->m_nextpkt = next;
871: } else
872: sb->sb_mb = next;
873: postevent(0, sb, EV_RWBYTES);
874: }
875:
876: /*
877: * Drop a record off the front of a sockbuf
878: * and move the next record to the front.
879: */
880: void
881: sbdroprecord(sb)
882: register struct sockbuf *sb;
883: {
884: register struct mbuf *m, *mn;
885:
886: m = sb->sb_mb;
887: if (m) {
888: sb->sb_mb = m->m_nextpkt;
889: do {
890: sbfree(sb, m);
891: MFREE(m, mn);
892: } while (m = mn);
893: }
894: postevent(0, sb, EV_RWBYTES);
895: }
896:
897: /*
898: * Create a "control" mbuf containing the specified data
899: * with the specified type for presentation on a socket buffer.
900: */
901: struct mbuf *
902: sbcreatecontrol(p, size, type, level)
903: caddr_t p;
904: register int size;
905: int type, level;
906: {
907: register struct cmsghdr *cp;
908: struct mbuf *m;
909:
910: if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
911: return ((struct mbuf *) NULL);
912: cp = mtod(m, struct cmsghdr *);
913: /* XXX check size? */
914: (void)memcpy(CMSG_DATA(cp), p, size);
915: size += sizeof(*cp);
916: m->m_len = size;
917: cp->cmsg_len = size;
918: cp->cmsg_level = level;
919: cp->cmsg_type = type;
920: return (m);
921: }
922:
923: /*
924: * Some routines that return EOPNOTSUPP for entry points that are not
925: * supported by a protocol. Fill in as needed.
926: */
/* Placeholder for protocols with no abort entry point. */
int
pru_abort_notsupp(struct socket *so)
{
	return (EOPNOTSUPP);
}
932:
933:
/* Placeholder for protocols with no accept entry point; `nam' is not written. */
int
pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
{
	return (EOPNOTSUPP);
}
939:
/* Placeholder for protocols with no attach entry point. */
int
pru_attach_notsupp(struct socket *so, int proto, struct proc *p)
{
	return (EOPNOTSUPP);
}
945:
/* Placeholder for protocols with no bind entry point. */
int
pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	return (EOPNOTSUPP);
}
951:
/* Placeholder for protocols with no connect entry point. */
int
pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	return (EOPNOTSUPP);
}
957:
/* Placeholder for protocols with no connect2 (socket-pair) entry point. */
int
pru_connect2_notsupp(struct socket *so1, struct socket *so2)
{
	return (EOPNOTSUPP);
}
963:
964: int
965: pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
966: struct ifnet *ifp, struct proc *p)
967: {
968: return EOPNOTSUPP;
969: }
970:
/* Placeholder for protocols with no detach entry point. */
int
pru_detach_notsupp(struct socket *so)
{
	return (EOPNOTSUPP);
}
976:
/* Placeholder for protocols with no disconnect entry point. */
int
pru_disconnect_notsupp(struct socket *so)
{
	return (EOPNOTSUPP);
}
982:
/* Placeholder for protocols with no listen entry point. */
int
pru_listen_notsupp(struct socket *so, struct proc *p)
{
	return (EOPNOTSUPP);
}
988:
/* Placeholder for protocols with no peeraddr entry point; `nam' is not written. */
int
pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
{
	return (EOPNOTSUPP);
}
994:
/* Placeholder for protocols with no rcvd entry point. */
int
pru_rcvd_notsupp(struct socket *so, int flags)
{
	return (EOPNOTSUPP);
}
1000:
/* Placeholder for protocols with no rcvoob entry point; `m' is not touched. */
int
pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
{
	return (EOPNOTSUPP);
}
1006:
/*
 * Placeholder for protocols with no send entry point.  Neither `m'
 * nor `control' is freed or otherwise touched here.
 */
int
pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
    struct sockaddr *addr, struct mbuf *control, struct proc *p)
{
	return (EOPNOTSUPP);
}
1015:
1016:
1017: /*
1018: * This isn't really a ``null'' operation, but it's the default one
1019: * and doesn't do anything destructive.
1020: */
1021: int
1022: pru_sense_null(struct socket *so, struct stat *sb)
1023: {
1024: sb->st_blksize = so->so_snd.sb_hiwat;
1025: return 0;
1026: }
1027:
1028:
/* Placeholder for protocols with no sosend entry point; no argument is touched. */
int
pru_sosend_notsupp(struct socket *so, struct sockaddr *addr,
    struct uio *uio, struct mbuf *top, struct mbuf *control, int flags)
{
	return (EOPNOTSUPP);
}
1036:
/* Placeholder for protocols with no soreceive entry point; no output argument is written. */
int
pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr,
    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
	return (EOPNOTSUPP);
}
1044:
/* Placeholder for protocols with no shutdown entry point. */
int
pru_shutdown_notsupp(struct socket *so)
{
	return (EOPNOTSUPP);
}
1051:
/* Placeholder for protocols with no sockaddr entry point; `nam' is not written. */
int
pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
{
	return (EOPNOTSUPP);
}
1057:
/*
 * Always fails with EOPNOTSUPP without touching any argument.  The
 * signature and result are the same as pru_sosend_notsupp().
 */
int
pru_sosend(struct socket *so, struct sockaddr *addr,
    struct uio *uio, struct mbuf *top, struct mbuf *control, int flags)
{
	return (EOPNOTSUPP);
}
1064:
/*
 * Always fails with EOPNOTSUPP without writing any output argument.
 * The signature and result are the same as pru_soreceive_notsupp().
 */
int
pru_soreceive(struct socket *so, struct sockaddr **paddr,
    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
	return (EOPNOTSUPP);
}
1072:
1073:
/* Placeholder for protocols with no sopoll entry point. */
int
pru_sopoll_notsupp(struct socket *so, int events, struct ucred *cred)
{
	return (EOPNOTSUPP);
}
1079:
1080:
1081:
1082: /*
1083: * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
1084: */
1085: struct sockaddr *
1086: dup_sockaddr(sa, canwait)
1087: struct sockaddr *sa;
1088: int canwait;
1089: {
1090: struct sockaddr *sa2;
1091:
1092: MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME,
1093: canwait ? M_WAITOK : M_NOWAIT);
1094: if (sa2)
1095: bcopy(sa, sa2, sa->sa_len);
1096: return sa2;
1097: }
1098:
1099: /*
1100: * Create an external-format (``xsocket'') structure using the information
1101: * in the kernel-format socket structure pointed to by so. This is done
1102: * to reduce the spew of irrelevant information over this interface,
1103: * to isolate user code from changes in the kernel structure, and
1104: * potentially to provide information-hiding if we decide that
1105: * some of this information should be hidden from users.
1106: */
1107: void
1108: sotoxsocket(struct socket *so, struct xsocket *xso)
1109: {
1110: xso->xso_len = sizeof *xso;
1111: xso->xso_so = so;
1112: xso->so_type = so->so_type;
1113: xso->so_options = so->so_options;
1114: xso->so_linger = so->so_linger;
1115: xso->so_state = so->so_state;
1116: xso->so_pcb = so->so_pcb;
1117: xso->xso_protocol = so->so_proto->pr_protocol;
1118: xso->xso_family = so->so_proto->pr_domain->dom_family;
1119: xso->so_qlen = so->so_qlen;
1120: xso->so_incqlen = so->so_incqlen;
1121: xso->so_qlimit = so->so_qlimit;
1122: xso->so_timeo = so->so_timeo;
1123: xso->so_error = so->so_error;
1124: xso->so_pgid = so->so_pgid;
1125: xso->so_oobmark = so->so_oobmark;
1126: sbtoxsockbuf(&so->so_snd, &xso->so_snd);
1127: sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
1128: xso->so_uid = so->so_uid;
1129: }
1130:
1131: /*
1132: * This does the same for sockbufs. Note that the xsockbuf structure,
1133: * since it is always embedded in a socket, does not include a self
1134: * pointer nor a length. We make this entry point public in case
1135: * some other mechanism needs it.
1136: */
1137: void
1138: sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
1139: {
1140: xsb->sb_cc = sb->sb_cc;
1141: xsb->sb_hiwat = sb->sb_hiwat;
1142: xsb->sb_mbcnt = sb->sb_mbcnt;
1143: xsb->sb_mbmax = sb->sb_mbmax;
1144: xsb->sb_lowat = sb->sb_lowat;
1145: xsb->sb_flags = sb->sb_flags;
1146: xsb->sb_timeo = sb->sb_timeo;
1147: }
1148:
1149: /*
1150: * Here is the definition of some of the basic objects in the kern.ipc
1151: * branch of the MIB.
1152: */
1153:
1154:
/* Parent node for the entries registered under _kern_ipc below. */
SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");

/* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
static int dummy;
SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");

/*
 * NOTE(review): sb_max and sb_efficiency are declared u_long in this
 * file but are exported through SYSCTL_INT -- confirm the widths agree
 * on every supported target.
 */
/* Read-write: ceiling used by sbreserve(). */
SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW, &sb_max, 0, "");
/* Read-only view of maxsockets (defined outside this file). */
SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD, &maxsockets, 0, "");
/* Read-write: multiplier sbreserve() uses to derive sb_mbmax. */
SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
&sb_efficiency, 0, "");
/* Read-only view of nmbclusters (defined outside this file). */
SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD, &nmbclusters, 0, "");
1166:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.