Annotation of XNU/bsd/netinet/tcp_subr.c, revision 1.1

1.1     ! root        1: /*
        !             2:  * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
        !             3:  *
        !             4:  * @APPLE_LICENSE_HEADER_START@
        !             5:  * 
        !             6:  * The contents of this file constitute Original Code as defined in and
        !             7:  * are subject to the Apple Public Source License Version 1.1 (the
        !             8:  * "License").  You may not use this file except in compliance with the
        !             9:  * License.  Please obtain a copy of the License at
        !            10:  * http://www.apple.com/publicsource and read it before using this file.
        !            11:  * 
        !            12:  * This Original Code and all software distributed under the License are
        !            13:  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
        !            14:  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
        !            15:  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
        !            16:  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
        !            17:  * License for the specific language governing rights and limitations
        !            18:  * under the License.
        !            19:  * 
        !            20:  * @APPLE_LICENSE_HEADER_END@
        !            21:  */
        !            22: /*
        !            23:  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
        !            24:  *     The Regents of the University of California.  All rights reserved.
        !            25:  *
        !            26:  * Redistribution and use in source and binary forms, with or without
        !            27:  * modification, are permitted provided that the following conditions
        !            28:  * are met:
        !            29:  * 1. Redistributions of source code must retain the above copyright
        !            30:  *    notice, this list of conditions and the following disclaimer.
        !            31:  * 2. Redistributions in binary form must reproduce the above copyright
        !            32:  *    notice, this list of conditions and the following disclaimer in the
        !            33:  *    documentation and/or other materials provided with the distribution.
        !            34:  * 3. All advertising materials mentioning features or use of this software
        !            35:  *    must display the following acknowledgement:
        !            36:  *     This product includes software developed by the University of
        !            37:  *     California, Berkeley and its contributors.
        !            38:  * 4. Neither the name of the University nor the names of its contributors
        !            39:  *    may be used to endorse or promote products derived from this software
        !            40:  *    without specific prior written permission.
        !            41:  *
        !            42:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
        !            43:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
        !            44:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
        !            45:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
        !            46:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
        !            47:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
        !            48:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
        !            49:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
        !            50:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
        !            51:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
        !            52:  * SUCH DAMAGE.
        !            53:  *
        !            54:  *     @(#)tcp_subr.c  8.2 (Berkeley) 5/24/95
        !            55:  */
        !            56: 
        !            57: #if ISFB31
        !            58: #include "opt_compat.h"
        !            59: #include "opt_tcpdebug.h"
        !            60: #endif
        !            61: 
        !            62: #include <sys/param.h>
        !            63: #include <sys/systm.h>
        !            64: #include <sys/kernel.h>
        !            65: #include <sys/sysctl.h>
        !            66: #include <sys/malloc.h>
        !            67: #include <sys/mbuf.h>
        !            68: #include <sys/socket.h>
        !            69: #include <sys/socketvar.h>
        !            70: #include <sys/protosw.h>
        !            71: 
        !            72: #if ISFB31
        !            73: #include <vm/vm_zone.h>
        !            74: #endif
        !            75: 
        !            76: #include <net/route.h>
        !            77: #include <net/if.h>
        !            78: 
        !            79: #define _IP_VHL
        !            80: #include <netinet/in.h>
        !            81: #include <netinet/in_systm.h>
        !            82: #include <netinet/ip.h>
        !            83: #include <netinet/in_pcb.h>
        !            84: #include <netinet/in_var.h>
        !            85: #include <netinet/ip_var.h>
        !            86: #include <netinet/tcp.h>
        !            87: #include <netinet/tcp_fsm.h>
        !            88: #include <netinet/tcp_seq.h>
        !            89: #include <netinet/tcp_timer.h>
        !            90: #include <netinet/tcp_var.h>
        !            91: #include <netinet/tcpip.h>
        !            92: #if TCPDEBUG
        !            93: #include <netinet/tcp_debug.h>
        !            94: #endif
        !            95: 
        !            96: int    tcp_mssdflt = TCP_MSS; /* copied into t_maxseg and t_maxopd by tcp_newtcpcb() */
        !            97: SYSCTL_INT(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt,
        !            98:        CTLFLAG_RW, &tcp_mssdflt , 0, "");
        !            99: 
        !           100: static int     tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ; /* presumably seconds (slow-timer ticks / PR_SLOWHZ) -- TODO confirm */
        !           101: SYSCTL_INT(_net_inet_tcp, TCPCTL_RTTDFLT, rttdflt,
        !           102:        CTLFLAG_RW, &tcp_rttdflt , 0, "");
        !           103: 
        !           104: static int     tcp_do_rfc1323 = 1; /* nonzero: tcp_newtcpcb() sets TF_REQ_SCALE|TF_REQ_TSTMP */
        !           105: SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323,
        !           106:        CTLFLAG_RW, &tcp_do_rfc1323 , 0, "");
        !           107: 
        !           108: static int     tcp_do_rfc1644 = 0; /* nonzero: tcp_newtcpcb() additionally sets TF_REQ_CC */
        !           109: SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1644, rfc1644,
        !           110:        CTLFLAG_RW, &tcp_do_rfc1644 , 0, "");
        !           111: 
        !           112: SYSCTL_INT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_RD, &tcbinfo.ipi_count,
        !           113:           0, "Number of active PCBs");
        !           114: 
        !           115: static void    tcp_cleartaocache __P((void)); /* called from tcp_init() */
        !           116: static void    tcp_notify __P((struct inpcb *, int)); /* defined further down in this file */
        !           117: 
        !           118: /*
        !           119:  * Target size of TCP PCB hash tables. Must be a power of two.
        !           120:  *
        !           121:  * Note that this can be overridden by the kernel environment
        !           122:  * variable net.inet.tcp.tcbhashsize.  If the resulting size is not a power of two, tcp_init() prints a warning and uses 512 instead.
        !           123:  */
        !           124: #ifndef TCBHASHSIZE
        !           125: #define TCBHASHSIZE    4096
        !           126: #endif
        !           127: 
        !           128: /*
        !           129:  * This is the actual shape of what we allocate using the zone
        !           130:  * allocator.  Doing it this way allows us to protect both structures
        !           131:  * using the same generation count, and also eliminates the overhead
        !           132:  * of allocating tcpcbs separately.  By hiding the structure here,
        !           133:  * we avoid changing most of the rest of the code (although it needs
        !           134:  * to be changed, eventually, for greater efficiency).  tcp_newtcpcb() casts its inpcb pointer back to struct inp_tp to reach the tcb member.
        !           135:  */
        !           136: #define        ALIGNMENT       32
        !           137: #define        ALIGNM1         (ALIGNMENT - 1)
        !           138: struct inp_tp {
        !           139:        union {
        !           140:                struct  inpcb inp; /* first member, so an inp_tp pointer and its inpcb pointer coincide */
        !           141:                char    align[(sizeof(struct inpcb) + ALIGNM1) & ~ALIGNM1]; /* sizeof(struct inpcb) rounded up to a multiple of ALIGNMENT */
        !           142:        } inp_tp_u;
        !           143:        struct  tcpcb tcb; /* placed after the padded inpcb */
        !           144: };
        !           145: #undef ALIGNMENT
        !           146: #undef ALIGNM1
        !           147: 
        !           148: static struct tcpcb dummy_tcb; /* tcp_init() sets its state to TCP_NSTATES and publishes it as tcbinfo.dummy_cb */
        !           149: 
        !           150: /*
        !           151:  * TCP initialization: seeds tcp_iss and tcp_ccgen, sets up the tcbinfo list head, both PCB hash tables and the PCB zone, checks header-size limits, and registers the dummy tcpcb.
        !           152:  */
        !           153: void
        !           154: tcp_init()
        !           155: {
        !           156:        int hashsize;
        !           157:        vm_size_t       str_size;
        !           158: 
        !           159: 
        !           160:        tcp_iss = random();     /* wrong, but better than a constant */
        !           161:        tcp_ccgen = 1;
        !           162:        tcp_cleartaocache();
        !           163:        LIST_INIT(&tcb);
        !           164:        tcbinfo.listhead = &tcb;
        !           165:        if (!(getenv_int("net.inet.tcp.tcbhashsize", &hashsize))) /* zero return: no usable override */
        !           166:                hashsize = TCBHASHSIZE; /* fall back to the compile-time default */
        !           167:        if (!powerof2(hashsize)) {
        !           168:                printf("WARNING: TCB hash size not a power of 2\n");
        !           169:                hashsize = 512; /* safe default */
        !           170:        }
        !           171:        tcbinfo.hashbase = hashinit(hashsize, M_PCB, &tcbinfo.hashmask); /* same size is used for both tables */
        !           172:        tcbinfo.porthashbase = hashinit(hashsize, M_PCB,
        !           173:                                        &tcbinfo.porthashmask);
        !           174: #if ISFB31
        !           175:        tcbinfo.ipi_zone = (void *) zinit("tcpcb", sizeof(struct inp_tp), maxsockets,
        !           176:                                 ZONE_INTERRUPT, 0);
        !           177: #else
        !           178:        str_size = (vm_size_t) sizeof(struct inp_tp); /* zone element = inpcb plus tcpcb */
        !           179:        tcbinfo.ipi_zone = (void *) zinit(str_size, 120000*str_size, 8192, "inpcb_zone"); /* NOTE(review): 120000 and 8192 are unexplained constants; presumably max elements and allocation chunk -- confirm against zinit() */
        !           180: #endif
        !           181: 
        !           182: 
        !           183:        if (max_protohdr < sizeof(struct tcpiphdr)) /* raise max_protohdr to at least one TCP/IP header */
        !           184:                max_protohdr = sizeof(struct tcpiphdr);
        !           185:        if (max_linkhdr + sizeof(struct tcpiphdr) > MHLEN) /* link header plus TCP/IP header must not exceed MHLEN */
        !           186:                panic("tcp_init");
        !           187: 
        !           188:        tcbinfo.last_pcb = 0;
        !           189:        dummy_tcb.t_state = TCP_NSTATES; /* presumably an out-of-range state that marks the dummy -- TODO confirm */
        !           190:        dummy_tcb.t_flags = 0;
        !           191:        tcbinfo.dummy_cb = (caddr_t) &dummy_tcb;
        !           192:        in_pcb_nat_init(&tcbinfo, AF_INET, IPPROTO_TCP, SOCK_STREAM);
        !           193: }
        !           194: 
        !           195: /*
        !           196:  * Create template to be used to send tcp packets on a connection.
        !           197:  * Call after host entry created, allocates an mbuf and fills
        !           198:  * in a skeletal tcp/ip header, minimizing the amount of work
        !           199:  * necessary when the connection is used.  Returns the header, or 0 when m_get() fails.  An existing tp->t_template is refilled in place; a freshly allocated header is not stored into tp->t_template here.
        !           200:  */
        !           201: struct tcpiphdr *
        !           202: tcp_template(tp)
        !           203:        struct tcpcb *tp;
        !           204: {
        !           205:        register struct inpcb *inp = tp->t_inpcb;
        !           206:        register struct mbuf *m;
        !           207:        register struct tcpiphdr *n;
        !           208: 
        !           209:        if ((n = tp->t_template) == 0) { /* no template yet: get an mbuf to hold one */
        !           210:                m = m_get(M_DONTWAIT, MT_HEADER);
        !           211:                if (m == NULL)
        !           212:                        return (0); /* allocation failed */
        !           213:                m->m_len = sizeof (struct tcpiphdr);
        !           214:                n = mtod(m, struct tcpiphdr *);
        !           215:        }
        !           216:        bzero(n->ti_x1, sizeof(n->ti_x1));
        !           217:        n->ti_pr = IPPROTO_TCP;
        !           218:        n->ti_len = htons(sizeof (struct tcpiphdr) - sizeof (struct ip)); /* combined header size minus the IP header, network byte order */
        !           219:        n->ti_src = inp->inp_laddr; /* addresses and ports are copied straight from the inpcb */
        !           220:        n->ti_dst = inp->inp_faddr;
        !           221:        n->ti_sport = inp->inp_lport;
        !           222:        n->ti_dport = inp->inp_fport;
        !           223:        n->ti_seq = 0; /* remaining per-segment fields start out zero */
        !           224:        n->ti_ack = 0;
        !           225:        n->ti_x2 = 0;
        !           226:        n->ti_off = 5; /* counted in 4-byte units (tcp_respond() uses sizeof (struct tcphdr) >> 2) */
        !           227:        n->ti_flags = 0;
        !           228:        n->ti_win = 0;
        !           229:        n->ti_sum = 0;
        !           230:        n->ti_urp = 0;
        !           231:        return (n);
        !           232: }
        !           233: 
        !           234: /*
        !           235:  * Send a single message to the TCP at address specified by
        !           236:  * the given TCP/IP header.  If m == 0, then we make a copy
        !           237:  * of the tcpiphdr at ti and send directly to the addressed host.
        !           238:  * This is used to force keep alive messages out using the TCP
        !           239:  * template for a connection tp->t_template.  If flags are given
        !           240:  * then we send a message back to the TCP which originated the
        !           241:  * segment ti, and discard the mbuf containing it and any other
        !           242:  * attached mbufs.
        !           243:  *
        !           244:  * In any case the ack and sequence number of the transmitted
        !           245:  * segment are as specified by the parameters.
        !           246:  *
        !           247:  * NOTE: If m != NULL, then ti must point to *inside* the mbuf.  tp may be null, in which case a temporary route is used and a zero window is sent.
        !           248:  */
        !           249: void
        !           250: tcp_respond(tp, ti, m, ack, seq, flags)
        !           251:        struct tcpcb *tp;
        !           252:        register struct tcpiphdr *ti;
        !           253:        register struct mbuf *m;
        !           254:        tcp_seq ack, seq;
        !           255:        int flags;
        !           256: {
        !           257:        register int tlen;
        !           258:        int win = 0;
        !           259:        struct route *ro = 0;
        !           260:        struct route sro;
        !           261: 
        !           262:        if (tp) {
        !           263:                if (!(flags & TH_RST)) /* with TH_RST set, win stays 0 */
        !           264:                        win = sbspace(&tp->t_inpcb->inp_socket->so_rcv);
        !           265:                ro = &tp->t_inpcb->inp_route; /* use the connection's own route entry */
        !           266:        } else {
        !           267:                ro = &sro; /* no connection: zeroed on-stack route, released at the end */
        !           268:                bzero(ro, sizeof *ro);
        !           269:        }
        !           270:        if (m == 0) {
        !           271:                m = m_gethdr(M_DONTWAIT, MT_HEADER);
        !           272:                if (m == NULL)
        !           273:                        return; /* no mbuf available: nothing is sent */
        !           274: #if TCP_COMPAT_42
        !           275:                tlen = 1;
        !           276: #else
        !           277:                tlen = 0;
        !           278: #endif
        !           279:                m->m_data += max_linkhdr; /* skip max_linkhdr bytes at the front of the mbuf */
        !           280:                *mtod(m, struct tcpiphdr *) = *ti; /* copy the caller's header into the new mbuf */
        !           281:                ti = mtod(m, struct tcpiphdr *);
        !           282:                flags = TH_ACK; /* the caller's flags value is replaced on this path */
        !           283:        } else {
        !           284:                m_freem(m->m_next); /* drop everything after the first mbuf */
        !           285:                m->m_next = 0;
        !           286:                m->m_data = (caddr_t)ti; /* reuse the received header in place */
        !           287:                m->m_len = sizeof (struct tcpiphdr);
        !           288:                tlen = 0;
        !           289: #define xchg(a,b,type) { type t; t=a; a=b; b=t; }
        !           290:                xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, n_long); /* swap source and destination addresses */
        !           291:                xchg(ti->ti_dport, ti->ti_sport, n_short); /* swap source and destination ports */
        !           292: #undef xchg
        !           293:        }
        !           294:        ti->ti_len = htons((u_short)(sizeof (struct tcphdr) + tlen));
        !           295:        tlen += sizeof (struct tcpiphdr); /* from here on tlen is the full packet length */
        !           296:        m->m_len = tlen;
        !           297:        m->m_pkthdr.len = tlen;
        !           298:        m->m_pkthdr.rcvif = (struct ifnet *) 0;
        !           299:        bzero(ti->ti_x1, sizeof(ti->ti_x1));
        !           300:        ti->ti_seq = htonl(seq);
        !           301:        ti->ti_ack = htonl(ack);
        !           302:        ti->ti_x2 = 0;
        !           303:        ti->ti_off = sizeof (struct tcphdr) >> 2;
        !           304:        ti->ti_flags = flags;
        !           305:        if (tp)
        !           306:                ti->ti_win = htons((u_short) (win >> tp->rcv_scale)); /* NOTE(review): the (u_short) cast truncates if the scaled value exceeds 16 bits; no clamp is visible here */
        !           307:        else
        !           308:                ti->ti_win = htons((u_short)win); /* win is still 0 on this path */
        !           309:        ti->ti_urp = 0;
        !           310:        ti->ti_sum = 0; /* zero the field before computing the checksum */
        !           311:        ti->ti_sum = in_cksum(m, tlen);
        !           312:        ((struct ip *)ti)->ip_len = tlen; /* after checksumming, the same bytes are written as struct ip fields */
        !           313:        ((struct ip *)ti)->ip_ttl = ip_defttl;
        !           314: #if TCPDEBUG
        !           315:        if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
        !           316:                tcp_trace(TA_OUTPUT, 0, tp, ti, 0);
        !           317: #endif
        !           318:        (void) ip_output(m, NULL, ro, 0, NULL); /* result deliberately ignored */
        !           319:        if (ro == &sro && ro->ro_rt) { /* only the temporary route is released here */
        !           320:                RTFREE(ro->ro_rt);
        !           321:        }
        !           322: }
        !           323: 
        !           324: /*
        !           325:  * Create a new TCP control block, making an
        !           326:  * empty reassembly queue and hooking it to the argument
        !           327:  * protocol control block.  The `inp' parameter must have
        !           328:  * come from the zone allocator set up in tcp_init().  Returns the tcpcb embedded in the same struct inp_tp as inp; no separate allocation is made.
        !           329:  */
        !           330: struct tcpcb *
        !           331: tcp_newtcpcb(inp)
        !           332:        struct inpcb *inp;
        !           333: {
        !           334:        struct inp_tp *it;
        !           335:        register struct tcpcb *tp;
        !           336: 
        !           337:        it = (struct inp_tp *)inp; /* valid only because inp is the first member of struct inp_tp */
        !           338:        tp = &it->tcb;
        !           339:        bzero((char *) tp, sizeof(struct tcpcb)); /* every field not set below starts out zero */
        !           340:        tp->t_segq = NULL; /* empty reassembly queue */
        !           341:        tp->t_maxseg = tp->t_maxopd = tcp_mssdflt;
        !           342: 
        !           343:        if (tcp_do_rfc1323)
        !           344:                tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP); /* plain assignment: t_flags was just zeroed */
        !           345:        if (tcp_do_rfc1644)
        !           346:                tp->t_flags |= TF_REQ_CC;
        !           347:        tp->t_inpcb = inp;      /* XXX */
        !           348:        /*
        !           349:         * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
        !           350:         * rtt estimate.  Set rttvar so that srtt + 4 * rttvar gives
        !           351:         * reasonable initial retransmit time.
        !           352:         */
        !           353:        tp->t_srtt = TCPTV_SRTTBASE;
        !           354:        tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
        !           355:        tp->t_rttmin = TCPTV_MIN;
        !           356:        tp->t_rxtcur = TCPTV_RTOBASE;
        !           357:        tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; /* cwnd and ssthresh start at the same value */
        !           358:        tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
        !           359:        inp->inp_ip_ttl = ip_defttl;
        !           360:        inp->inp_ppcb = (caddr_t)tp; /* link the inpcb back to its tcpcb */
        !           361:        return (tp);            /* XXX */
        !           362: }
        !           363: 
        !           364: /*
        !           365:  * Drop a TCP connection, reporting
        !           366:  * the specified error.  If connection is synchronized,
        !           367:  * then send a RST to peer.  The error is stored in so_error and the result of tcp_close() is returned.
        !           368:  */
        !           369: struct tcpcb *
        !           370: tcp_drop(tp, errno)
        !           371:        register struct tcpcb *tp;
        !           372:        int errno;
        !           373: {
        !           374:        struct socket *so = tp->t_inpcb->inp_socket; /* taken before tcp_close() tears the connection down */
        !           375: 
        !           376:        if (TCPS_HAVERCVDSYN(tp->t_state)) {
        !           377:                tp->t_state = TCPS_CLOSED; /* state changes first, then tcp_output() runs; per the header comment this sends the RST */
        !           378:                (void) tcp_output(tp);
        !           379:                tcpstat.tcps_drops++;
        !           380:        } else
        !           381:                tcpstat.tcps_conndrops++; /* no SYN was received: counted separately, nothing sent */
        !           382:        if (errno == ETIMEDOUT && tp->t_softerror) /* a recorded soft error replaces a plain timeout */
        !           383:                errno = tp->t_softerror;
        !           384:        so->so_error = errno;
        !           385:        return (tcp_close(tp)); /* tcp_close() returns a null tcpcb pointer */
        !           386: }
        !           387: 
        !           388: /*
        !           389:  * Close a TCP control block:
        !           390:  *     discard all space held by the tcp
        !           391:  *     discard internet protocol block
        !           392:  *     wake up any sleepers
        !           393:  */
        !           394: struct tcpcb *
        !           395: tcp_close(tp)
        !           396:        register struct tcpcb *tp;
        !           397: {
        !           398:        register struct mbuf *q;
        !           399:        register struct mbuf *nq;
        !           400:        struct inpcb *inp = tp->t_inpcb;
        !           401:        struct socket *so = inp->inp_socket;
        !           402:        register struct rtentry *rt;
        !           403:        int dosavessthresh;
        !           404: 
        !           405:        /*
        !           406:         * If we got enough samples through the srtt filter,
        !           407:         * save the rtt and rttvar in the routing entry.
        !           408:         * 'Enough' is arbitrarily defined as 16 samples.
        !           409:         * 16 samples is enough for the srtt filter to converge
        !           410:         * to within 5% of the correct value; fewer samples and
        !           411:         * we could save a very bogus rtt.
        !           412:         *
        !           413:         * Don't update the default route's characteristics and don't
        !           414:         * update anything that the user "locked".
        !           415:         */
        !           416:        if (tp->t_rttupdated >= 16 &&
        !           417:            (rt = inp->inp_route.ro_rt) &&
        !           418:            ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr != INADDR_ANY) {
        !           419:                register u_long i = 0; /* scratch value, reused for rtt, rttvar and ssthresh in turn */
        !           420: 
        !           421:                if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) {
        !           422:                        i = tp->t_srtt *
        !           423:                            (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
        !           424:                        if (rt->rt_rmx.rmx_rtt && i)
        !           425:                                /*
        !           426:                                 * filter this update to half the old & half
        !           427:                                 * the new values, converting scale.
        !           428:                                 * See route.h and tcp_var.h for a
        !           429:                                 * description of the scaling constants.
        !           430:                                 */
        !           431:                                rt->rt_rmx.rmx_rtt =
        !           432:                                    (rt->rt_rmx.rmx_rtt + i) / 2;
        !           433:                        else
        !           434:                                rt->rt_rmx.rmx_rtt = i; /* no old value (or i is 0): store i as is */
        !           435:                        tcpstat.tcps_cachedrtt++;
        !           436:                }
        !           437:                if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) { /* same averaging as for rtt above */
        !           438:                        i = tp->t_rttvar *
        !           439:                            (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
        !           440:                        if (rt->rt_rmx.rmx_rttvar && i)
        !           441:                                rt->rt_rmx.rmx_rttvar =
        !           442:                                    (rt->rt_rmx.rmx_rttvar + i) / 2;
        !           443:                        else
        !           444:                                rt->rt_rmx.rmx_rttvar = i;
        !           445:                        tcpstat.tcps_cachedrttvar++;
        !           446:                }
        !           447:                /*
        !           448:                 * The old comment here said:
        !           449:                 * update the pipelimit (ssthresh) if it has been updated
        !           450:                 * already or if a pipesize was specified & the threshold
        !           451:                 * got below half the pipesize.  I.e., wait for bad news
        !           452:                 * before we start updating, then update on both good
        !           453:                 * and bad news.
        !           454:                 *
        !           455:                 * But we want to save the ssthresh even if no pipesize is
        !           456:                 * specified explicitly in the route, because such
        !           457:                 * connections still have an implicit pipesize specified
        !           458:                 * by the global tcp_sendspace.  In the absence of a reliable
        !           459:                 * way to calculate the pipesize, it will have to do.
        !           460:                 */
        !           461:                i = tp->snd_ssthresh;
        !           462:                if (rt->rt_rmx.rmx_sendpipe != 0)
        !           463:                        dosavessthresh = (i < rt->rt_rmx.rmx_sendpipe / 2);
        !           464:                else
        !           465:                        dosavessthresh = (i < so->so_snd.sb_hiwat / 2); /* no route pipesize: compare against the socket send buffer high-water mark */
        !           466:                if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
        !           467:                     i != 0 && rt->rt_rmx.rmx_ssthresh != 0)
        !           468:                    || dosavessthresh) { /* NOTE(review): as parenthesized, dosavessthresh alone is sufficient, even when RTV_SSTHRESH is locked */
        !           469:                        /*
        !           470:                         * convert the limit from user data bytes to
        !           471:                         * packets then to packet data bytes.
        !           472:                         */
        !           473:                        i = (i + tp->t_maxseg / 2) / tp->t_maxseg; /* rounds to the nearest packet count; NOTE(review): assumes t_maxseg != 0 */
        !           474:                        if (i < 2)
        !           475:                                i = 2; /* never cache less than two packets */
        !           476:                        i *= (u_long)(tp->t_maxseg + sizeof (struct tcpiphdr));
        !           477:                        if (rt->rt_rmx.rmx_ssthresh)
        !           478:                                rt->rt_rmx.rmx_ssthresh =
        !           479:                                    (rt->rt_rmx.rmx_ssthresh + i) / 2;
        !           480:                        else
        !           481:                                rt->rt_rmx.rmx_ssthresh = i;
        !           482:                        tcpstat.tcps_cachedssthresh++;
        !           483:                }
        !           484:        }
        !           485:        /* free the reassembly queue, if any */
        !           486:        for (q = tp->t_segq; q; q = nq) {
        !           487:                nq = q->m_nextpkt; /* remember the successor before q is freed */
        !           488:                tp->t_segq = nq;
        !           489:                m_freem(q);
        !           490:        }
        !           491:        if (tp->t_template)
        !           492:                (void) m_free(dtom(tp->t_template)); /* dtom() maps the template pointer back to its mbuf */
        !           493:        inp->inp_ppcb = NULL; /* unhook the tcpcb from the inpcb before detaching */
        !           494:        soisdisconnected(so);
        !           495:        in_pcbdetach(inp);
        !           496:        tcpstat.tcps_closed++;
        !           497:        return ((struct tcpcb *)0); /* always a null tcpcb pointer */
        !           498: }
        !           499: 
        !           500: void
        !           501: tcp_drain()
        !           502: {
        !           503:        /* empty body: this version of tcp_drain() does nothing */
        !           504: }
        !           505: 
        !           506: /*
        !           507:  * Notify a tcp user of an asynchronous error;
        !           508:  * store error as soft error, but wake up user
        !           509:  * (for now, won't do anything until can select for soft error).
        !           510:  */
        !           511: static void
        !           512: tcp_notify(inp, error)
        !           513:        struct inpcb *inp;
        !           514:        int error;
        !           515: {
        !           516:        register struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb; /* NOTE(review): used unchecked below; assumes inp_ppcb is non-null */
        !           517:        register struct socket *so = inp->inp_socket;
        !           518: 
        !           519:        /*
        !           520:         * Ignore some errors if we are hooked up.
        !           521:         * If connection hasn't completed, has retransmitted several times,
        !           522:         * and receives a second error, give up now.  This is better
        !           523:         * than waiting a long time to establish a connection that
        !           524:         * can never complete.
        !           525:         */
        !           526:        if (tp->t_state == TCPS_ESTABLISHED &&
        !           527:             (error == EHOSTUNREACH || error == ENETUNREACH ||
        !           528:              error == EHOSTDOWN)) {
        !           529:                return; /* ignored entirely: nothing recorded, nobody woken */
        !           530:        } else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 &&
        !           531:            tp->t_softerror)
        !           532:                so->so_error = error; /* not yet established, t_rxtshift > 3, and a soft error already recorded: report on the socket */
        !           533:        else
        !           534:                tp->t_softerror = error; /* otherwise only remember it as the soft error */
        !           535:        wakeup((caddr_t) &so->so_timeo); /* both non-ignored cases fall through to these wakeups */
        !           536:        sorwakeup(so);
        !           537:        sowwakeup(so);
        !           538: }
        !           539: 
        !           540: 
        !           541: static int
        !           542: tcp_pcblist SYSCTL_HANDLER_ARGS
        !           543: {
        !           544:        int error, i, n, s;
        !           545:        struct inpcb *inp, **inp_list;
        !           546:        inp_gen_t gencnt;
        !           547:        struct xinpgen xig;
        !           548: 
        !           549:        /*
        !           550:         * The process of preparing the TCB list is too time-consuming and
        !           551:         * resource-intensive to repeat twice on every request.
        !           552:         */
        !           553:        if (req->oldptr == 0) {
        !           554:                n = tcbinfo.ipi_count;
        !           555:                req->oldidx = 2 * (sizeof xig)
        !           556:                        + (n + n/8) * sizeof(struct xtcpcb);
        !           557:                return 0;
        !           558:        }
        !           559: 
        !           560:        if (req->newptr != 0)
        !           561:                return EPERM;
        !           562: 
        !           563:        /*
        !           564:         * OK, now we're committed to doing something.
        !           565:         */
        !           566:        s = splnet();
        !           567:        gencnt = tcbinfo.ipi_gencnt;
        !           568:        n = tcbinfo.ipi_count;
        !           569:        splx(s);
        !           570: 
        !           571:        xig.xig_len = sizeof xig;
        !           572:        xig.xig_count = n;
        !           573:        xig.xig_gen = gencnt;
        !           574:        xig.xig_sogen = so_gencnt;
        !           575:        error = SYSCTL_OUT(req, &xig, sizeof xig);
        !           576:        if (error)
        !           577:                return error;
        !           578: 
        !           579:        inp_list = _MALLOC(n * sizeof *inp_list, M_TEMP, M_WAITOK);
        !           580:        if (inp_list == 0)
        !           581:                return ENOMEM;
        !           582:        
        !           583:        s = splnet();
        !           584:        for (inp = tcbinfo.listhead->lh_first, i = 0; inp && i < n;
        !           585:             inp = inp->inp_list.le_next) {
        !           586:                if (inp->inp_gencnt <= gencnt)
        !           587:                        inp_list[i++] = inp;
        !           588:        }
        !           589:        splx(s);
        !           590:        n = i;
        !           591: 
        !           592:        error = 0;
        !           593:        for (i = 0; i < n; i++) {
        !           594:                inp = inp_list[i];
        !           595:                if (inp->inp_gencnt <= gencnt) {
        !           596:                        struct xtcpcb xt;
        !           597:                        xt.xt_len = sizeof xt;
        !           598:                        /* XXX should avoid extra copy */
        !           599:                        bcopy(inp, &xt.xt_inp, sizeof *inp);
        !           600:                        bcopy(inp->inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp);
        !           601:                        if (inp->inp_socket)
        !           602:                                sotoxsocket(inp->inp_socket, &xt.xt_socket);
        !           603:                        error = SYSCTL_OUT(req, &xt, sizeof xt);
        !           604:                }
        !           605:        }
        !           606:        if (!error) {
        !           607:                /*
        !           608:                 * Give the user an updated idea of our state.
        !           609:                 * If the generation differs from what we told
        !           610:                 * her before, she knows that something happened
        !           611:                 * while we were processing this request, and it
        !           612:                 * might be necessary to retry.
        !           613:                 */
        !           614:                s = splnet();
        !           615:                xig.xig_gen = tcbinfo.ipi_gencnt;
        !           616:                xig.xig_sogen = so_gencnt;
        !           617:                xig.xig_count = tcbinfo.ipi_count;
        !           618:                splx(s);
        !           619:                error = SYSCTL_OUT(req, &xig, sizeof xig);
        !           620:        }
        !           621:        FREE(inp_list, M_TEMP);
        !           622:        return error;
        !           623: }
        !           624: 
        !           625: 
        !           626: SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
        !           627:            tcp_pcblist, "S,xtcpcb", "List of active TCP connections");
        !           628: 
        !           629: void
        !           630: tcp_ctlinput(cmd, sa, vip)
        !           631:        int cmd;
        !           632:        struct sockaddr *sa;
        !           633:        void *vip;
        !           634: {
        !           635:        register struct ip *ip = vip;
        !           636:        register struct tcphdr *th;
        !           637:        void (*notify) __P((struct inpcb *, int)) = tcp_notify;
        !           638: 
        !           639:        if (cmd == PRC_QUENCH)
        !           640:                notify = tcp_quench;
        !           641:        else if (cmd == PRC_MSGSIZE)
        !           642:                notify = tcp_mtudisc;
        !           643:        else if (!PRC_IS_REDIRECT(cmd) &&
        !           644:                 ((unsigned)cmd > PRC_NCMDS || inetctlerrmap[cmd] == 0))
        !           645:                return;
        !           646:        if (ip) {
        !           647:                th = (struct tcphdr *)((caddr_t)ip 
        !           648:                                       + (IP_VHL_HL(ip->ip_vhl) << 2));
        !           649:                in_pcbnotify(&tcb, sa, th->th_dport, ip->ip_src, th->th_sport,
        !           650:                        cmd, notify);
        !           651:        } else
        !           652:                in_pcbnotify(&tcb, sa, 0, zeroin_addr, 0, cmd, notify);
        !           653: }
        !           654: 
        !           655: /*
        !           656:  * When a source quench is received, close congestion window
        !           657:  * to one segment.  We will gradually open it again as we proceed.
        !           658:  */
        !           659: void
        !           660: tcp_quench(inp, errno)
        !           661:        struct inpcb *inp;
        !           662:        int errno;
        !           663: {
        !           664:        struct tcpcb *tp = intotcpcb(inp);
        !           665: 
        !           666:        if (tp)
        !           667:                tp->snd_cwnd = tp->t_maxseg;
        !           668: }
        !           669: 
        !           670: /*
        !           671:  * When `need fragmentation' ICMP is received, update our idea of the MSS
        !           672:  * based on the new value in the route.  Also nudge TCP to send something,
        !           673:  * since we know the packet we just sent was dropped.
        !           674:  * This duplicates some code in the tcp_mss() function in tcp_input.c.
        !           675:  */
        !           676: void
        !           677: tcp_mtudisc(inp, errno)
        !           678:        struct inpcb *inp;
        !           679:        int errno;
        !           680: {
        !           681:        struct tcpcb *tp = intotcpcb(inp);
        !           682:        struct rtentry *rt;
        !           683:        struct rmxp_tao *taop;
        !           684:        struct socket *so = inp->inp_socket;
        !           685:        int offered;
        !           686:        int mss;
        !           687: 
        !           688:        if (tp) {
        !           689:                rt = tcp_rtlookup(inp);
        !           690:                if (!rt || !rt->rt_rmx.rmx_mtu) {
        !           691:                        tp->t_maxopd = tp->t_maxseg = tcp_mssdflt;
        !           692:                        return;
        !           693:                }
        !           694:                taop = rmx_taop(rt->rt_rmx);
        !           695:                offered = taop->tao_mssopt;
        !           696:                mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr);
        !           697:                if (offered)
        !           698:                        mss = min(mss, offered);
        !           699:                /*
        !           700:                 * XXX - The above conditional probably violates the TCP
        !           701:                 * spec.  The problem is that, since we don't know the
        !           702:                 * other end's MSS, we are supposed to use a conservative
        !           703:                 * default.  But, if we do that, then MTU discovery will
        !           704:                 * never actually take place, because the conservative
        !           705:                 * default is much less than the MTUs typically seen
        !           706:                 * on the Internet today.  For the moment, we'll sweep
        !           707:                 * this under the carpet.
        !           708:                 *
        !           709:                 * The conservative default might not actually be a problem
        !           710:                 * if the only case this occurs is when sending an initial
        !           711:                 * SYN with options and data to a host we've never talked
        !           712:                 * to before.  Then, they will reply with an MSS value which
        !           713:                 * will get recorded and the new parameters should get
        !           714:                 * recomputed.  For Further Study.
        !           715:                 */
        !           716:                if (tp->t_maxopd <= mss)
        !           717:                        return;
        !           718:                tp->t_maxopd = mss;
        !           719: 
        !           720:                if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
        !           721:                    (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)
        !           722:                        mss -= TCPOLEN_TSTAMP_APPA;
        !           723:                if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC &&
        !           724:                    (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC)
        !           725:                        mss -= TCPOLEN_CC_APPA;
        !           726: #if    (MCLBYTES & (MCLBYTES - 1)) == 0
        !           727:                if (mss > MCLBYTES)
        !           728:                        mss &= ~(MCLBYTES-1);
        !           729: #else
        !           730:                if (mss > MCLBYTES)
        !           731:                        mss = mss / MCLBYTES * MCLBYTES;
        !           732: #endif
        !           733:                if (so->so_snd.sb_hiwat < mss)
        !           734:                        mss = so->so_snd.sb_hiwat;
        !           735: 
        !           736:                tp->t_maxseg = mss;
        !           737: 
        !           738:                tcpstat.tcps_mturesent++;
        !           739:                tp->t_rtt = 0;
        !           740:                tp->snd_nxt = tp->snd_una;
        !           741:                tcp_output(tp);
        !           742:        }
        !           743: }
        !           744: 
        !           745: /*
        !           746:  * Look-up the routing entry to the peer of this inpcb.  If no route
        !           747:  * is found and it cannot be allocated the return NULL.  This routine
        !           748:  * is called by TCP routines that access the rmx structure and by tcp_mss
        !           749:  * to get the interface MTU.
        !           750:  */
        !           751: struct rtentry *
        !           752: tcp_rtlookup(inp)
        !           753:        struct inpcb *inp;
        !           754: {
        !           755:        struct route *ro;
        !           756:        struct rtentry *rt;
        !           757: 
        !           758:        ro = &inp->inp_route;
        !           759:        rt = ro->ro_rt;
        !           760:        if (rt == NULL || !(rt->rt_flags & RTF_UP)) {
        !           761:                /* No route yet, so try to acquire one */
        !           762:                if (inp->inp_faddr.s_addr != INADDR_ANY) {
        !           763:                        ro->ro_dst.sa_family = AF_INET;
        !           764:                        ro->ro_dst.sa_len = sizeof(ro->ro_dst);
        !           765:                        ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
        !           766:                                inp->inp_faddr;
        !           767:                        rtalloc(ro);
        !           768:                        rt = ro->ro_rt;
        !           769:                }
        !           770:        }
        !           771:        return rt;
        !           772: }
        !           773: 
        !           774: /*
        !           775:  * Return a pointer to the cached information about the remote host.
        !           776:  * The cached information is stored in the protocol specific part of
        !           777:  * the route metrics.
        !           778:  */
        !           779: struct rmxp_tao *
        !           780: tcp_gettaocache(inp)
        !           781:        struct inpcb *inp;
        !           782: {
        !           783:        struct rtentry *rt = tcp_rtlookup(inp);
        !           784: 
        !           785:        /* Make sure this is a host route and is up. */
        !           786:        if (rt == NULL ||
        !           787:            (rt->rt_flags & (RTF_UP|RTF_HOST)) != (RTF_UP|RTF_HOST))
        !           788:                return NULL;
        !           789: 
        !           790:        return rmx_taop(rt->rt_rmx);
        !           791: }
        !           792: 
        !           793: /*
        !           794:  * Clear all the TAO cache entries, called from tcp_init.
        !           795:  *
        !           796:  * XXX
        !           797:  * This routine is just an empty one, because we assume that the
        !           798:  * routing tables are initialized at the same time as TCP, so there is
        !           799:  * nothing in the cache left over.
        !           800:  */
        !           801: static void
        !           802: tcp_cleartaocache()
        !           803: {
        !           804: }

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.