Annotation of XNU/bsd/netinet/tcp_input.c, revision 1.1.1.1

1.1       root        1: /*
                      2:  * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
                      3:  *
                      4:  * @APPLE_LICENSE_HEADER_START@
                      5:  * 
                      6:  * The contents of this file constitute Original Code as defined in and
                      7:  * are subject to the Apple Public Source License Version 1.1 (the
                      8:  * "License").  You may not use this file except in compliance with the
                      9:  * License.  Please obtain a copy of the License at
                     10:  * http://www.apple.com/publicsource and read it before using this file.
                     11:  * 
                     12:  * This Original Code and all software distributed under the License are
                     13:  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
                     14:  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
                     15:  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
                     16:  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
                     17:  * License for the specific language governing rights and limitations
                     18:  * under the License.
                     19:  * 
                     20:  * @APPLE_LICENSE_HEADER_END@
                     21:  */
                     22: /*
                     23:  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
                     24:  *     The Regents of the University of California.  All rights reserved.
                     25:  *
                     26:  * Redistribution and use in source and binary forms, with or without
                     27:  * modification, are permitted provided that the following conditions
                     28:  * are met:
                     29:  * 1. Redistributions of source code must retain the above copyright
                     30:  *    notice, this list of conditions and the following disclaimer.
                     31:  * 2. Redistributions in binary form must reproduce the above copyright
                     32:  *    notice, this list of conditions and the following disclaimer in the
                     33:  *    documentation and/or other materials provided with the distribution.
                     34:  * 3. All advertising materials mentioning features or use of this software
                     35:  *    must display the following acknowledgement:
                     36:  *     This product includes software developed by the University of
                     37:  *     California, Berkeley and its contributors.
                     38:  * 4. Neither the name of the University nor the names of its contributors
                     39:  *    may be used to endorse or promote products derived from this software
                     40:  *    without specific prior written permission.
                     41:  *
                     42:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     43:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     44:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     45:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     46:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     47:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     48:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     49:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     50:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     51:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     52:  * SUCH DAMAGE.
                     53:  *
                     54:  *     @(#)tcp_input.c 8.12 (Berkeley) 5/24/95
                     55:  */
                     56: 
                     57: #if ISFB31
                     58: #include "opt_ipfw.h"          /* for ipfw_fwd         */
                     59: #include "opt_tcpdebug.h"
                     60: #endif
                     61: 
                     62: #include <sys/param.h>
                     63: #include <sys/systm.h>
                     64: #include <sys/kernel.h>
                     65: #include <sys/sysctl.h>
                     66: #include <sys/malloc.h>
                     67: #include <sys/mbuf.h>
                     68: #include <sys/proc.h>          /* for proc0 declaration */
                     69: #include <sys/protosw.h>
                     70: #include <sys/socket.h>
                     71: #include <sys/socketvar.h>
                     72: #include <sys/syslog.h>
                     73: 
                     74: #include <kern/cpu_number.h>   /* before tcp_seq.h, for tcp_random18() */
                     75: 
                     76: #include <net/if.h>
                     77: #include <net/route.h>
                     78: 
                     79: #include <netinet/in.h>
                     80: #include <netinet/in_systm.h>
                     81: #include <netinet/ip.h>
                     82: #include <netinet/ip_icmp.h>   /* for ICMP_BANDLIM             */
                     83: #include <netinet/in_pcb.h>
                     84: #include <netinet/ip_var.h>
                     85: #include <netinet/icmp_var.h>  /* for ICMP_BANDLIM             */
                     86: #include <netinet/tcp.h>
                     87: #include <netinet/tcp_fsm.h>
                     88: #include <netinet/tcp_seq.h>
                     89: #include <netinet/tcp_timer.h>
                     90: #include <netinet/tcp_var.h>
                     91: #include <netinet/tcpip.h>
                     92: #if TCPDEBUG
                     93: #include <netinet/tcp_debug.h>
                     94: static struct  tcpiphdr tcp_saveti;
                     95: #endif
                     96: 
                         /* NOTE(review): presumably the duplicate-ACK count that triggers fast retransmit; not referenced in this part of the file -- confirm. */
                     97: static int     tcprexmtthresh = 3;
                         /* NOTE(review): tcp_iss and tcp_ccgen are not referenced in this part of the file; roles to be confirmed against their users. */
                     98: tcp_seq        tcp_iss;
                     99: tcp_cc tcp_ccgen;
                    100: 
                         /* TCP statistics counters (e.g. tcps_rcvtotal, tcps_rcvbadsum), exported read-only (CTLFLAG_RD) via sysctl. */
                    101: struct tcpstat tcpstat;
                    102: SYSCTL_STRUCT(_net_inet_tcp, TCPCTL_STATS, stats,
                    103:        CTLFLAG_RD, &tcpstat , tcpstat, "");
                    104: 
                         /* When non-zero, tcp_input() logs SYN segments for which no PCB is found.  Read-write sysctl. */
                    105: static int log_in_vain = 0;
                    106: SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_RW, 
                    107:        &log_in_vain, 0, "");
                    108: 
                         /* When non-zero, TCP_REASS marks in-order data TF_DELACK; when zero it sets TF_ACKNOW instead.  Read-write sysctl. */
                    109: int tcp_delack_enabled = 1;
                    110: SYSCTL_INT(_net_inet_tcp, OID_AUTO, delayed_ack, CTLFLAG_RW, 
                    111:        &tcp_delack_enabled, 0, "");
                    112: 
                         /* NOTE(review): tcp_now and tcb are not referenced in this part of the file. */
                    113: u_long tcp_now;
                    114: struct inpcbhead tcb;
                         /* PCB lookup state handed to in_pcblookup_hash() by tcp_input(). */
                    115: struct inpcbinfo tcbinfo;
                    116: 
                         /* File-local helpers; only tcp_reass() is defined in the visible part of the file. */
                    117: static void     tcp_dooptions __P((struct tcpcb *,
                    118:            u_char *, int, struct tcpiphdr *, struct tcpopt *));
                    119: static void     tcp_pulloutofband __P((struct socket *,
                    120:            struct tcpiphdr *, struct mbuf *));
                    121: static int      tcp_reass __P((struct tcpcb *, struct tcpiphdr *, struct mbuf *));
                    122: static void     tcp_xmit_timer __P((struct tcpcb *, int));
                    123: 
                    124: 
                    125: /*
                    126:  * Insert segment ti into reassembly queue of tcp with
                    127:  * control block tp.  Return TH_FIN if reassembly now includes
                    128:  * a segment with FIN.  The macro form does the common case inline
                    129:  * (segment is the next to be received on an established connection,
                    130:  * and the queue is empty), avoiding linkage into and removal
                    131:  * from the queue and repetition of various conversions.
                    132:  * Set DELACK for segments received in order, but ack immediately
                    133:  * when segments are out of order (so fast retransmit can work).
                          *
                          * Macro arguments:
                          *   tp    - struct tcpcb * of the connection.
                          *   ti    - struct tcpiphdr * of the arriving segment.
                          *   m     - mbuf chain holding the segment data; appended to
                          *           so->so_rcv on the fast path, handed to tcp_reass()
                          *           otherwise.
                          *   so    - struct socket * that receives the data.
                          *   flags - lvalue that receives the result (TH_FIN or 0 on the
                          *           fast path, tcp_reass()'s return value otherwise).
                          *
                          * Every parameter is parenthesized in the expansion, but tp, ti,
                          * so and flags are still expanded more than once, so pass
                          * side-effect-free expressions.  If tcp_delack_enabled is zero the
                          * fast path sets TF_ACKNOW instead of TF_DELACK.
                    134:  */
                    135: #define        TCP_REASS(tp, ti, m, so, flags) { \
                    136:        if ((ti)->ti_seq == (tp)->rcv_nxt && \
                    137:            (tp)->t_segq == NULL && \
                    138:            (tp)->t_state == TCPS_ESTABLISHED) { \
                    139:                if (tcp_delack_enabled) \
                    140:                        (tp)->t_flags |= TF_DELACK; \
                    141:                else \
                    142:                        (tp)->t_flags |= TF_ACKNOW; \
                    143:                (tp)->rcv_nxt += (ti)->ti_len; \
                    144:                (flags) = (ti)->ti_flags & TH_FIN; \
                    145:                tcpstat.tcps_rcvpack++;\
                    146:                tcpstat.tcps_rcvbyte += (ti)->ti_len;\
                    147:                sbappend(&(so)->so_rcv, (m)); \
                    148:                sorwakeup(so); \
                    149:        } else { \
                    150:                (flags) = tcp_reass((tp), (ti), (m)); \
                    151:                (tp)->t_flags |= TF_ACKNOW; \
                    152:        } \
                    153: }
                    154: 
                         /*
                          * tcp_reass: queue an arriving segment on tp's reassembly list and
                          * deliver whatever has become contiguous.
                          *
                          *   tp - control block; its t_segq list is searched and updated.
                          *   ti - TCP/IP header of the segment, or 0 to skip insertion and
                          *        only attempt delivery of already-queued data.
                          *   m  - mbuf chain with the segment data.  It is either linked
                          *        into t_segq or freed (complete duplicate); not touched
                          *        when ti is 0.
                          *
                          * Overlap with the preceding queued segment is trimmed from the
                          * front of the new segment; overlap with following segments is
                          * trimmed from them, and fully covered ones are freed.
                          *
                          * Returns 0 if nothing was delivered (connection not yet
                          * established, queue empty, or head of queue not at rcv_nxt);
                          * otherwise ti_flags & TH_FIN of the last segment delivered.
                          */
                    155: static int
                    156: tcp_reass(tp, ti, m)
                    157:        register struct tcpcb *tp;
                    158:        register struct tcpiphdr *ti;
                    159:        struct mbuf *m;
                    160: {
                    161:        struct mbuf *q;
                    162:        struct mbuf *p;
                    163:        struct mbuf *nq;
                    164:        struct socket *so = tp->t_inpcb->inp_socket;
                    165:        int flags;
                    166: 
                                /* Each queued mbuf carries its tcpiphdr pointer in m_pkthdr.header (stored below). */
                    167: #define GETTCP(m)      ((struct tcpiphdr *)m->m_pkthdr.header)
                    168: 
                    169:        /*
                    170:         * Call with ti==0 after the connection becomes established
                    171:         * to force pre-ESTABLISHED data up to the user socket.
                    172:         */
                    173:        if (ti == 0)
                    174:                goto present;
                    175: 
                    176:        m->m_pkthdr.header = ti;
                    177: 
                    178:        /*
                    179:         * Find a segment which begins after this one does.
                                 * On exit q is that segment (or NULL) and p is the
                                 * segment just before it (or NULL).
                    180:         */
                    181:        for (q = tp->t_segq, p = NULL; q; p = q, q = q->m_nextpkt)
                    182:                if (SEQ_GT(GETTCP(q)->ti_seq, ti->ti_seq))
                    183:                        break;
                    184: 
                    185:        /*
                    186:         * If there is a preceding segment, it may provide some of
                    187:         * our data already.  If so, drop the data from the incoming
                    188:         * segment.  If it provides all of our data, drop us.
                    189:         */
                    190:        if (p != NULL) {
                    191:                register int i;
                    192:                /* conversion to int (in i) handles seq wraparound */
                    193:                i = GETTCP(p)->ti_seq + GETTCP(p)->ti_len - ti->ti_seq;
                    194:                if (i > 0) {
                    195:                        if (i >= ti->ti_len) {
                    196:                                tcpstat.tcps_rcvduppack++;
                    197:                                tcpstat.tcps_rcvdupbyte += ti->ti_len;
                    198:                                m_freem(m);
                    199:                                /*
                    200:                                 * Try to present any queued data
                    201:                                 * at the left window edge to the user.
                    202:                                 * This is needed after the 3-WHS
                    203:                                 * completes.
                    204:                                 */
                    205:                                goto present;   /* ??? */
                    206:                        }
                                                /* Partial overlap: drop the first i bytes of the new segment. */
                    207:                        m_adj(m, i);
                    208:                        ti->ti_len -= i;
                    209:                        ti->ti_seq += i;
                    210:                }
                    211:        }
                                /* Whatever remains is accounted as out-of-order data. */
                    212:        tcpstat.tcps_rcvoopack++;
                    213:        tcpstat.tcps_rcvoobyte += ti->ti_len;
                    214: 
                    215:        /*
                    216:         * While we overlap succeeding segments trim them or,
                    217:         * if they are completely covered, dequeue them.
                    218:         */
                    219:        while (q) {
                                        /* i = number of leading bytes of q covered by the new segment. */
                    220:                register int i = (ti->ti_seq + ti->ti_len) - GETTCP(q)->ti_seq;
                    221:                if (i <= 0)
                    222:                        break;
                    223:                if (i < GETTCP(q)->ti_len) {
                                                /* Partly covered: trim q's front and stop. */
                    224:                        GETTCP(q)->ti_seq += i;
                    225:                        GETTCP(q)->ti_len -= i;
                    226:                        m_adj(q, i);
                    227:                        break;
                    228:                }
                    229: 
                                        /* Fully covered: unlink q from the list and free it. */
                    230:                nq = q->m_nextpkt;
                    231:                if (p)
                    232:                        p->m_nextpkt = nq;
                    233:                else
                    234:                        tp->t_segq = nq;
                    235:                m_freem(q);
                    236:                q = nq;
                    237:        }
                    238: 
                                /* Link the new segment in after p, or at the head when there is no predecessor. */
                    239:        if (p == NULL) {
                    240:                m->m_nextpkt = tp->t_segq;
                    241:                tp->t_segq = m;
                    242:        } else {
                    243:                m->m_nextpkt = p->m_nextpkt;
                    244:                p->m_nextpkt = m;
                    245:        }
                    246: 
                    247: present:
                    248:        /*
                    249:         * Present data to user, advancing rcv_nxt through
                    250:         * completed sequence space.
                    251:         */
                    252:        if (!TCPS_HAVEESTABLISHED(tp->t_state))
                    253:                return (0);
                    254:        q = tp->t_segq;
                    255:        if (!q || GETTCP(q)->ti_seq != tp->rcv_nxt)
                    256:                return (0);
                    257:        do {
                    258:                tp->rcv_nxt += GETTCP(q)->ti_len;
                                        /* Overwritten each pass, so the return value reflects the last segment delivered. */
                    259:                flags = GETTCP(q)->ti_flags & TH_FIN;
                    260:                nq = q->m_nextpkt;
                    261:                tp->t_segq = nq;
                    262:                q->m_nextpkt = NULL;
                                        /* If the socket can no longer receive, discard the data instead of appending it. */
                    263:                if (so->so_state & SS_CANTRCVMORE)
                    264:                        m_freem(q);
                    265:                else
                    266:                        sbappend(&so->so_rcv, q);
                    267:                q = nq;
                    268:        } while (q && GETTCP(q)->ti_seq == tp->rcv_nxt);
                    269:        sorwakeup(so);
                    270:        return (flags);
                    271: 
                    272: #undef GETTCP
                    273: }
                    274: 
                    275: /*
                    276:  * TCP input routine, follows pages 65-76 of the
                    277:  * protocol specification dated September, 1981 very closely.
                    278:  */
                    279: void
                    280: tcp_input(m, iphlen)
                    281:        register struct mbuf *m;
                    282:        int iphlen;
                    283: {
                    284:        register struct tcpiphdr *ti;
                    285:        register struct inpcb *inp;
                    286:        u_char *optp = NULL;
                    287:        int optlen = 0;
                    288:        int len, tlen, off;
                    289:        register struct tcpcb *tp = 0;
                    290:        register int tiflags;
                    291:        struct socket *so = 0;
                    292:        int todrop, acked, ourfinisacked, needoutput = 0;
                    293:        struct in_addr laddr;
                    294:        int dropsocket = 0;
                    295:        int iss = 0;
                    296:        u_long tiwin;
                    297:        struct tcpopt to;               /* options in this segment */
                    298:        struct rmxp_tao *taop;          /* pointer to our TAO cache entry */
                    299:        struct rmxp_tao tao_noncached;  /* in case there's no cached entry */
                    300: #if TCPDEBUG
                    301:        short ostate = 0;
                    302: #endif
                    303:        struct proc *proc0=current_proc();
                    304: 
                    305:        bzero((char *)&to, sizeof(to));
                    306: 
                    307:        tcpstat.tcps_rcvtotal++;
                    308:        /*
                    309:         * Get IP and TCP header together in first mbuf.
                    310:         * Note: IP leaves IP header in first mbuf.
                    311:         */
                    312:        ti = mtod(m, struct tcpiphdr *);
                    313:        if (iphlen > sizeof (struct ip))
                    314:                ip_stripoptions(m, (struct mbuf *)0);
                    315:        if (m->m_len < sizeof (struct tcpiphdr)) {
                    316:                if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) {
                    317:                        tcpstat.tcps_rcvshort++;
                    318:                        return;
                    319:                }
                    320:                ti = mtod(m, struct tcpiphdr *);
                    321:        }
                    322: 
                    323:        /*
                    324:         * Checksum extended TCP header and data.
                    325:         */
                    326:        tlen = ((struct ip *)ti)->ip_len;
                    327:        len = sizeof (struct ip) + tlen;
                    328:        bzero(ti->ti_x1, sizeof(ti->ti_x1));
                    329:        ti->ti_len = (u_short)tlen;
                    330:        HTONS(ti->ti_len);
                    331:        ti->ti_sum = in_cksum(m, len);
                    332:        if (ti->ti_sum) {
                    333:                tcpstat.tcps_rcvbadsum++;
                    334:                goto drop;
                    335:        }
                    336: 
                    337:        /*
                    338:         * Check that TCP offset makes sense,
                    339:         * pull out TCP options and adjust length.              XXX
                    340:         */
                    341:        off = ti->ti_off << 2;
                    342:        if (off < sizeof (struct tcphdr) || off > tlen) {
                    343:                tcpstat.tcps_rcvbadoff++;
                    344:                goto drop;
                    345:        }
                    346:        tlen -= off;
                    347:        ti->ti_len = tlen;
                    348:        if (off > sizeof (struct tcphdr)) {
                    349:                if (m->m_len < sizeof(struct ip) + off) {
                    350:                        if ((m = m_pullup(m, sizeof (struct ip) + off)) == 0) {
                    351:                                tcpstat.tcps_rcvshort++;
                    352:                                return;
                    353:                        }
                    354:                        ti = mtod(m, struct tcpiphdr *);
                    355:                }
                    356:                optlen = off - sizeof (struct tcphdr);
                    357:                optp = mtod(m, u_char *) + sizeof (struct tcpiphdr);
                    358:        }
                    359:        tiflags = ti->ti_flags;
                    360: 
                    361:        /*
                    362:         * Convert TCP protocol specific fields to host format.
                    363:         */
                    364:        NTOHL(ti->ti_seq);
                    365:        NTOHL(ti->ti_ack);
                    366:        NTOHS(ti->ti_win);
                    367:        NTOHS(ti->ti_urp);
                    368: 
                    369:        /*
                    370:         * Drop TCP, IP headers and TCP options.
                    371:         */
                    372:        m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
                    373:        m->m_len  -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
                    374: 
                    375:        /*
                    376:         * Locate pcb for segment.
                    377:         */
                    378: findpcb:
                    379: #if IPFIREWALL_FORWARD
                    380:        if (ip_fw_fwd_addr != NULL) {
                    381:                /*
                    382:                 * Diverted. Pretend to be the destination.
                    383:                 * already got one like this? 
                    384:                 */
                    385:                inp = in_pcblookup_hash(&tcbinfo, ti->ti_src, ti->ti_sport,
                    386:                        ti->ti_dst, ti->ti_dport, 0);
                    387:                if (!inp) {
                    388:                        /* 
                    389:                         * No, then it's new. Try to find the ambushing socket
                    390:                         */
                    391:                        if (!ip_fw_fwd_addr->sin_port) {
                    392:                                inp = in_pcblookup_hash(&tcbinfo, ti->ti_src,
                    393:                                    ti->ti_sport, ip_fw_fwd_addr->sin_addr,
                    394:                                    ti->ti_dport, 1);
                    395:                        } else {
                    396:                                inp = in_pcblookup_hash(&tcbinfo,
                    397:                                    ti->ti_src, ti->ti_sport,
                    398:                                    ip_fw_fwd_addr->sin_addr,
                    399:                                    ntohs(ip_fw_fwd_addr->sin_port), 1);
                    400:                        }
                    401:                }
                    402:                ip_fw_fwd_addr = NULL;
                    403:        } else
                    404: #endif /* IPFIREWALL_FORWARD */
                    405: 
                    406:        inp = in_pcblookup_hash(&tcbinfo, ti->ti_src, ti->ti_sport,
                    407:            ti->ti_dst, ti->ti_dport, 1);
                    408: 
                    409:        /*
                    410:         * If the state is CLOSED (i.e., TCB does not exist) then
                    411:         * all data in the incoming segment is discarded.
                    412:         * If the TCB exists but is in CLOSED state, it is embryonic,
                    413:         * but should either do a listen or a connect soon.
                    414:         */
                    415:        if (inp == NULL) {
                    416:                if (log_in_vain && tiflags & TH_SYN) {
                    417:                        char buf[4*sizeof "123"];
                    418: 
                    419:                        strcpy(buf, inet_ntoa(ti->ti_dst));
                    420:                        log(LOG_INFO,
                    421:                            "Connection attempt to TCP %s:%d from %s:%d\n",
                    422:                            buf, ntohs(ti->ti_dport), inet_ntoa(ti->ti_src),
                    423:                            ntohs(ti->ti_sport));
                    424:                }
                    425: #if ICMP_BANDLIM
                    426:                if (badport_bandlim(1) < 0)
                    427:                        goto drop;
                    428: #endif
                    429:                goto dropwithreset;
                    430:        }
                    431:        tp = intotcpcb(inp);
                    432:        if (tp == 0)
                    433:                goto dropwithreset;
                    434:        if (tp->t_state == TCPS_CLOSED)
                    435:                goto drop;
                    436: 
                    437:        /* Unscale the window into a 32-bit value. */
                    438:        if ((tiflags & TH_SYN) == 0)
                    439:                tiwin = ti->ti_win << tp->snd_scale;
                    440:        else
                    441:                tiwin = ti->ti_win;
                    442: 
                    443:        so = inp->inp_socket;
                    444:        if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) {
                    445: #if TCPDEBUG
                    446:                if (so->so_options & SO_DEBUG) {
                    447:                        ostate = tp->t_state;
                    448:                        tcp_saveti = *ti;
                    449:                }
                    450: #endif
                    451:                if (so->so_options & SO_ACCEPTCONN) {
                    452:                        register struct tcpcb *tp0 = tp;
                    453:                        struct socket *so2;
                    454:                        if ((tiflags & (TH_RST|TH_ACK|TH_SYN)) != TH_SYN) {
                    455:                                /*
                    456:                                 * Note: dropwithreset makes sure we don't
                    457:                                 * send a RST in response to a RST.
                    458:                                 */
                    459:                                if (tiflags & TH_ACK) {
                    460:                                        tcpstat.tcps_badsyn++;
                    461:                                        goto dropwithreset;
                    462:                                }
                    463:                                goto drop;
                    464:                        }
                    465:                        so2 = sonewconn(so, 0);
                    466:                        if (so2 == 0) {
                    467:                                tcpstat.tcps_listendrop++;
                    468:                                so2 = sodropablereq(so);
                    469:                                if (so2) {
                    470:                                        tcp_drop(sototcpcb(so2), ETIMEDOUT);
                    471:                                        so2 = sonewconn(so, 0);
                    472:                                }
                    473:                                if (!so2)
                    474:                                        goto drop;
                    475:                        }
                    476:                        so = so2;
                    477:                        /*
                    478:                         * This is ugly, but ....
                    479:                         *
                    480:                         * Mark socket as temporary until we're
                    481:                         * committed to keeping it.  The code at
                    482:                         * ``drop'' and ``dropwithreset'' check the
                    483:                         * flag dropsocket to see if the temporary
                    484:                         * socket created here should be discarded.
                    485:                         * We mark the socket as discardable until
                    486:                         * we're committed to it below in TCPS_LISTEN.
                    487:                         */
                    488:                        dropsocket++;
                    489:                        inp = (struct inpcb *)so->so_pcb;
                    490:                        inp->inp_laddr = ti->ti_dst;
                    491:                        inp->inp_lport = ti->ti_dport;
                    492:                        if (in_pcbinshash(inp) != 0) {
                    493:                                /*
                    494:                                 * Undo the assignments above if we failed to put
                    495:                                 * the PCB on the hash lists.
                    496:                                 */
                    497:                                inp->inp_laddr.s_addr = INADDR_ANY;
                    498:                                inp->inp_lport = 0;
                    499:                                goto drop;
                    500:                        }
                    501:                        inp->inp_options = ip_srcroute();
                    502:                        tp = intotcpcb(inp);
                    503:                        tp->t_state = TCPS_LISTEN;
                    504:                        tp->t_flags |= tp0->t_flags & (TF_NOPUSH|TF_NOOPT);
                    505: 
                    506:                        /* Compute proper scaling value from buffer space */
                    507:                        while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
                    508:                           TCP_MAXWIN << tp->request_r_scale < so->so_rcv.sb_hiwat)
                    509:                                tp->request_r_scale++;
                    510:                }
                    511:        }
                    512: 
                    513:        /*
                    514:         * Segment received on connection.
                    515:         * Reset idle time and keep-alive timer.
                    516:         */
                    517:        tp->t_idle = 0;
                    518:        if (TCPS_HAVEESTABLISHED(tp->t_state))
                    519:                tp->t_timer[TCPT_KEEP] = tcp_keepidle;
                    520: 
                    521:        /*
                    522:         * Process options if not in LISTEN state,
                    523:         * else do it below (after getting remote address).
                    524:         */
                    525:        if (tp->t_state != TCPS_LISTEN)
                    526:                tcp_dooptions(tp, optp, optlen, ti, &to);
                    527: 
                    528:        /*
                    529:         * Header prediction: check for the two common cases
                    530:         * of a uni-directional data xfer.  If the packet has
                    531:         * no control flags, is in-sequence, the window didn't
                    532:         * change and we're not retransmitting, it's a
                    533:         * candidate.  If the length is zero and the ack moved
                    534:         * forward, we're the sender side of the xfer.  Just
                    535:         * free the data acked & wake any higher level process
                    536:         * that was blocked waiting for space.  If the length
                    537:         * is non-zero and the ack didn't move, we're the
                    538:         * receiver side.  If we're getting packets in-order
                    539:         * (the reassembly queue is empty), add the data to
                    540:         * the socket buffer and note that we need a delayed ack.
                    541:         * Make sure that the hidden state-flags are also off.
                    542:         * Since we check for TCPS_ESTABLISHED above, it can only
                    543:         * be TF_NEEDSYN.
                    544:         */
                    545:        if (tp->t_state == TCPS_ESTABLISHED &&
                    546:            (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
                    547:            ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
                    548:            ((to.to_flag & TOF_TS) == 0 ||
                    549:             TSTMP_GEQ(to.to_tsval, tp->ts_recent)) &&
                    550:            /*
                    551:             * Using the CC option is compulsory if once started:
                    552:             *   the segment is OK if no T/TCP was negotiated or
                    553:             *   if the segment has a CC option equal to CCrecv
                    554:             */
                    555:            ((tp->t_flags & (TF_REQ_CC|TF_RCVD_CC)) != (TF_REQ_CC|TF_RCVD_CC) ||
                    556:             (to.to_flag & TOF_CC) != 0 && to.to_cc == tp->cc_recv) &&
                    557:            ti->ti_seq == tp->rcv_nxt &&
                    558:            tiwin && tiwin == tp->snd_wnd &&
                    559:            tp->snd_nxt == tp->snd_max) {
                    560: 
                    561:                /*
                    562:                 * If last ACK falls within this segment's sequence numbers,
                    563:                 * record the timestamp.
                    564:                 * NOTE that the test is modified according to the latest
                    565:                 * proposal of the tcplw@cray.com list (Braden 1993/04/26).
                    566:                 */
                    567:                if ((to.to_flag & TOF_TS) != 0 &&
                    568:                   SEQ_LEQ(ti->ti_seq, tp->last_ack_sent)) {
                    569:                        tp->ts_recent_age = tcp_now;
                    570:                        tp->ts_recent = to.to_tsval;
                    571:                }
                    572: 
                    573:                if (ti->ti_len == 0) {
                    574:                        if (SEQ_GT(ti->ti_ack, tp->snd_una) &&
                    575:                            SEQ_LEQ(ti->ti_ack, tp->snd_max) &&
                    576:                            tp->snd_cwnd >= tp->snd_wnd &&
                    577:                            tp->t_dupacks < tcprexmtthresh) {
                    578:                                /*
                    579:                                 * this is a pure ack for outstanding data.
                    580:                                 */
                    581:                                ++tcpstat.tcps_predack;
                    582:                                if ((to.to_flag & TOF_TS) != 0)
                    583:                                        tcp_xmit_timer(tp,
                    584:                                            tcp_now - to.to_tsecr + 1);
                    585:                                else if (tp->t_rtt &&
                    586:                                            SEQ_GT(ti->ti_ack, tp->t_rtseq))
                    587:                                        tcp_xmit_timer(tp, tp->t_rtt);
                    588:                                acked = ti->ti_ack - tp->snd_una;
                    589:                                tcpstat.tcps_rcvackpack++;
                    590:                                tcpstat.tcps_rcvackbyte += acked;
                    591:                                sbdrop(&so->so_snd, acked);
                    592:                                tp->snd_una = ti->ti_ack;
                    593:                                m_freem(m);
                    594: 
                    595:                                /*
                    596:                                 * If all outstanding data are acked, stop
                    597:                                 * retransmit timer, otherwise restart timer
                    598:                                 * using current (possibly backed-off) value.
                    599:                                 * If process is waiting for space,
                    600:                                 * wakeup/selwakeup/signal.  If data
                    601:                                 * are ready to send, let tcp_output
                    602:                                 * decide between more output or persist.
                    603:                                 */
                    604:                                if (tp->snd_una == tp->snd_max)
                    605:                                        tp->t_timer[TCPT_REXMT] = 0;
                    606:                                else if (tp->t_timer[TCPT_PERSIST] == 0)
                    607:                                        tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
                    608: 
                    609:                                sowwakeup(so);
                    610:                                if (so->so_snd.sb_cc)
                    611:                                        (void) tcp_output(tp);
                    612:                                return;
                    613:                        }
                    614:                } else if (ti->ti_ack == tp->snd_una &&
                    615:                    tp->t_segq == NULL &&
                    616:                    ti->ti_len <= sbspace(&so->so_rcv)) {
                    617:                        /*
                    618:                         * this is a pure, in-sequence data packet
                    619:                         * with nothing on the reassembly queue and
                    620:                         * we have enough buffer space to take it.
                    621:                         */
                    622:                        ++tcpstat.tcps_preddat;
                    623:                        tp->rcv_nxt += ti->ti_len;
                    624:                        tcpstat.tcps_rcvpack++;
                    625:                        tcpstat.tcps_rcvbyte += ti->ti_len;
                    626:                        /*
                    627:                         * Add data to socket buffer.
                    628:                         */
                    629:                        sbappend(&so->so_rcv, m);
                    630:                        sorwakeup(so);
                    631:                        if (tcp_delack_enabled) {
                    632:                                tp->t_flags |= TF_DELACK;
                    633:                        } else {
                    634:                                tp->t_flags |= TF_ACKNOW;
                    635:                                tcp_output(tp);
                    636:                        }
                    637:                        return;
                    638:                }
                    639:        }
                    640: 
                    641:        /*
                    642:         * Calculate amount of space in receive window,
                    643:         * and then do TCP input processing.
                    644:         * Receive window is amount of space in rcv queue,
                    645:         * but not less than advertised window.
                    646:         */
                    647:        { int win;
                    648: 
                    649:        win = sbspace(&so->so_rcv);
                    650:        if (win < 0)
                    651:                win = 0;
                    652:        tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
                    653:        }
                    654: 
                    655:        switch (tp->t_state) {
                    656: 
                    657:        /*
                    658:         * If the state is LISTEN then ignore segment if it contains an RST.
                    659:         * If the segment contains an ACK then it is bad and send a RST.
                    660:         * If it does not contain a SYN then it is not interesting; drop it.
                    661:         * If it is from this socket, drop it, it must be forged.
                    662:         * Don't bother responding if the destination was a broadcast.
                    663:         * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial
                    664:         * tp->iss, and send a segment:
                    665:         *     <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
                    666:         * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.
                    667:         * Fill in remote peer address fields if not previously specified.
                    668:         * Enter SYN_RECEIVED state, and process any other fields of this
                    669:         * segment in this state.
                    670:         */
                    671:        case TCPS_LISTEN: {
                    672:                register struct sockaddr_in *sin;
                    673: 
                    674:                if (tiflags & TH_RST)
                    675:                        goto drop;
                    676:                if (tiflags & TH_ACK)
                    677:                        goto dropwithreset;
                    678:                if ((tiflags & TH_SYN) == 0)
                    679:                        goto drop;
                    680:                if ((ti->ti_dport == ti->ti_sport) &&
                    681:                    (ti->ti_dst.s_addr == ti->ti_src.s_addr))
                    682:                        goto drop;
                    683:                /*
                    684:                 * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN
                    685:                 * in_broadcast() should never return true on a received
                    686:                 * packet with M_BCAST not set.
                    687:                 */
                    688:                if (m->m_flags & (M_BCAST|M_MCAST) ||
                    689:                    IN_MULTICAST(ntohl(ti->ti_dst.s_addr)))
                    690:                        goto drop;
                    691:                MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME,
                    692:                       M_NOWAIT);
                    693:                if (sin == NULL)
                    694:                        goto drop;
                    695:                sin->sin_family = AF_INET;
                    696:                sin->sin_len = sizeof(*sin);
                    697:                sin->sin_addr = ti->ti_src;
                    698:                sin->sin_port = ti->ti_sport;
                    699:                bzero((caddr_t)sin->sin_zero, sizeof(sin->sin_zero));
                    700:                laddr = inp->inp_laddr;
                    701:                if (inp->inp_laddr.s_addr == INADDR_ANY)
                    702:                        inp->inp_laddr = ti->ti_dst;
                    703:                if (in_pcbconnect(inp, (struct sockaddr *)sin, proc0)) {
                    704:                        inp->inp_laddr = laddr;
                    705:                        FREE(sin, M_SONAME);
                    706:                        goto drop;
                    707:                }
                    708:                FREE(sin, M_SONAME);
                    709:                tp->t_template = tcp_template(tp);
                    710:                if (tp->t_template == 0) {
                    711:                        tp = tcp_drop(tp, ENOBUFS);
                    712:                        dropsocket = 0;         /* socket is already gone */
                    713:                        goto drop;
                    714:                }
                    715:                if ((taop = tcp_gettaocache(inp)) == NULL) {
                    716:                        taop = &tao_noncached;
                    717:                        bzero(taop, sizeof(*taop));
                    718:                }
                    719:                tcp_dooptions(tp, optp, optlen, ti, &to);
                    720:                if (iss)
                    721:                        tp->iss = iss;
                    722:                else
                    723:                        tp->iss = tcp_iss;
                    724:                tcp_iss += TCP_ISSINCR/4;
                    725:                tp->irs = ti->ti_seq;
                    726:                tcp_sendseqinit(tp);
                    727:                tcp_rcvseqinit(tp);
                    728:                /*
                    729:                 * Initialization of the tcpcb for transaction;
                    730:                 *   set SND.WND = SEG.WND,
                    731:                 *   initialize CCsend and CCrecv.
                    732:                 */
                    733:                tp->snd_wnd = tiwin;    /* initial send-window */
                    734:                tp->cc_send = CC_INC(tcp_ccgen);
                    735:                tp->cc_recv = to.to_cc;
                    736:                /*
                    737:                 * Perform TAO test on incoming CC (SEG.CC) option, if any.
                    738:                 * - compare SEG.CC against cached CC from the same host,
                    739:                 *      if any.
                    740:                 * - if SEG.CC > cached value, SYN must be new and is accepted
                    741:                 *      immediately: save new CC in the cache, mark the socket
                    742:                 *      connected, enter ESTABLISHED state, turn on flag to
                    743:                 *      send a SYN in the next segment.
                    744:                 *      A virtual advertised window is set in rcv_adv to
                    745:                 *      initialize SWS prevention.  Then enter normal segment
                    746:                 *      processing: drop SYN, process data and FIN.
                    747:                 * - otherwise do a normal 3-way handshake.
                    748:                 */
                    749:                if ((to.to_flag & TOF_CC) != 0) {
                    750:                    if (((tp->t_flags & TF_NOPUSH) != 0) &&
                    751:                        taop->tao_cc != 0 && CC_GT(to.to_cc, taop->tao_cc)) {
                    752: 
                    753:                        taop->tao_cc = to.to_cc;
                    754:                        tp->t_state = TCPS_ESTABLISHED;
                    755: 
                    756:                        /*
                    757:                         * If there is a FIN, or if there is data and the
                    758:                         * connection is local, then delay SYN,ACK(SYN) in
                    759:                         * the hope of piggy-backing it on a response
                    760:                         * segment.  Otherwise must send ACK now in case
                    761:                         * the other side is slow starting.
                    762:                         */
                    763:                        if (tcp_delack_enabled && ((tiflags & TH_FIN) || (ti->ti_len != 0 &&
                    764:                            in_localaddr(inp->inp_faddr))))
                    765:                                tp->t_flags |= (TF_DELACK | TF_NEEDSYN);
                    766:                        else
                    767:                                tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
                    768: 
                    769:                        /*
                    770:                         * Limit the `virtual advertised window' to TCP_MAXWIN
                    771:                         * here.  Even if we requested window scaling, it will
                    772:                         * become effective only later when our SYN is acked.
                    773:                         */
                    774:                        tp->rcv_adv += min(tp->rcv_wnd, TCP_MAXWIN);
                    775:                        tcpstat.tcps_connects++;
                    776:                        soisconnected(so);
                    777:                        tp->t_timer[TCPT_KEEP] = tcp_keepinit;
                    778:                        dropsocket = 0;         /* committed to socket */
                    779:                        tcpstat.tcps_accepts++;
                    780:                        goto trimthenstep6;
                    781:                    }
                    782:                /* else do standard 3-way handshake */
                    783:                } else {
                    784:                    /*
                    785:                     * No CC option, but maybe CC.NEW:
                    786:                     *   invalidate cached value.
                    787:                     */
                    788:                     taop->tao_cc = 0;
                    789:                }
                    790:                /*
                    791:                 * TAO test failed or there was no CC option,
                    792:                 *    do a standard 3-way handshake.
                    793:                 */
                    794:                tp->t_flags |= TF_ACKNOW;
                    795:                tp->t_state = TCPS_SYN_RECEIVED;
                    796:                tp->t_timer[TCPT_KEEP] = tcp_keepinit;
                    797:                dropsocket = 0;         /* committed to socket */
                    798:                tcpstat.tcps_accepts++;
                    799:                goto trimthenstep6;
                    800:                }
                    801: 
                    802:        /*
                    803:         * If the state is SYN_RECEIVED:
                    804:         *      if seg contains an ACK, but not for our SYN/ACK, send a RST.
                    805:         */
                    806:        case TCPS_SYN_RECEIVED:
                    807:                if ((tiflags & TH_ACK) &&
                    808:                    (SEQ_LEQ(ti->ti_ack, tp->snd_una) ||
                    809:                     SEQ_GT(ti->ti_ack, tp->snd_max)))
                    810:                                goto dropwithreset;
                    811:                break;
                    812: 
                    813:        /*
                    814:         * If the state is SYN_SENT:
                    815:         *      if seg contains an ACK, but not for our SYN, drop the input.
                    816:         *      if seg contains a RST, then drop the connection.
                    817:         *      if seg does not contain SYN, then drop it.
                    818:         * Otherwise this is an acceptable SYN segment
                    819:         *      initialize tp->rcv_nxt and tp->irs
                    820:         *      if seg contains ack then advance tp->snd_una
                    821:         *      if SYN has been acked change to ESTABLISHED else SYN_RCVD state
                    822:         *      arrange for segment to be acked (eventually)
                    823:         *      continue processing rest of data/controls, beginning with URG
                    824:         */
                    825:        case TCPS_SYN_SENT:
                    826:                if ((taop = tcp_gettaocache(inp)) == NULL) {
                    827:                        taop = &tao_noncached;
                    828:                        bzero(taop, sizeof(*taop));
                    829:                }
                    830: 
                    831:                if ((tiflags & TH_ACK) &&
                    832:                    (SEQ_LEQ(ti->ti_ack, tp->iss) ||
                    833:                     SEQ_GT(ti->ti_ack, tp->snd_max))) {
                    834:                        /*
                    835:                         * If we have a cached CCsent for the remote host,
                    836:                         * hence we haven't just crashed and restarted,
                    837:                         * do not send a RST.  This may be a retransmission
                    838:                         * from the other side after our earlier ACK was lost.
                    839:                         * Our new SYN, when it arrives, will serve as the
                    840:                         * needed ACK.
                    841:                         */
                    842:                        if (taop->tao_ccsent != 0)
                    843:                                goto drop;
                    844:                        else
                    845:                                goto dropwithreset;
                    846:                }
                    847:                if (tiflags & TH_RST) {
                    848:                  if (tiflags & TH_ACK) {
                    849:                                tp = tcp_drop(tp, ECONNREFUSED);
                    850:                                postevent(so, 0, EV_RESET);
                    851:                  }
                    852:                        goto drop;
                    853:                }
                    854:                if ((tiflags & TH_SYN) == 0)
                    855:                        goto drop;
                    856:                tp->snd_wnd = ti->ti_win;       /* initial send window */
                    857:                tp->cc_recv = to.to_cc;         /* foreign CC */
                    858: 
                    859:                tp->irs = ti->ti_seq;
                    860:                tcp_rcvseqinit(tp);
                    861:                if (tiflags & TH_ACK) {
                    862:                        /*
                    863:                         * Our SYN was acked.  If segment contains CC.ECHO
                    864:                         * option, check it to make sure this segment really
                    865:                         * matches our SYN.  If not, just drop it as old
                    866:                         * duplicate, but send an RST if we're still playing
                    867:                         * by the old rules.  If no CC.ECHO option, make sure
                    868:                         * we don't get fooled into using T/TCP.
                    869:                         */
                    870:                        if (to.to_flag & TOF_CCECHO) {
                    871:                                if (tp->cc_send != to.to_ccecho)
                    872:                                        if (taop->tao_ccsent != 0)
                    873:                                                goto drop;
                    874:                                        else
                    875:                                                goto dropwithreset;
                    876:                        } else
                    877:                                tp->t_flags &= ~TF_RCVD_CC;
                    878:                        tcpstat.tcps_connects++;
                    879:                        soisconnected(so);
                    880:                        /* Do window scaling on this connection? */
                    881:                        if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
                    882:                                (TF_RCVD_SCALE|TF_REQ_SCALE)) {
                    883:                                tp->snd_scale = tp->requested_s_scale;
                    884:                                tp->rcv_scale = tp->request_r_scale;
                    885:                        }
                    886:                        /* Segment is acceptable, update cache if undefined. */
                    887:                        if (taop->tao_ccsent == 0)
                    888:                                taop->tao_ccsent = to.to_ccecho;
                    889: 
                    890:                        tp->rcv_adv += tp->rcv_wnd;
                    891:                        tp->snd_una++;          /* SYN is acked */
                    892:                        /*
                    893:                         * If there's data, delay ACK; if there's also a FIN
                    894:                         * ACKNOW will be turned on later.
                    895:                         */
                    896:                        if (tcp_delack_enabled && ti->ti_len != 0)
                    897:                                tp->t_flags |= TF_DELACK;
                    898:                        else
                    899:                                tp->t_flags |= TF_ACKNOW;
                    900:                        /*
                    901:                         * Received <SYN,ACK> in SYN_SENT[*] state.
                    902:                         * Transitions:
                    903:                         *      SYN_SENT  --> ESTABLISHED
                    904:                         *      SYN_SENT* --> FIN_WAIT_1
                    905:                         */
                    906:                        if (tp->t_flags & TF_NEEDFIN) {
                    907:                                tp->t_state = TCPS_FIN_WAIT_1;
                    908:                                tp->t_flags &= ~TF_NEEDFIN;
                    909:                                tiflags &= ~TH_SYN;
                    910:                        } else {
                    911:                                tp->t_state = TCPS_ESTABLISHED;
                    912:                                tp->t_timer[TCPT_KEEP] = tcp_keepidle;
                    913:                        }
                    914:                } else {
                    915:                /*
                    916:                 *  Received initial SYN in SYN-SENT[*] state => simul-
                    917:                 *  taneous open.  If segment contains CC option and there is
                    918:                 *  a cached CC, apply TAO test; if it succeeds, connection is
                    919:                 *  half-synchronized.  Otherwise, do 3-way handshake:
                    920:                 *        SYN-SENT -> SYN-RECEIVED
                    921:                 *        SYN-SENT* -> SYN-RECEIVED*
                    922:                 *  If there was no CC option, clear cached CC value.
                    923:                 */
                    924:                        tp->t_flags |= TF_ACKNOW;
                    925:                        tp->t_timer[TCPT_REXMT] = 0;
                    926:                        if (to.to_flag & TOF_CC) {
                    927:                                if (taop->tao_cc != 0 &&
                    928:                                    CC_GT(to.to_cc, taop->tao_cc)) {
                    929:                                        /*
                    930:                                         * update cache and make transition:
                    931:                                         *        SYN-SENT -> ESTABLISHED*
                    932:                                         *        SYN-SENT* -> FIN-WAIT-1*
                    933:                                         */
                    934:                                        taop->tao_cc = to.to_cc;
                    935:                                        if (tp->t_flags & TF_NEEDFIN) {
                    936:                                                tp->t_state = TCPS_FIN_WAIT_1;
                    937:                                                tp->t_flags &= ~TF_NEEDFIN;
                    938:                                        } else {
                    939:                                                tp->t_state = TCPS_ESTABLISHED;
                    940:                                                tp->t_timer[TCPT_KEEP] = tcp_keepidle;
                    941:                                        }
                    942:                                        tp->t_flags |= TF_NEEDSYN;
                    943:                                } else
                    944:                                        tp->t_state = TCPS_SYN_RECEIVED;
                    945:                        } else {
                    946:                                /* CC.NEW or no option => invalidate cache */
                    947:                                taop->tao_cc = 0;
                    948:                                tp->t_state = TCPS_SYN_RECEIVED;
                    949:                        }
                    950:                }
                    951: 
                    952: trimthenstep6:
                    953:                /*
                    954:                 * Advance ti->ti_seq to correspond to first data byte.
                    955:                 * If data, trim to stay within window,
                    956:                 * dropping FIN if necessary.
                    957:                 */
                    958:                ti->ti_seq++;
                    959:                if (ti->ti_len > tp->rcv_wnd) {
                    960:                        todrop = ti->ti_len - tp->rcv_wnd;
                    961:                        m_adj(m, -todrop);
                    962:                        ti->ti_len = tp->rcv_wnd;
                    963:                        tiflags &= ~TH_FIN;
                    964:                        tcpstat.tcps_rcvpackafterwin++;
                    965:                        tcpstat.tcps_rcvbyteafterwin += todrop;
                    966:                }
                    967:                tp->snd_wl1 = ti->ti_seq - 1;
                    968:                tp->rcv_up = ti->ti_seq;
                    969:                /*
                    970:                 *  Client side of transaction: already sent SYN and data.
                    971:                 *  If the remote host used T/TCP to validate the SYN,
                    972:                 *  our data will be ACK'd; if so, enter normal data segment
                    973:                 *  processing in the middle of step 5, ack processing.
                    974:                 *  Otherwise, goto step 6.
                    975:                 */
                    976:                if (tiflags & TH_ACK)
                    977:                        goto process_ACK;
                    978:                goto step6;
                    979:        /*
                    980:         * If the state is LAST_ACK or CLOSING or TIME_WAIT:
                    981:         *      if segment contains a SYN and CC [not CC.NEW] option:
                    982:         *              if state == TIME_WAIT and connection duration > MSL,
                    983:         *                  drop packet and send RST;
                    984:         *
                    985:         *              if SEG.CC > CCrecv then is new SYN, and can implicitly
                    986:         *                  ack the FIN (and data) in retransmission queue.
                    987:         *                  Complete close and delete TCPCB.  Then reprocess
                    988:         *                  segment, hoping to find new TCPCB in LISTEN state;
                    989:         *
                    990:         *              else must be old SYN; drop it.
                    991:         *      else do normal processing.
                    992:         */
                    993:        case TCPS_LAST_ACK:
                    994:        case TCPS_CLOSING:
                    995:        case TCPS_TIME_WAIT:
                    996:                if ((tiflags & TH_SYN) &&
                    997:                    (to.to_flag & TOF_CC) && tp->cc_recv != 0) {
                    998:                        if (tp->t_state == TCPS_TIME_WAIT &&
                    999:                                        tp->t_duration > TCPTV_MSL)
                   1000:                                goto dropwithreset;
                   1001:                        if (CC_GT(to.to_cc, tp->cc_recv)) {
                   1002:                                tp = tcp_close(tp);
                   1003:                                goto findpcb;
                   1004:                        }
                   1005:                        else
                   1006:                                goto drop;
                   1007:                }
                   1008:                break;  /* continue normal processing */
                   1009:        }
                   1010: 
                   1011:        /*
                   1012:         * States other than LISTEN or SYN_SENT.
                   1013:         * First check the RST flag and sequence number since reset segments
                   1014:         * are exempt from the timestamp and connection count tests.  This
                   1015:         * fixes a bug introduced by the Stevens, vol. 2, p. 960 bugfix
                   1016:         * below which allowed reset segments in half the sequence space
                   1017:         * to fall through and be processed (which gives forged reset
                   1018:         * segments with a random sequence number a 50 percent chance of
                   1019:         * killing a connection).
                   1020:         * Then check timestamp, if present.
                   1021:         * Then check the connection count, if present.
                   1022:         * Then check that at least some bytes of segment are within
                   1023:         * receive window.  If segment begins before rcv_nxt,
                   1024:         * drop leading data (and SYN); if nothing left, just ack.
                   1025:         *
                   1026:         *
                   1027:         * If the RST bit is set, check the sequence number to see
                   1028:         * if this is a valid reset segment.
                   1029:         * RFC 793 page 37:
                   1030:         *   In all states except SYN-SENT, all reset (RST) segments
                   1031:         *   are validated by checking their SEQ-fields.  A reset is
                   1032:         *   valid if its sequence number is in the window.
                   1033:         * Note: this does not take into account delayed ACKs, so
                   1034:         *   we should test against last_ack_sent instead of rcv_nxt.
                   1035:         *   Also, it does not make sense to allow reset segments with
                   1036:         *   sequence numbers greater than last_ack_sent to be processed
                   1037:         *   since these sequence numbers are just the acknowledgement
                   1038:         *   numbers in our outgoing packets being echoed back at us,
                   1039:         *   and these acknowledgement numbers are monotonically
                   1040:         *   increasing.
                   1041:         * If we have multiple segments in flight, the initial reset
                   1042:         * segment sequence numbers will be to the left of last_ack_sent,
                   1043:         * but they will eventually catch up.
                   1044:         * In any case, it never made sense to trim reset segments to
                   1045:         * fit the receive window since RFC 1122 says:
                   1046:         *   4.2.2.12  RST Segment: RFC-793 Section 3.4
                   1047:         *
                   1048:         *    A TCP SHOULD allow a received RST segment to include data.
                   1049:         *
                   1050:         *    DISCUSSION
                   1051:         *         It has been suggested that a RST segment could contain
                   1052:         *         ASCII text that encoded and explained the cause of the
                   1053:         *         RST.  No standard has yet been established for such
                   1054:         *         data.
                   1055:         *
                   1056:         * If the reset segment passes the sequence number test examine
                   1057:         * the state:
                   1058:         *    SYN_RECEIVED STATE:
                   1059:         *      If passive open, return to LISTEN state.
                   1060:         *      If active open, inform user that connection was refused.
                   1061:         *    ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES:
                   1062:         *      Inform user that connection was reset, and close tcb.
                   1063:         *    CLOSING, LAST_ACK STATES
                   1064:         *      Close the tcb.
                   1065:         *    TIME_WAIT state:
                   1066:         *      Drop the segment - see Stevens, vol. 2, p. 964 and
                   1067:         *      RFC 1337.
                   1068:         */
                   1069:        if (tiflags & TH_RST) {
                   1070:                if (tp->last_ack_sent == ti->ti_seq) {
                   1071:                        switch (tp->t_state) {
                   1072: 
                   1073:                        case TCPS_SYN_RECEIVED:
                   1074:                                so->so_error = ECONNREFUSED;
                   1075:                                goto close;
                   1076: 
                   1077:                        case TCPS_ESTABLISHED:
                   1078:                        case TCPS_FIN_WAIT_1:
                   1079:                        case TCPS_FIN_WAIT_2:
                   1080:                        case TCPS_CLOSE_WAIT:
                   1081:                                so->so_error = ECONNRESET;
                   1082:                        close:
                   1083:                                postevent(so, 0, EV_RESET);
                   1084:                                tp->t_state = TCPS_CLOSED;
                   1085:                                tcpstat.tcps_drops++;
                   1086:                                tp = tcp_close(tp);
                   1087:                                break;
                   1088: 
                   1089:                        case TCPS_CLOSING:
                   1090:                        case TCPS_LAST_ACK:
                   1091:                                tp = tcp_close(tp);
                   1092:                                break;
                   1093: 
                   1094:                        case TCPS_TIME_WAIT:
                   1095:                                break;
                   1096:                        }
                   1097:                }
                   1098:                goto drop;
                   1099:        }
                   1100: 
                   1101:        /*
                   1102:         * RFC 1323 PAWS: If we have a timestamp reply on this segment
                   1103:         * and it's less than ts_recent, drop it.
                   1104:         */
                   1105:        if ((to.to_flag & TOF_TS) != 0 && tp->ts_recent &&
                   1106:            TSTMP_LT(to.to_tsval, tp->ts_recent)) {
                   1107: 
                   1108:                /* Check to see if ts_recent is over 24 days old.  */
                   1109:                if ((int)(tcp_now - tp->ts_recent_age) > TCP_PAWS_IDLE) {
                   1110:                        /*
                   1111:                         * Invalidate ts_recent.  If this segment updates
                   1112:                         * ts_recent, the age will be reset later and ts_recent
                   1113:                         * will get a valid value.  If it does not, setting
                   1114:                         * ts_recent to zero will at least satisfy the
                   1115:                         * requirement that zero be placed in the timestamp
                   1116:                         * echo reply when ts_recent isn't valid.  The
                   1117:                         * age isn't reset until we get a valid ts_recent
                   1118:                         * because we don't want out-of-order segments to be
                   1119:                         * dropped when ts_recent is old.
                   1120:                         */
                   1121:                        tp->ts_recent = 0;
                   1122:                } else {
                   1123:                        tcpstat.tcps_rcvduppack++;
                   1124:                        tcpstat.tcps_rcvdupbyte += ti->ti_len;
                   1125:                        tcpstat.tcps_pawsdrop++;
                   1126:                        goto dropafterack;
                   1127:                }
                   1128:        }
                   1129: 
                   1130:        /*
                   1131:         * T/TCP mechanism
                   1132:         *   If T/TCP was negotiated and the segment doesn't have CC,
                   1133:         *   or if its CC is wrong then drop the segment.
                   1134:         *   RST segments do not have to comply with this.
                   1135:         */
                   1136:        if ((tp->t_flags & (TF_REQ_CC|TF_RCVD_CC)) == (TF_REQ_CC|TF_RCVD_CC) &&
                   1137:            ((to.to_flag & TOF_CC) == 0 || tp->cc_recv != to.to_cc))
                   1138:                goto dropafterack;
                   1139: 
                   1140:        /*
                   1141:         * In the SYN-RECEIVED state, validate that the packet belongs to
                   1142:         * this connection before trimming the data to fit the receive
                   1143:         * window.  Check the sequence number versus IRS since we know
                   1144:         * the sequence numbers haven't wrapped.  This is a partial fix
                   1145:         * for the "LAND" DoS attack.
                   1146:         */
                   1147:        if (tp->t_state == TCPS_SYN_RECEIVED && SEQ_LT(ti->ti_seq, tp->irs))
                   1148:                goto dropwithreset;
                   1149: 
                   1150:        todrop = tp->rcv_nxt - ti->ti_seq;
                   1151:        if (todrop > 0) {
                   1152:                if (tiflags & TH_SYN) {
                   1153:                        tiflags &= ~TH_SYN;
                   1154:                        ti->ti_seq++;
                   1155:                        if (ti->ti_urp > 1)
                   1156:                                ti->ti_urp--;
                   1157:                        else
                   1158:                                tiflags &= ~TH_URG;
                   1159:                        todrop--;
                   1160:                }
                   1161:                /*
                   1162:                 * Following if statement from Stevens, vol. 2, p. 960.
                   1163:                 */
                   1164:                if (todrop > ti->ti_len
                   1165:                    || (todrop == ti->ti_len && (tiflags & TH_FIN) == 0)) {
                   1166:                        /*
                   1167:                         * Any valid FIN must be to the left of the window.
                   1168:                         * At this point the FIN must be a duplicate or out
                   1169:                         * of sequence; drop it.
                   1170:                         */
                   1171:                        tiflags &= ~TH_FIN;
                   1172: 
                   1173:                        /*
                   1174:                         * Send an ACK to resynchronize and drop any data.
                   1175:                         * But keep on processing for RST or ACK.
                   1176:                         */
                   1177:                        tp->t_flags |= TF_ACKNOW;
                   1178:                        todrop = ti->ti_len;
                   1179:                        tcpstat.tcps_rcvduppack++;
                   1180:                        tcpstat.tcps_rcvdupbyte += todrop;
                   1181:                } else {
                   1182:                        tcpstat.tcps_rcvpartduppack++;
                   1183:                        tcpstat.tcps_rcvpartdupbyte += todrop;
                   1184:                }
                   1185:                m_adj(m, todrop);
                   1186:                ti->ti_seq += todrop;
                   1187:                ti->ti_len -= todrop;
                   1188:                if (ti->ti_urp > todrop)
                   1189:                        ti->ti_urp -= todrop;
                   1190:                else {
                   1191:                        tiflags &= ~TH_URG;
                   1192:                        ti->ti_urp = 0;
                   1193:                }
                   1194:        }
                   1195: 
                   1196:        /*
                   1197:         * If new data are received on a connection after the
                   1198:         * user processes are gone, then RST the other end.
                   1199:         */
                   1200:        if ((so->so_state & SS_NOFDREF) &&
                   1201:            tp->t_state > TCPS_CLOSE_WAIT && ti->ti_len) {
                   1202:                tp = tcp_close(tp);
                   1203:                tcpstat.tcps_rcvafterclose++;
                   1204:                goto dropwithreset;
                   1205:        }
                   1206: 
                   1207:        /*
                   1208:         * If segment ends after window, drop trailing data
                   1209:         * (and PUSH and FIN); if nothing left, just ACK.
                   1210:         */
                   1211:        todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd);
                   1212:        if (todrop > 0) {
                   1213:                tcpstat.tcps_rcvpackafterwin++;
                   1214:                if (todrop >= ti->ti_len) {
                   1215:                        tcpstat.tcps_rcvbyteafterwin += ti->ti_len;
                   1216:                        /*
                   1217:                         * If a new connection request is received
                   1218:                         * while in TIME_WAIT, drop the old connection
                   1219:                         * and start over if the sequence numbers
                   1220:                         * are above the previous ones.
                   1221:                         */
                   1222:                        if (tiflags & TH_SYN &&
                   1223:                            tp->t_state == TCPS_TIME_WAIT &&
                   1224:                            SEQ_GT(ti->ti_seq, tp->rcv_nxt)) {
                   1225:                                iss = tp->rcv_nxt + TCP_ISSINCR;
                   1226:                                tp = tcp_close(tp);
                   1227:                                goto findpcb;
                   1228:                        }
                   1229:                        /*
                   1230:                         * If window is closed can only take segments at
                   1231:                         * window edge, and have to drop data and PUSH from
                   1232:                         * incoming segments.  Continue processing, but
                   1233:                         * remember to ack.  Otherwise, drop segment
                   1234:                         * and ack.
                   1235:                         */
                   1236:                        if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) {
                   1237:                                tp->t_flags |= TF_ACKNOW;
                   1238:                                tcpstat.tcps_rcvwinprobe++;
                   1239:                        } else
                   1240:                                goto dropafterack;
                   1241:                } else
                   1242:                        tcpstat.tcps_rcvbyteafterwin += todrop;
                   1243:                m_adj(m, -todrop);
                   1244:                ti->ti_len -= todrop;
                   1245:                tiflags &= ~(TH_PUSH|TH_FIN);
                   1246:        }
                   1247: 
                   1248:        /*
                   1249:         * If last ACK falls within this segment's sequence numbers,
                   1250:         * record its timestamp.
                   1251:         * NOTE that the test is modified according to the latest
                   1252:         * proposal of the tcplw@cray.com list (Braden 1993/04/26).
                   1253:         */
                   1254:        if ((to.to_flag & TOF_TS) != 0 &&
                   1255:            SEQ_LEQ(ti->ti_seq, tp->last_ack_sent)) {
                   1256:                tp->ts_recent_age = tcp_now;
                   1257:                tp->ts_recent = to.to_tsval;
                   1258:        }
                   1259: 
                   1260:        /*
                   1261:         * If a SYN is in the window, then this is an
                   1262:         * error and we send an RST and drop the connection.
                   1263:         */
                   1264:        if (tiflags & TH_SYN) {
                   1265:                tp = tcp_drop(tp, ECONNRESET);
                   1266:                postevent(so, 0, EV_RESET);
                   1267:                goto dropwithreset;
                   1268:        }
                   1269: 
                   1270:        /*
                   1271:         * If the ACK bit is off:  if in SYN-RECEIVED state or SENDSYN
                   1272:         * flag is on (half-synchronized state), then queue data for
                   1273:         * later processing; else drop segment and return.
                   1274:         */
                   1275:        if ((tiflags & TH_ACK) == 0) {
                   1276:                if (tp->t_state == TCPS_SYN_RECEIVED ||
                   1277:                    (tp->t_flags & TF_NEEDSYN))
                   1278:                        goto step6;
                   1279:                else
                   1280:                        goto drop;
                   1281:        }
                   1282: 
                   1283:        /*
                   1284:         * Ack processing.
                   1285:         */
                   1286:        switch (tp->t_state) {
                   1287: 
                   1288:        /*
                   1289:         * In SYN_RECEIVED state, the ack ACKs our SYN, so enter
                   1290:         * ESTABLISHED state and continue processing.
                   1291:         * The ACK was checked above.
                   1292:         */
                   1293:        case TCPS_SYN_RECEIVED:
                   1294: 
                   1295:                tcpstat.tcps_connects++;
                   1296:                soisconnected(so);
                   1297:                /* Do window scaling? */
                   1298:                if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
                   1299:                        (TF_RCVD_SCALE|TF_REQ_SCALE)) {
                   1300:                        tp->snd_scale = tp->requested_s_scale;
                   1301:                        tp->rcv_scale = tp->request_r_scale;
                   1302:                }
                   1303:                /*
                   1304:                 * Upon successful completion of 3-way handshake,
                   1305:                 * update cache.CC if it was undefined, pass any queued
                   1306:                 * data to the user, and advance state appropriately.
                   1307:                 */
                   1308:                if ((taop = tcp_gettaocache(inp)) != NULL &&
                   1309:                    taop->tao_cc == 0)
                   1310:                        taop->tao_cc = tp->cc_recv;
                   1311: 
                   1312:                /*
                   1313:                 * Make transitions:
                   1314:                 *      SYN-RECEIVED  -> ESTABLISHED
                   1315:                 *      SYN-RECEIVED* -> FIN-WAIT-1
                   1316:                 */
                   1317:                if (tp->t_flags & TF_NEEDFIN) {
                   1318:                        tp->t_state = TCPS_FIN_WAIT_1;
                   1319:                        tp->t_flags &= ~TF_NEEDFIN;
                   1320:                } else {
                   1321:                        tp->t_state = TCPS_ESTABLISHED;
                   1322:                        tp->t_timer[TCPT_KEEP] = tcp_keepidle;
                   1323:                }
                   1324:                /*
                   1325:                 * If segment contains data or FIN, will call tcp_reass()
                   1326:                 * later; if not, do so now to pass queued data to user.
                   1327:                 */
                   1328:                if (ti->ti_len == 0 && (tiflags & TH_FIN) == 0)
                   1329:                        (void) tcp_reass(tp, (struct tcpiphdr *)0,
                   1330:                            (struct mbuf *)0);
                   1331:                tp->snd_wl1 = ti->ti_seq - 1;
                   1332:                /* fall into ... */
                   1333: 
                   1334:        /*
                   1335:         * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
                   1336:         * ACKs.  If the ack is in the range
                   1337:         *      tp->snd_una < ti->ti_ack <= tp->snd_max
                   1338:         * then advance tp->snd_una to ti->ti_ack and drop
                   1339:         * data from the retransmission queue.  If this ACK reflects
                   1340:         * more up to date window information we update our window information.
                   1341:         */
                   1342:        case TCPS_ESTABLISHED:
                   1343:        case TCPS_FIN_WAIT_1:
                   1344:        case TCPS_FIN_WAIT_2:
                   1345:        case TCPS_CLOSE_WAIT:
                   1346:        case TCPS_CLOSING:
                   1347:        case TCPS_LAST_ACK:
                   1348:        case TCPS_TIME_WAIT:
                   1349: 
                   1350:                if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) {
                   1351:                        if (ti->ti_len == 0 && tiwin == tp->snd_wnd) {
                   1352:                                tcpstat.tcps_rcvdupack++;
                   1353:                                /*
                   1354:                                 * If we have outstanding data (other than
                   1355:                                 * a window probe), this is a completely
                   1356:                                 * duplicate ack (ie, window info didn't
                   1357:                                 * change), the ack is the biggest we've
                   1358:                                 * seen and we've seen exactly our rexmt
                   1359:                                 * threshold of them, assume a packet
                   1360:                                 * has been dropped and retransmit it.
                   1361:                                 * Kludge snd_nxt & the congestion
                   1362:                                 * window so we send only this one
                   1363:                                 * packet.
                   1364:                                 *
                   1365:                                 * We know we're losing at the current
                   1366:                                 * window size so do congestion avoidance
                   1367:                                 * (set ssthresh to half the current window
                   1368:                                 * and pull our congestion window back to
                   1369:                                 * the new ssthresh).
                   1370:                                 *
                   1371:                                 * Dup acks mean that packets have left the
                   1372:                                 * network (they're now cached at the receiver)
                   1373:                                 * so bump cwnd by the amount in the receiver
                   1374:                                 * to keep a constant cwnd packets in the
                   1375:                                 * network.
                   1376:                                 */
                   1377:                                if (tp->t_timer[TCPT_REXMT] == 0 ||
                   1378:                                    ti->ti_ack != tp->snd_una)
                   1379:                                        tp->t_dupacks = 0;
                   1380:                                else if (++tp->t_dupacks == tcprexmtthresh) {
                   1381:                                        tcp_seq onxt = tp->snd_nxt;
                   1382:                                        u_int win =
                   1383:                                            min(tp->snd_wnd, tp->snd_cwnd) / 2 /
                   1384:                                                tp->t_maxseg;
                   1385: 
                   1386:                                        if (win < 2)
                   1387:                                                win = 2;
                   1388:                                        tp->snd_ssthresh = win * tp->t_maxseg;
                   1389:                                        tp->t_timer[TCPT_REXMT] = 0;
                   1390:                                        tp->t_rtt = 0;
                   1391:                                        tp->snd_nxt = ti->ti_ack;
                   1392:                                        tp->snd_cwnd = tp->t_maxseg;
                   1393:                                        (void) tcp_output(tp);
                   1394:                                        tp->snd_cwnd = tp->snd_ssthresh +
                   1395:                                               tp->t_maxseg * tp->t_dupacks;
                   1396:                                        if (SEQ_GT(onxt, tp->snd_nxt))
                   1397:                                                tp->snd_nxt = onxt;
                   1398:                                        goto drop;
                   1399:                                } else if (tp->t_dupacks > tcprexmtthresh) {
                   1400:                                        tp->snd_cwnd += tp->t_maxseg;
                   1401:                                        (void) tcp_output(tp);
                   1402:                                        goto drop;
                   1403:                                }
                   1404:                        } else
                   1405:                                tp->t_dupacks = 0;
                   1406:                        break;
                   1407:                }
                   1408:                /*
                   1409:                 * If the congestion window was inflated to account
                   1410:                 * for the other side's cached packets, retract it.
                   1411:                 */
                   1412:                if (tp->t_dupacks >= tcprexmtthresh &&
                   1413:                    tp->snd_cwnd > tp->snd_ssthresh)
                   1414:                        tp->snd_cwnd = tp->snd_ssthresh;
                   1415:                tp->t_dupacks = 0;
                   1416:                if (SEQ_GT(ti->ti_ack, tp->snd_max)) {
                   1417:                        tcpstat.tcps_rcvacktoomuch++;
                   1418:                        goto dropafterack;
                   1419:                }
                   1420:                /*
                   1421:                 *  If we reach this point, ACK is not a duplicate,
                   1422:                 *     i.e., it ACKs something we sent.
                   1423:                 */
                   1424:                if (tp->t_flags & TF_NEEDSYN) {
                   1425:                        /*
                   1426:                         * T/TCP: Connection was half-synchronized, and our
                   1427:                         * SYN has been ACK'd (so connection is now fully
                   1428:                         * synchronized).  Go to non-starred state,
                   1429:                         * increment snd_una for ACK of SYN, and check if
                   1430:                         * we can do window scaling.
                   1431:                         */
                   1432:                        tp->t_flags &= ~TF_NEEDSYN;
                   1433:                        tp->snd_una++;
                   1434:                        /* Do window scaling? */
                   1435:                        if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
                   1436:                                (TF_RCVD_SCALE|TF_REQ_SCALE)) {
                   1437:                                tp->snd_scale = tp->requested_s_scale;
                   1438:                                tp->rcv_scale = tp->request_r_scale;
                   1439:                        }
                   1440:                }
                   1441: 
                   1442: process_ACK:
                   1443:                acked = ti->ti_ack - tp->snd_una;
                   1444:                tcpstat.tcps_rcvackpack++;
                   1445:                tcpstat.tcps_rcvackbyte += acked;
                   1446: 
                   1447:                /*
                   1448:                 * If we have a timestamp reply, update smoothed
                   1449:                 * round trip time.  If no timestamp is present but
                   1450:                 * transmit timer is running and timed sequence
                   1451:                 * number was acked, update smoothed round trip time.
                   1452:                 * Since we now have an rtt measurement, cancel the
                   1453:                 * timer backoff (cf., Phil Karn's retransmit alg.).
                   1454:                 * Recompute the initial retransmit timer.
                   1455:                 */
                   1456:                if (to.to_flag & TOF_TS)
                   1457:                        tcp_xmit_timer(tp, tcp_now - to.to_tsecr + 1);
                   1458:                else if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq))
                   1459:                        tcp_xmit_timer(tp,tp->t_rtt);
                   1460: 
                   1461:                /*
                   1462:                 * If all outstanding data is acked, stop retransmit
                   1463:                 * timer and remember to restart (more output or persist).
                   1464:                 * If there is more data to be acked, restart retransmit
                   1465:                 * timer, using current (possibly backed-off) value.
                   1466:                 */
                   1467:                if (ti->ti_ack == tp->snd_max) {
                   1468:                        tp->t_timer[TCPT_REXMT] = 0;
                   1469:                        needoutput = 1;
                   1470:                } else if (tp->t_timer[TCPT_PERSIST] == 0)
                   1471:                        tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
                   1472: 
                   1473:                /*
                   1474:                 * If no data (only SYN) was ACK'd,
                   1475:                 *    skip rest of ACK processing.
                   1476:                 */
                   1477:                if (acked == 0)
                   1478:                        goto step6;
                   1479: 
                   1480:                /*
                   1481:                 * When new data is acked, open the congestion window.
                   1482:                 * If the window gives us less than ssthresh packets
                   1483:                 * in flight, open exponentially (maxseg per packet).
                   1484:                 * Otherwise open linearly: maxseg per window
                   1485:                 * (maxseg^2 / cwnd per packet).
                   1486:                 */
                   1487:                {
                   1488:                register u_int cw = tp->snd_cwnd;
                   1489:                register u_int incr = tp->t_maxseg;
                   1490: 
                   1491:                if (cw > tp->snd_ssthresh)
                   1492:                        incr = incr * incr / cw;
                   1493:                tp->snd_cwnd = min(cw + incr, TCP_MAXWIN<<tp->snd_scale);
                   1494:                }
                   1495:                if (acked > so->so_snd.sb_cc) {
                   1496:                        tp->snd_wnd -= so->so_snd.sb_cc;
                   1497:                        sbdrop(&so->so_snd, (int)so->so_snd.sb_cc);
                   1498:                        ourfinisacked = 1;
                   1499:                } else {
                   1500:                        sbdrop(&so->so_snd, acked);
                   1501:                        tp->snd_wnd -= acked;
                   1502:                        ourfinisacked = 0;
                   1503:                }
                   1504:                sowwakeup(so);
                   1505:                tp->snd_una = ti->ti_ack;
                   1506:                if (SEQ_LT(tp->snd_nxt, tp->snd_una))
                   1507:                        tp->snd_nxt = tp->snd_una;
                   1508: 
                   1509:                switch (tp->t_state) {
                   1510: 
                   1511:                /*
                   1512:                 * In FIN_WAIT_1 STATE in addition to the processing
                   1513:                 * for the ESTABLISHED state if our FIN is now acknowledged
                   1514:                 * then enter FIN_WAIT_2.
                   1515:                 */
                   1516:                case TCPS_FIN_WAIT_1:
                   1517:                        if (ourfinisacked) {
                   1518:                                /*
                   1519:                                 * If we can't receive any more
                   1520:                                 * data, then closing user can proceed.
                   1521:                                 * Starting the timer is contrary to the
                   1522:                                 * specification, but if we don't get a FIN
                   1523:                                 * we'll hang forever.
                   1524:                                 */
                   1525:                                if (so->so_state & SS_CANTRCVMORE) {
                   1526:                                        soisdisconnected(so);
                   1527:                                        tp->t_timer[TCPT_2MSL] = tcp_maxidle;
                   1528:                                }
                   1529:                                tp->t_state = TCPS_FIN_WAIT_2;
                   1530:                        }
                   1531:                        break;
                   1532: 
                   1533:                /*
                   1534:                 * In CLOSING STATE in addition to the processing for
                   1535:                 * the ESTABLISHED state if the ACK acknowledges our FIN
                   1536:                 * then enter the TIME-WAIT state, otherwise ignore
                   1537:                 * the segment.
                   1538:                 */
                   1539:                case TCPS_CLOSING:
                   1540:                        if (ourfinisacked) {
                   1541:                                tp->t_state = TCPS_TIME_WAIT;
                   1542:                                tcp_canceltimers(tp);
                   1543:                                /* Shorten TIME_WAIT [RFC-1644, p.28] */
                   1544:                                if (tp->cc_recv != 0 &&
                   1545:                                    tp->t_duration < TCPTV_MSL)
                   1546:                                        tp->t_timer[TCPT_2MSL] =
                   1547:                                            tp->t_rxtcur * TCPTV_TWTRUNC;
                   1548:                                else
                   1549:                                        tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
                   1550:                                soisdisconnected(so);
                   1551:                        }
                   1552:                        break;
                   1553: 
                   1554:                /*
                   1555:                 * In LAST_ACK, we may still be waiting for data to drain
                   1556:                 * and/or to be acked, as well as for the ack of our FIN.
                   1557:                 * If our FIN is now acknowledged, delete the TCB,
                   1558:                 * enter the closed state and return.
                   1559:                 */
                   1560:                case TCPS_LAST_ACK:
                   1561:                        if (ourfinisacked) {
                   1562:                                tp = tcp_close(tp);
                   1563:                                goto drop;
                   1564:                        }
                   1565:                        break;
                   1566: 
                   1567:                /*
                   1568:                 * In TIME_WAIT state the only thing that should arrive
                   1569:                 * is a retransmission of the remote FIN.  Acknowledge
                   1570:                 * it and restart the finack timer.
                   1571:                 */
                   1572:                case TCPS_TIME_WAIT:
                   1573:                        tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
                   1574:                        goto dropafterack;
                   1575:                }
                   1576:        }
                   1577: 
                   1578: step6:
                   1579:        /*
                   1580:         * Update window information.
                   1581:         * Don't look at window if no ACK: TAC's send garbage on first SYN.
                   1582:         */
                   1583:        if ((tiflags & TH_ACK) &&
                   1584:            (SEQ_LT(tp->snd_wl1, ti->ti_seq) ||
                   1585:            (tp->snd_wl1 == ti->ti_seq && (SEQ_LT(tp->snd_wl2, ti->ti_ack) ||
                   1586:             (tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))))) {
                   1587:                /* keep track of pure window updates */
                   1588:                if (ti->ti_len == 0 &&
                   1589:                    tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd)
                   1590:                        tcpstat.tcps_rcvwinupd++;
                   1591:                tp->snd_wnd = tiwin;
                   1592:                tp->snd_wl1 = ti->ti_seq;
                   1593:                tp->snd_wl2 = ti->ti_ack;
                   1594:                if (tp->snd_wnd > tp->max_sndwnd)
                   1595:                        tp->max_sndwnd = tp->snd_wnd;
                   1596:                needoutput = 1;
                   1597:        }
                   1598: 
                   1599:        /*
                   1600:         * Process segments with URG.
                   1601:         */
                   1602:        if ((tiflags & TH_URG) && ti->ti_urp &&
                   1603:            TCPS_HAVERCVDFIN(tp->t_state) == 0) {
                   1604:                /*
                   1605:                 * This is a kludge, but if we receive and accept
                   1606:                 * random urgent pointers, we'll crash in
                   1607:                 * soreceive.  It's hard to imagine someone
                   1608:                 * actually wanting to send this much urgent data.
                   1609:                 */
                   1610:                if (ti->ti_urp + so->so_rcv.sb_cc > sb_max) {
                   1611:                        ti->ti_urp = 0;                 /* XXX */
                   1612:                        tiflags &= ~TH_URG;             /* XXX */
                   1613:                        goto dodata;                    /* XXX */
                   1614:                }
                   1615:                /*
                   1616:                 * If this segment advances the known urgent pointer,
                   1617:                 * then mark the data stream.  This should not happen
                   1618:                 * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
                   1619:                 * a FIN has been received from the remote side.
                   1620:                 * In these states we ignore the URG.
                   1621:                 *
                   1622:                 * According to RFC 961 (Official ARPA-Internet Protocols),
                   1623:                 * the urgent pointer points to the last octet
                   1624:                 * of urgent data.  We continue, however,
                   1625:                 * to consider it to indicate the first octet
                   1626:                 * of data past the urgent section as the original
                   1627:                 * spec states (in one of two places).
                   1628:                 */
                   1629:                if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) {
                   1630:                        tp->rcv_up = ti->ti_seq + ti->ti_urp;
                   1631:                        so->so_oobmark = so->so_rcv.sb_cc +
                   1632:                            (tp->rcv_up - tp->rcv_nxt) - 1;
                   1633:                        if (so->so_oobmark == 0) {
                   1634:                                so->so_state |= SS_RCVATMARK;
                   1635:                                postevent(so, 0, EV_OOB);
                   1636:                        }
                   1637:                        sohasoutofband(so);
                   1638:                        tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
                   1639:                }
                   1640:                /*
                   1641:                 * Remove out-of-band data so it isn't presented to the user.
                   1642:                 * This can happen independent of advancing the URG pointer,
                   1643:                 * but if two URG's are pending at once, some out-of-band
                   1644:                 * data may creep in... ick.
                   1645:                 */
                   1646:                if (ti->ti_urp <= (u_long)ti->ti_len
                   1647: #if SO_OOBINLINE
                   1648:                     && (so->so_options & SO_OOBINLINE) == 0
                   1649: #endif
                   1650:                     )
                   1651:                        tcp_pulloutofband(so, ti, m);
                   1652:        } else
                   1653:                /*
                   1654:                 * If no out of band data is expected,
                   1655:                 * pull receive urgent pointer along
                   1656:                 * with the receive window.
                   1657:                 */
                   1658:                if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
                   1659:                        tp->rcv_up = tp->rcv_nxt;
                   1660: dodata:                                                        /* XXX */
                   1661: 
                   1662:        /*
                   1663:         * Process the segment text, merging it into the TCP sequencing queue,
                   1664:         * and arranging for acknowledgment of receipt if necessary.
                   1665:         * This process logically involves adjusting tp->rcv_wnd as data
                   1666:         * is presented to the user (this happens in tcp_usrreq.c,
                   1667:         * case PRU_RCVD).  If a FIN has already been received on this
                   1668:         * connection then we just ignore the text.
                   1669:         */
                   1670:        if ((ti->ti_len || (tiflags&TH_FIN)) &&
                   1671:            TCPS_HAVERCVDFIN(tp->t_state) == 0) {
                   1672:                TCP_REASS(tp, ti, m, so, tiflags);
                   1673:                /*
                   1674:                 * Note the amount of data that peer has sent into
                   1675:                 * our window, in order to estimate the sender's
                   1676:                 * buffer size.
                   1677:                 */
                   1678:                len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt);
                   1679:        } else {
                   1680:                m_freem(m);
                   1681:                tiflags &= ~TH_FIN;
                   1682:        }
                   1683: 
                   1684:        /*
                   1685:         * If FIN is received, ACK the FIN and let the user know
                   1686:         * that the connection is closing.
                   1687:         */
                   1688:        if (tiflags & TH_FIN) {
                   1689:                if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
                   1690:                        socantrcvmore(so);
                   1691:                        postevent(so, 0, EV_FIN);
                   1692:                        /*
                   1693:                         *  If connection is half-synchronized
                   1694:                         *  (ie NEEDSYN flag on) then delay ACK,
                   1695:                         *  so it may be piggybacked when SYN is sent.
                   1696:                         *  Otherwise, since we received a FIN then no
                   1697:                         *  more input can be expected, send ACK now.
                   1698:                         */
                   1699:                        if (tcp_delack_enabled && (tp->t_flags & TF_NEEDSYN))
                   1700:                                tp->t_flags |= TF_DELACK;
                   1701:                        else
                   1702:                                tp->t_flags |= TF_ACKNOW;
                   1703:                        tp->rcv_nxt++;
                   1704:                }
                   1705:                switch (tp->t_state) {
                   1706: 
                   1707:                /*
                   1708:                 * In SYN_RECEIVED and ESTABLISHED STATES
                   1709:                 * enter the CLOSE_WAIT state.
                   1710:                 */
                   1711:                case TCPS_SYN_RECEIVED:
                   1712:                case TCPS_ESTABLISHED:
                   1713:                        tp->t_state = TCPS_CLOSE_WAIT;
                   1714:                        break;
                   1715: 
                   1716:                /*
                   1717:                 * If still in FIN_WAIT_1 state, our FIN has not been acked,
                   1718:                 * so enter the CLOSING state.
                   1719:                 */
                   1720:                case TCPS_FIN_WAIT_1:
                   1721:                        tp->t_state = TCPS_CLOSING;
                   1722:                        break;
                   1723: 
                   1724:                /*
                   1725:                 * In FIN_WAIT_2 state enter the TIME_WAIT state,
                   1726:                 * starting the time-wait timer, turning off the other
                   1727:                 * standard timers.
                   1728:                 */
                   1729:                case TCPS_FIN_WAIT_2:
                   1730:                        tp->t_state = TCPS_TIME_WAIT;
                   1731:                        tcp_canceltimers(tp);
                   1732:                        /* Shorten TIME_WAIT [RFC-1644, p.28] */
                   1733:                        if (tp->cc_recv != 0 &&
                   1734:                            tp->t_duration < TCPTV_MSL) {
                   1735:                                tp->t_timer[TCPT_2MSL] =
                   1736:                                    tp->t_rxtcur * TCPTV_TWTRUNC;
                   1737:                                /* For transaction client, force ACK now. */
                   1738:                                tp->t_flags |= TF_ACKNOW;
                   1739:                        }
                   1740:                        else
                   1741:                                tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
                   1742:                        soisdisconnected(so);
                   1743:                        break;
                   1744: 
                   1745:                /*
                   1746:                 * In TIME_WAIT state restart the 2 MSL time_wait timer.
                   1747:                 */
                   1748:                case TCPS_TIME_WAIT:
                   1749:                        tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
                   1750:                        break;
                   1751:                }
                   1752:        }
                   1753: #if TCPDEBUG
                   1754:        if (so->so_options & SO_DEBUG)
                   1755:                tcp_trace(TA_INPUT, ostate, tp, &tcp_saveti, 0);
                   1756: #endif
                   1757: 
                   1758:        /*
                   1759:         * Return any desired output.
                   1760:         */
                   1761:        if (needoutput || (tp->t_flags & TF_ACKNOW))
                   1762:                (void) tcp_output(tp);
                   1763:        return;
                   1764: 
                   1765: dropafterack:
                   1766:        /*
                   1767:         * Generate an ACK dropping incoming segment if it occupies
                   1768:         * sequence space, where the ACK reflects our state.
                   1769:         *
                   1770:         * We can now skip the test for the RST flag since all
                   1771:         * paths to this code happen after packets containing
                   1772:         * RST have been dropped.
                   1773:         *
                   1774:         * In the SYN-RECEIVED state, don't send an ACK unless the
                   1775:         * segment we received passes the SYN-RECEIVED ACK test.
                   1776:         * If it fails send a RST.  This breaks the loop in the
                   1777:         * "LAND" DoS attack, and also prevents an ACK storm
                   1778:         * between two listening ports that have been sent forged
                   1779:         * SYN segments, each with the source address of the other.
                   1780:         */
                   1781:        if (tp->t_state == TCPS_SYN_RECEIVED && (tiflags & TH_ACK) &&
                   1782:            (SEQ_GT(tp->snd_una, ti->ti_ack) ||
                   1783:             SEQ_GT(ti->ti_ack, tp->snd_max)) )
                   1784:                goto dropwithreset;
                   1785: #if TCPDEBUG
                   1786:        if (so->so_options & SO_DEBUG)
                   1787:                tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0);
                   1788: #endif
                   1789:        m_freem(m);
                   1790:        tp->t_flags |= TF_ACKNOW;
                   1791:        (void) tcp_output(tp);
                   1792:        return;
                   1793: 
                   1794: dropwithreset:
                   1795:        /*
                   1796:         * Generate a RST, dropping incoming segment.
                   1797:         * Make ACK acceptable to originator of segment.
                   1798:         * Don't bother to respond if destination was broadcast/multicast.
                   1799:         */
                   1800:        if ((tiflags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST) ||
                   1801:            IN_MULTICAST(ntohl(ti->ti_dst.s_addr)))
                   1802:                goto drop;
                   1803: #if TCPDEBUG
                   1804:        if (tp == 0 || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
                   1805:                tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0);
                   1806: #endif
                   1807:        if (tiflags & TH_ACK)
                   1808:                tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST);
                   1809:        else {
                   1810:                if (tiflags & TH_SYN)
                   1811:                        ti->ti_len++;
                   1812:                tcp_respond(tp, ti, m, ti->ti_seq+ti->ti_len, (tcp_seq)0,
                   1813:                    TH_RST|TH_ACK);
                   1814:        }
                   1815:        /* destroy temporarily created socket */
                   1816:        if (dropsocket)
                   1817:                (void) soabort(so);
                   1818:        return;
                   1819: 
                   1820: drop:
                   1821:        /*
                   1822:         * Drop space held by incoming segment and return.
                   1823:         */
                   1824: #if TCPDEBUG
                   1825:        if (tp == 0 || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
                   1826:                tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0);
                   1827: #endif
                   1828:        m_freem(m);
                   1829:        /* destroy temporarily created socket */
                   1830:        if (dropsocket)
                   1831:                (void) soabort(so);
                   1832:        return;
                   1833: }
                   1834: 
                   1835: static void
                   1836: tcp_dooptions(tp, cp, cnt, ti, to)
                   1837:        struct tcpcb *tp;
                   1838:        u_char *cp;
                   1839:        int cnt;
                   1840:        struct tcpiphdr *ti;
                   1841:        struct tcpopt *to;
                   1842: {
                   1843:        u_short mss = 0;
                   1844:        int opt, optlen;
                   1845: 
                   1846:        for (; cnt > 0; cnt -= optlen, cp += optlen) {
                   1847:                opt = cp[0];
                   1848:                if (opt == TCPOPT_EOL)
                   1849:                        break;
                   1850:                if (opt == TCPOPT_NOP)
                   1851:                        optlen = 1;
                   1852:                else {
                   1853:                        optlen = cp[1];
                   1854:                        if (optlen <= 0)
                   1855:                                break;
                   1856:                }
                   1857:                switch (opt) {
                   1858: 
                   1859:                default:
                   1860:                        continue;
                   1861: 
                   1862:                case TCPOPT_MAXSEG:
                   1863:                        if (optlen != TCPOLEN_MAXSEG)
                   1864:                                continue;
                   1865:                        if (!(ti->ti_flags & TH_SYN))
                   1866:                                continue;
                   1867:                        bcopy((char *) cp + 2, (char *) &mss, sizeof(mss));
                   1868:                        NTOHS(mss);
                   1869:                        break;
                   1870: 
                   1871:                case TCPOPT_WINDOW:
                   1872:                        if (optlen != TCPOLEN_WINDOW)
                   1873:                                continue;
                   1874:                        if (!(ti->ti_flags & TH_SYN))
                   1875:                                continue;
                   1876:                        tp->t_flags |= TF_RCVD_SCALE;
                   1877:                        tp->requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT);
                   1878:                        break;
                   1879: 
                   1880:                case TCPOPT_TIMESTAMP:
                   1881:                        if (optlen != TCPOLEN_TIMESTAMP)
                   1882:                                continue;
                   1883:                        to->to_flag |= TOF_TS;
                   1884:                        bcopy((char *)cp + 2,
                   1885:                            (char *)&to->to_tsval, sizeof(to->to_tsval));
                   1886:                        NTOHL(to->to_tsval);
                   1887:                        bcopy((char *)cp + 6,
                   1888:                            (char *)&to->to_tsecr, sizeof(to->to_tsecr));
                   1889:                        NTOHL(to->to_tsecr);
                   1890: 
                   1891:                        /*
                   1892:                         * A timestamp received in a SYN makes
                   1893:                         * it ok to send timestamp requests and replies.
                   1894:                         */
                   1895:                        if (ti->ti_flags & TH_SYN) {
                   1896:                                tp->t_flags |= TF_RCVD_TSTMP;
                   1897:                                tp->ts_recent = to->to_tsval;
                   1898:                                tp->ts_recent_age = tcp_now;
                   1899:                        }
                   1900:                        break;
                   1901:                case TCPOPT_CC:
                   1902:                        if (optlen != TCPOLEN_CC)
                   1903:                                continue;
                   1904:                        to->to_flag |= TOF_CC;
                   1905:                        bcopy((char *)cp + 2,
                   1906:                            (char *)&to->to_cc, sizeof(to->to_cc));
                   1907:                        NTOHL(to->to_cc);
                   1908:                        /*
                   1909:                         * A CC or CC.new option received in a SYN makes
                   1910:                         * it ok to send CC in subsequent segments.
                   1911:                         */
                   1912:                        if (ti->ti_flags & TH_SYN)
                   1913:                                tp->t_flags |= TF_RCVD_CC;
                   1914:                        break;
                   1915:                case TCPOPT_CCNEW:
                   1916:                        if (optlen != TCPOLEN_CC)
                   1917:                                continue;
                   1918:                        if (!(ti->ti_flags & TH_SYN))
                   1919:                                continue;
                   1920:                        to->to_flag |= TOF_CCNEW;
                   1921:                        bcopy((char *)cp + 2,
                   1922:                            (char *)&to->to_cc, sizeof(to->to_cc));
                   1923:                        NTOHL(to->to_cc);
                   1924:                        /*
                   1925:                         * A CC or CC.new option received in a SYN makes
                   1926:                         * it ok to send CC in subsequent segments.
                   1927:                         */
                   1928:                        tp->t_flags |= TF_RCVD_CC;
                   1929:                        break;
                   1930:                case TCPOPT_CCECHO:
                   1931:                        if (optlen != TCPOLEN_CC)
                   1932:                                continue;
                   1933:                        if (!(ti->ti_flags & TH_SYN))
                   1934:                                continue;
                   1935:                        to->to_flag |= TOF_CCECHO;
                   1936:                        bcopy((char *)cp + 2,
                   1937:                            (char *)&to->to_ccecho, sizeof(to->to_ccecho));
                   1938:                        NTOHL(to->to_ccecho);
                   1939:                        break;
                   1940:                }
                   1941:        }
                   1942:        if (ti->ti_flags & TH_SYN)
                   1943:                tcp_mss(tp, mss);       /* sets t_maxseg */
                   1944: }
                   1945: 
                   1946: /*
                   1947:  * Pull out of band byte out of a segment so
                   1948:  * it doesn't appear in the user's data queue.
                   1949:  * It is still reflected in the segment length for
                   1950:  * sequencing purposes.
                   1951:  */
                   1952: static void
                   1953: tcp_pulloutofband(so, ti, m)
                   1954:        struct socket *so;
                   1955:        struct tcpiphdr *ti;
                   1956:        register struct mbuf *m;
                   1957: {
                   1958:        int cnt = ti->ti_urp - 1;
                   1959: 
                   1960:        while (cnt >= 0) {
                   1961:                if (m->m_len > cnt) {
                   1962:                        char *cp = mtod(m, caddr_t) + cnt;
                   1963:                        struct tcpcb *tp = sototcpcb(so);
                   1964: 
                   1965:                        tp->t_iobc = *cp;
                   1966:                        tp->t_oobflags |= TCPOOB_HAVEDATA;
                   1967:                        bcopy(cp+1, cp, (unsigned)(m->m_len - cnt - 1));
                   1968:                        m->m_len--;
                   1969:                        return;
                   1970:                }
                   1971:                cnt -= m->m_len;
                   1972:                m = m->m_next;
                   1973:                if (m == 0)
                   1974:                        break;
                   1975:        }
                   1976:        panic("tcp_pulloutofband");
                   1977: }
                   1978: 
                   1979: /*
                   1980:  * Collect new round-trip time estimate
                   1981:  * and update averages and current timeout.
                   1982:  */
                   1983: static void
                   1984: tcp_xmit_timer(tp, rtt)
                   1985:        register struct tcpcb *tp;
                   1986:        short rtt;
                   1987: {
                   1988:        register int delta;
                   1989: 
                   1990:        tcpstat.tcps_rttupdated++;
                   1991:        tp->t_rttupdated++;
                   1992:        if (tp->t_srtt != 0) {
                   1993:                /*
                   1994:                 * srtt is stored as fixed point with 5 bits after the
                   1995:                 * binary point (i.e., scaled by 8).  The following magic
                   1996:                 * is equivalent to the smoothing algorithm in rfc793 with
                   1997:                 * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
                   1998:                 * point).  Adjust rtt to origin 0.
                   1999:                 */
                   2000:                delta = ((rtt - 1) << TCP_DELTA_SHIFT)
                   2001:                        - (tp->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT));
                   2002: 
                   2003:                if ((tp->t_srtt += delta) <= 0)
                   2004:                        tp->t_srtt = 1;
                   2005: 
                   2006:                /*
                   2007:                 * We accumulate a smoothed rtt variance (actually, a
                   2008:                 * smoothed mean difference), then set the retransmit
                   2009:                 * timer to smoothed rtt + 4 times the smoothed variance.
                   2010:                 * rttvar is stored as fixed point with 4 bits after the
                   2011:                 * binary point (scaled by 16).  The following is
                   2012:                 * equivalent to rfc793 smoothing with an alpha of .75
                   2013:                 * (rttvar = rttvar*3/4 + |delta| / 4).  This replaces
                   2014:                 * rfc793's wired-in beta.
                   2015:                 */
                   2016:                if (delta < 0)
                   2017:                        delta = -delta;
                   2018:                delta -= tp->t_rttvar >> (TCP_RTTVAR_SHIFT - TCP_DELTA_SHIFT);
                   2019:                if ((tp->t_rttvar += delta) <= 0)
                   2020:                        tp->t_rttvar = 1;
                   2021:        } else {
                   2022:                /*
                   2023:                 * No rtt measurement yet - use the unsmoothed rtt.
                   2024:                 * Set the variance to half the rtt (so our first
                   2025:                 * retransmit happens at 3*rtt).
                   2026:                 */
                   2027:                tp->t_srtt = rtt << TCP_RTT_SHIFT;
                   2028:                tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
                   2029:        }
                   2030:        tp->t_rtt = 0;
                   2031:        tp->t_rxtshift = 0;
                   2032: 
                   2033:        /*
                   2034:         * the retransmit should happen at rtt + 4 * rttvar.
                   2035:         * Because of the way we do the smoothing, srtt and rttvar
                   2036:         * will each average +1/2 tick of bias.  When we compute
                   2037:         * the retransmit timer, we want 1/2 tick of rounding and
                   2038:         * 1 extra tick because of +-1/2 tick uncertainty in the
                   2039:         * firing of the timer.  The bias will give us exactly the
                   2040:         * 1.5 tick we need.  But, because the bias is
                   2041:         * statistical, we have to test that we don't drop below
                   2042:         * the minimum feasible timer (which is 2 ticks).
                   2043:         */
                   2044:        TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
                   2045:                      max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX);
                   2046: 
                   2047:        /*
                   2048:         * We received an ack for a packet that wasn't retransmitted;
                   2049:         * it is probably safe to discard any error indications we've
                   2050:         * received recently.  This isn't quite right, but close enough
                   2051:         * for now (a route might have failed after we sent a segment,
                   2052:         * and the return path might not be symmetrical).
                   2053:         */
                   2054:        tp->t_softerror = 0;
                   2055: }
                   2056: 
                   2057: /*
                   2058:  * Determine a reasonable value for maxseg size.
                   2059:  * If the route is known, check route for mtu.
                   2060:  * If none, use an mss that can be handled on the outgoing
                   2061:  * interface without forcing IP to fragment; if bigger than
                   2062:  * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES
                   2063:  * to utilize large mbufs.  If no route is found, route has no mtu,
                   2064:  * or the destination isn't local, use a default, hopefully conservative
                   2065:  * size (usually 512 or the default IP max size, but no more than the mtu
                   2066:  * of the interface), as we can't discover anything about intervening
                   2067:  * gateways or networks.  We also initialize the congestion/slow start
                   2068:  * window to be a single segment if the destination isn't local.
                   2069:  * While looking at the routing entry, we also initialize other path-dependent
                   2070:  * parameters from pre-set or cached values in the routing entry.
                   2071:  *
                   2072:  * Also take into account the space needed for options that we
                   2073:  * send regularly.  Make maxseg shorter by that amount to assure
                   2074:  * that we can send maxseg amount of data even when the options
                   2075:  * are present.  Store the upper limit of the length of options plus
                   2076:  * data in maxopd.
                   2077:  *
                   2078:  * NOTE that this routine is only called when we process an incoming
                   2079:  * segment, for outgoing segments only tcp_mssopt is called.
                   2080:  *
                   2081:  * In case of T/TCP, we call this routine during implicit connection
                   2082:  * setup as well (offer = -1), to initialize maxseg from the cached
                   2083:  * MSS of our peer.
                          *
                          * Inputs:
                          *   tp    - connection to configure; tp->t_inpcb supplies the route.
                          *   offer - MSS offered by the peer; -1 selects the value cached in
                          *           the route's TAO entry and 0 selects tcp_mssdflt.
                          *
                          * Results: sets tp->t_maxopd and tp->t_maxseg; may also seed
                          * t_srtt, t_rttvar, t_rttmin and t_rxtcur from the route metrics,
                          * resize both socket buffers through sbreserve(), and set
                          * snd_cwnd and snd_ssthresh.  When no route is found only
                          * t_maxopd and t_maxseg are set (both to tcp_mssdflt).
                   2084:  */
                   2085: void
                   2086: tcp_mss(tp, offer)
                   2087:        struct tcpcb *tp;
                   2088:        int offer;
                   2089: {
                   2090:        register struct rtentry *rt;
                   2091:        struct ifnet *ifp;
                   2092:        register int rtt, mss;
                   2093:        u_long bufsize;
                   2094:        struct inpcb *inp;
                   2095:        struct socket *so;
                   2096:        struct rmxp_tao *taop;
                   2097:        int origoffer = offer;
                   2098: 
                   2099:        inp = tp->t_inpcb;
                                /* No route: use the default for both limits and stop here. */
                   2100:        if ((rt = tcp_rtlookup(inp)) == NULL) {
                   2101:                tp->t_maxopd = tp->t_maxseg = tcp_mssdflt;
                   2102:                return;
                   2103:        }
                   2104:        ifp = rt->rt_ifp;
                   2105:        so = inp->inp_socket;
                   2106: 
                   2107:        taop = rmx_taop(rt->rt_rmx);
                   2108:        /*
                   2109:         * Offer == -1 means that we didn't receive SYN yet,
                   2110:         * use cached value in that case;
                   2111:         */
                   2112:        if (offer == -1)
                   2113:                offer = taop->tao_mssopt;
                   2114:        /*
                   2115:         * Offer == 0 means that there was no MSS on the SYN segment,
                   2116:         * in this case we use tcp_mssdflt.
                   2117:         */
                   2118:        if (offer == 0)
                   2119:                offer = tcp_mssdflt;
                   2120:        else
                   2121:                /*
                   2122:                 * Sanity check: make sure that maxopd will be large
                   2123:                 * enough to allow some data on segments even if
                   2124:                 * all the option space is used (40 bytes).  Otherwise
                   2125:                 * funny things may happen in tcp_output.
                   2126:                 */
                   2127:                offer = max(offer, 64);
                                /* Remember the (defaulted or clamped) offer in the TAO entry. */
                   2128:        taop->tao_mssopt = offer;
                   2129: 
                   2130:        /*
                   2131:         * While we're here, check if there's an initial rtt
                   2132:         * or rttvar.  Convert from the route-table units
                   2133:         * to scaled multiples of the slow timeout timer.
                   2134:         */
                   2135:        if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) {
                   2136:                /*
                   2137:                 * XXX the lock bit for RTT indicates that the value
                   2138:                 * is also a minimum value; this is subject to time.
                   2139:                 */
                   2140:                if (rt->rt_rmx.rmx_locks & RTV_RTT)
                   2141:                        tp->t_rttmin = rtt / (RTM_RTTUNIT / PR_SLOWHZ);
                   2142:                tp->t_srtt = rtt / (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
                   2143:                tcpstat.tcps_usedrtt++;
                   2144:                if (rt->rt_rmx.rmx_rttvar) {
                   2145:                        tp->t_rttvar = rt->rt_rmx.rmx_rttvar /
                   2146:                            (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
                   2147:                        tcpstat.tcps_usedrttvar++;
                   2148:                } else {
                   2149:                        /* default variation is +- 1 rtt */
                   2150:                        tp->t_rttvar =
                   2151:                            tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
                   2152:                }
                   2153:                TCPT_RANGESET(tp->t_rxtcur,
                   2154:                    ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
                   2155:                    tp->t_rttmin, TCPTV_REXMTMAX);
                   2156:        }
                   2157:        /*
                   2158:         * if there's an mtu associated with the route, use it
                   2159:         */
                   2160:        if (rt->rt_rmx.rmx_mtu)
                   2161:                mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr);
                   2162:        else
                   2163:        {
                                        /*
                                         * Otherwise start from the interface mtu, and for a
                                         * non-local peer do not exceed tcp_mssdflt.
                                         */
                   2164:                mss = ifp->if_mtu - sizeof(struct tcpiphdr);
                   2165:                if (!in_localaddr(inp->inp_faddr))
                   2166:                        mss = min(mss, tcp_mssdflt);
                   2167:        }
                                /* Never exceed what the peer offered (or the substituted default). */
                   2168:        mss = min(mss, offer);
                   2169:        /*
                   2170:         * maxopd stores the maximum length of data AND options
                   2171:         * in a segment; maxseg is the amount of data in a normal
                   2172:         * segment.  We need to store this value (maxopd) apart
                   2173:         * from maxseg, because now every segment carries options
                   2174:         * and thus we normally have somewhat less data in segments.
                   2175:         */
                   2176:        tp->t_maxopd = mss;
                   2177: 
                   2178:        /*
                   2179:         * In case of T/TCP, origoffer==-1 indicates, that no segments
                   2180:         * were received yet.  In this case we just guess, otherwise
                   2181:         * we do the same as before T/TCP.
                   2182:         */
                   2183:        if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
                   2184:            (origoffer == -1 ||
                   2185:             (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP))
                   2186:                mss -= TCPOLEN_TSTAMP_APPA;
                   2187:        if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC &&
                   2188:            (origoffer == -1 ||
                   2189:             (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC))
                   2190:                mss -= TCPOLEN_CC_APPA;
                   2191: 
                                /*
                                 * Round mss down to a multiple of MCLBYTES once it exceeds one
                                 * cluster.  The mask form is used when MCLBYTES is a power of
                                 * two; both branches are ordinary statements at function level
                                 * despite their extra indentation.
                                 */
                   2192: #if    (MCLBYTES & (MCLBYTES - 1)) == 0
                   2193:                if (mss > MCLBYTES)
                   2194:                        mss &= ~(MCLBYTES-1);
                   2195: #else
                   2196:                if (mss > MCLBYTES)
                   2197:                        mss = mss / MCLBYTES * MCLBYTES;
                   2198: #endif
                   2199:        /*
                   2200:         * If there's a pipesize, change the socket buffer
                   2201:         * to that size.  Make the socket buffers an integral
                   2202:         * number of mss units; if the mss is larger than
                   2203:         * the socket buffer, decrease the mss.
                   2204:         */
                                /*
                                 * When RTV_SPIPE is nonzero the route's sendpipe is used and the
                                 * socket's high-water mark is only the fallback for a zero
                                 * sendpipe; otherwise the assignment below is unconditional.
                                 */
                   2205: #if RTV_SPIPE
                   2206:        if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0)
                   2207: #endif
                   2208:                bufsize = so->so_snd.sb_hiwat;
                                /*
                                 * NOTE(review): bufsize is u_long while mss is int, so this
                                 * comparison is presumably carried out unsigned.  Confirm that
                                 * mss cannot be zero or negative here, since roundup() below
                                 * takes it as the step -- TODO verify against roundup's
                                 * definition.
                                 */
                   2209:        if (bufsize < mss)
                   2210:                mss = bufsize;
                   2211:        else {
                   2212:                bufsize = roundup(bufsize, mss);
                   2213:                if (bufsize > sb_max)
                   2214:                        bufsize = sb_max;
                   2215:                (void)sbreserve(&so->so_snd, bufsize);
                   2216:        }
                   2217:        tp->t_maxseg = mss;
                   2218: 
                                /*
                                 * Same treatment for the receive buffer, except that mss is
                                 * never reduced here; the buffer is only resized when it is
                                 * larger than mss.
                                 */
                   2219: #if RTV_RPIPE
                   2220:        if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0)
                   2221: #endif
                   2222:                bufsize = so->so_rcv.sb_hiwat;
                   2223:        if (bufsize > mss) {
                   2224:                bufsize = roundup(bufsize, mss);
                   2225:                if (bufsize > sb_max)
                   2226:                        bufsize = sb_max;
                   2227:                (void)sbreserve(&so->so_rcv, bufsize);
                   2228:        }
                   2229:        /*
                   2230:         * Don't force slow-start on local network.
                   2231:         */
                   2232:        if (!in_localaddr(inp->inp_faddr))
                   2233:                tp->snd_cwnd = mss;
                   2234: 
                   2235:        if (rt->rt_rmx.rmx_ssthresh) {
                   2236:                /*
                   2237:                 * There's some sort of gateway or interface
                   2238:                 * buffer limit on the path.  Use this to set
                   2239:                 * the slow start threshold, but set the
                   2240:                 * threshold to no less than 2*mss.
                   2241:                 */
                   2242:                tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh);
                   2243:                tcpstat.tcps_usedssthresh++;
                   2244:        }
                   2245: }
                   2246: 
                   2247: /*
                   2248:  * Determine the MSS option to send on an outgoing SYN.
                   2249:  */
                   2250: int
                   2251: tcp_mssopt(tp)
                   2252:        struct tcpcb *tp;
                   2253: {
                   2254:        struct rtentry *rt;
                   2255: 
                   2256:        rt = tcp_rtlookup(tp->t_inpcb);
                   2257:        if (rt == NULL)
                   2258:                return tcp_mssdflt;
                   2259: 
                   2260:        return rt->rt_ifp->if_mtu - sizeof(struct tcpiphdr);
                   2261: }

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.