Annotation of 43BSDReno/sys/netinet/tcp_input.c, revision 1.1

1.1     ! root        1: /*
        !             2:  * Copyright (c) 1982, 1986, 1988, 1990 Regents of the University of California.
        !             3:  * All rights reserved.
        !             4:  *
        !             5:  * Redistribution is only permitted until one year after the first shipment
        !             6:  * of 4.4BSD by the Regents.  Otherwise, redistribution and use in source and
        !             7:  * binary forms are permitted provided that: (1) source distributions retain
        !             8:  * this entire copyright notice and comment, and (2) distributions including
        !             9:  * binaries display the following acknowledgement:  ``This product includes
        !            10:  * software developed by the University of California, Berkeley and its
        !            11:  * contributors'' in the documentation or other materials provided with the
        !            12:  * distribution and in all advertising materials mentioning features or use
        !            13:  * of this software.  Neither the name of the University nor the names of
        !            14:  * its contributors may be used to endorse or promote products derived from
        !            15:  * this software without specific prior written permission.
        !            16:  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
        !            17:  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
        !            18:  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
        !            19:  *
        !            20:  *     @(#)tcp_input.c 7.25 (Berkeley) 6/30/90
        !            21:  */
        !            22: 
        !            23: #include "param.h"
        !            24: #include "systm.h"
        !            25: #include "malloc.h"
        !            26: #include "mbuf.h"
        !            27: #include "protosw.h"
        !            28: #include "socket.h"
        !            29: #include "socketvar.h"
        !            30: #include "errno.h"
        !            31: 
        !            32: #include "../net/if.h"
        !            33: #include "../net/route.h"
        !            34: 
        !            35: #include "in.h"
        !            36: #include "in_systm.h"
        !            37: #include "ip.h"
        !            38: #include "in_pcb.h"
        !            39: #include "ip_var.h"
        !            40: #include "tcp.h"
        !            41: #include "tcp_fsm.h"
        !            42: #include "tcp_seq.h"
        !            43: #include "tcp_timer.h"
        !            44: #include "tcp_var.h"
        !            45: #include "tcpip.h"
        !            46: #include "tcp_debug.h"
        !            47: 
        !            48: int    tcprexmtthresh = 3;     /* NOTE(review): presumably the duplicate-ack threshold for fast retransmit; no use visible here -- confirm */
        !            49: int    tcppredack;     /* XXX debugging: times hdr predict ok for acks */
        !            50: int    tcppreddat;     /* XXX # times header prediction ok for data packets */
        !            51: int    tcppcbcachemiss;        /* times tcp_last_inpcb did not match the arriving segment */
        !            52: struct tcpiphdr tcp_saveti;    /* copy of the incoming header, taken when SO_DEBUG is set */
        !            53: struct inpcb *tcp_last_inpcb = &tcb;   /* one-entry pcb cache, tried before in_pcblookup() */
        !            54: 
        !            55: struct tcpcb *tcp_newtcpcb();
        !            56: 
        !            57: /*
        !            58:  * Insert segment ti (data in mbuf chain m) into the reassembly queue
        !            59:  * of control block tp, and set flags to TH_FIN when the data handed
        !            60:  * to socket so ends with a segment carrying FIN, else to 0.  The
        !            61:  * common case is done inline (segment is the next expected on an
        !            62:  * ESTABLISHED connection and the queue is empty): m is appended to
        !            63:  * the receive buffer directly and DELACK is set.  Every other case
        !            64:  * goes through tcp_reass() and is acked immediately (so fast
        !            65:  * retransmit can work).  Arguments are evaluated more than once.
        !            66:  */
        !            67: #define        TCP_REASS(tp, ti, m, so, flags) { \
        !            68:        if ((ti)->ti_seq == (tp)->rcv_nxt && \
        !            69:            (tp)->seg_next == (struct tcpiphdr *)(tp) && \
        !            70:            (tp)->t_state == TCPS_ESTABLISHED) { \
        !            71:                (tp)->t_flags |= TF_DELACK; \
        !            72:                (tp)->rcv_nxt += (ti)->ti_len; \
        !            73:                (flags) = (ti)->ti_flags & TH_FIN; \
        !            74:                tcpstat.tcps_rcvpack++;\
        !            75:                tcpstat.tcps_rcvbyte += (ti)->ti_len;\
        !            76:                sbappend(&(so)->so_rcv, (m)); \
        !            77:                sorwakeup(so); \
        !            78:        } else { \
        !            79:                (flags) = tcp_reass((tp), (ti), (m)); \
        !            80:                (tp)->t_flags |= TF_ACKNOW; \
        !            81:        } \
        !            82: }
        !            83: 
        !            84: tcp_reass(tp, ti, m)    /* queue segment ti, deliver in-order data; returns TH_FIN or 0 */
        !            85:        register struct tcpcb *tp;       /* connection owning the segment queue */
        !            86:        register struct tcpiphdr *ti;    /* new segment, or 0 to only deliver */
        !            87:        struct mbuf *m;                  /* mbuf chain holding segment ti */
        !            88: {
        !            89:        register struct tcpiphdr *q;     /* cursor into the segment queue */
        !            90:        struct socket *so = tp->t_inpcb->inp_socket;
        !            91:        int flags;                       /* TH_FIN bit of the last segment delivered */
        !            92: 
        !            93:        /*
        !            94:         * Called with ti == 0 once the connection becomes established,
        !            95:         * to force data queued before ESTABLISHED up to the user socket.
        !            96:         */
        !            97:        if (ti == 0)
        !            98:                goto present;
        !            99: 
        !           100:        /*
        !           101:         * Find the first queued segment that begins after this one does.
        !           102:         */
        !           103:        for (q = tp->seg_next; q != (struct tcpiphdr *)tp;
        !           104:            q = (struct tcpiphdr *)q->ti_next)
        !           105:                if (SEQ_GT(q->ti_seq, ti->ti_seq))
        !           106:                        break;
        !           107: 
        !           108:        /*
        !           109:         * If a queued segment precedes ti, it may already hold the front
        !           110:         * of our data.  If so, trim that overlap from the incoming segment;
        !           111:         * if it holds all of our data, count a duplicate and drop us.
        !           112:         */
        !           113:        if ((struct tcpiphdr *)q->ti_prev != (struct tcpiphdr *)tp) {
        !           114:                register int i;         /* bytes of ti already covered by q */
        !           115:                q = (struct tcpiphdr *)q->ti_prev;
        !           116:                /* conversion to int (in i) handles seq wraparound */
        !           117:                i = q->ti_seq + q->ti_len - ti->ti_seq;
        !           118:                if (i > 0) {
        !           119:                        if (i >= ti->ti_len) {  /* nothing new in ti */
        !           120:                                tcpstat.tcps_rcvduppack++;
        !           121:                                tcpstat.tcps_rcvdupbyte += ti->ti_len;
        !           122:                                m_freem(m);
        !           123:                                return (0);
        !           124:                        }
        !           125:                        m_adj(m, i);
        !           126:                        ti->ti_len -= i;
        !           127:                        ti->ti_seq += i;
        !           128:                }
        !           129:                q = (struct tcpiphdr *)(q->ti_next);    /* back to the successor */
        !           130:        }
        !           131:        tcpstat.tcps_rcvoopack++;
        !           132:        tcpstat.tcps_rcvoobyte += ti->ti_len;
        !           133:        REASS_MBUF(ti) = m;             /* XXX remembered for delivery at present: */
        !           134: 
        !           135:        /*
        !           136:         * While we overlap succeeding segments trim them or,
        !           137:         * if they are completely covered, dequeue and free them.
        !           138:         */
        !           139:        while (q != (struct tcpiphdr *)tp) {
        !           140:                register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq;
        !           141:                if (i <= 0)             /* ti ends before q begins */
        !           142:                        break;
        !           143:                if (i < q->ti_len) {    /* partial overlap: shorten q */
        !           144:                        q->ti_seq += i;
        !           145:                        q->ti_len -= i;
        !           146:                        m_adj(REASS_MBUF(q), i);
        !           147:                        break;
        !           148:                }
        !           149:                q = (struct tcpiphdr *)q->ti_next;      /* step past the covered segment, */
        !           150:                m = REASS_MBUF((struct tcpiphdr *)q->ti_prev);
        !           151:                remque(q->ti_prev);                     /* then unlink it */
        !           152:                m_freem(m);                             /* and free its data */
        !           153:        }
        !           154: 
        !           155:        /*
        !           156:         * Stick new segment in its place.
        !           157:         */
        !           158:        insque(ti, q->ti_prev);
        !           159: 
        !           160: present:
        !           161:        /*
        !           162:         * Present data to user, advancing rcv_nxt through
        !           163:         * completed sequence space.
        !           164:         */
        !           165:        if (TCPS_HAVERCVDSYN(tp->t_state) == 0)
        !           166:                return (0);
        !           167:        ti = tp->seg_next;
        !           168:        if (ti == (struct tcpiphdr *)tp || ti->ti_seq != tp->rcv_nxt)   /* queue empty, or first segment is not at rcv_nxt */
        !           169:                return (0);
        !           170:        if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len)     /* data stays queued while in SYN_RECEIVED */
        !           171:                return (0);
        !           172:        do {
        !           173:                tp->rcv_nxt += ti->ti_len;
        !           174:                flags = ti->ti_flags & TH_FIN;  /* overwritten each pass: last segment wins */
        !           175:                remque(ti);
        !           176:                m = REASS_MBUF(ti);
        !           177:                ti = (struct tcpiphdr *)ti->ti_next;
        !           178:                if (so->so_state & SS_CANTRCVMORE)      /* flag set: discard the data */
        !           179:                        m_freem(m);
        !           180:                else
        !           181:                        sbappend(&so->so_rcv, m);
        !           182:        } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt);
        !           183:        sorwakeup(so);
        !           184:        return (flags);
        !           185: }
        !           186: 
        !           187: /*
        !           188:  * TCP input routine, follows pages 65-76 of the
        !           189:  * protocol specification dated September, 1981 very closely.
        !           190:  */
        !           191: tcp_input(m, iphlen)
        !           192:        register struct mbuf *m;
        !           193:        int iphlen;
        !           194: {
        !           195:        register struct tcpiphdr *ti;
        !           196:        register struct inpcb *inp;
        !           197:        struct mbuf *om = 0;
        !           198:        int len, tlen, off;
        !           199:        register struct tcpcb *tp = 0;
        !           200:        register int tiflags;
        !           201:        struct socket *so;
        !           202:        int todrop, acked, ourfinisacked, needoutput = 0;
        !           203:        short ostate;
        !           204:        struct in_addr laddr;
        !           205:        int dropsocket = 0;
        !           206:        int iss = 0;
        !           207: 
        !           208:        tcpstat.tcps_rcvtotal++;
        !           209:        /*
        !           210:         * Get IP and TCP header together in first mbuf.
        !           211:         * Note: IP leaves IP header in first mbuf.
        !           212:         */
        !           213:        ti = mtod(m, struct tcpiphdr *);
        !           214:        if (iphlen > sizeof (struct ip))
        !           215:                ip_stripoptions(m, (struct mbuf *)0);
        !           216:        if (m->m_len < sizeof (struct tcpiphdr)) {
        !           217:                if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) {
        !           218:                        tcpstat.tcps_rcvshort++;
        !           219:                        return;
        !           220:                }
        !           221:                ti = mtod(m, struct tcpiphdr *);
        !           222:        }
        !           223: 
        !           224:        /*
        !           225:         * Checksum extended TCP header and data.
        !           226:         */
        !           227:        tlen = ((struct ip *)ti)->ip_len;
        !           228:        len = sizeof (struct ip) + tlen;
        !           229:        ti->ti_next = ti->ti_prev = 0;
        !           230:        ti->ti_x1 = 0;
        !           231:        ti->ti_len = (u_short)tlen;
        !           232:        HTONS(ti->ti_len);
        !           233:        if (ti->ti_sum = in_cksum(m, len)) {
        !           234:                tcpstat.tcps_rcvbadsum++;
        !           235:                goto drop;
        !           236:        }
        !           237: 
        !           238:        /*
        !           239:         * Check that TCP offset makes sense,
        !           240:         * pull out TCP options and adjust length.              XXX
        !           241:         */
        !           242:        off = ti->ti_off << 2;
        !           243:        if (off < sizeof (struct tcphdr) || off > tlen) {
        !           244:                tcpstat.tcps_rcvbadoff++;
        !           245:                goto drop;
        !           246:        }
        !           247:        tlen -= off;
        !           248:        ti->ti_len = tlen;
        !           249:        if (off > sizeof (struct tcphdr)) {
        !           250:                if (m->m_len < sizeof(struct ip) + off) {
        !           251:                        if ((m = m_pullup(m, sizeof (struct ip) + off)) == 0) {
        !           252:                                tcpstat.tcps_rcvshort++;
        !           253:                                return;
        !           254:                        }
        !           255:                        ti = mtod(m, struct tcpiphdr *);
        !           256:                }
        !           257:                om = m_get(M_DONTWAIT, MT_DATA);
        !           258:                if (om == 0)
        !           259:                        goto drop;
        !           260:                om->m_len = off - sizeof (struct tcphdr);
        !           261:                { caddr_t op = mtod(m, caddr_t) + sizeof (struct tcpiphdr);
        !           262:                  bcopy(op, mtod(om, caddr_t), (unsigned)om->m_len);
        !           263:                  m->m_len -= om->m_len;
        !           264:                  m->m_pkthdr.len -= om->m_len;
        !           265:                  bcopy(op+om->m_len, op,
        !           266:                   (unsigned)(m->m_len-sizeof (struct tcpiphdr)));
        !           267:                }
        !           268:        }
        !           269:        tiflags = ti->ti_flags;
        !           270: 
        !           271:        /*
        !           272:         * Convert TCP protocol specific fields to host format.
        !           273:         */
        !           274:        NTOHL(ti->ti_seq);
        !           275:        NTOHL(ti->ti_ack);
        !           276:        NTOHS(ti->ti_win);
        !           277:        NTOHS(ti->ti_urp);
        !           278: 
        !           279:        /*
        !           280:         * Locate pcb for segment.
        !           281:         */
        !           282: findpcb:
        !           283:        inp = tcp_last_inpcb;
        !           284:        if (inp->inp_lport != ti->ti_dport ||
        !           285:            inp->inp_fport != ti->ti_sport ||
        !           286:            inp->inp_faddr.s_addr != ti->ti_src.s_addr ||
        !           287:            inp->inp_laddr.s_addr != ti->ti_dst.s_addr) {
        !           288:                inp = in_pcblookup(&tcb, ti->ti_src, ti->ti_sport,
        !           289:                    ti->ti_dst, ti->ti_dport, INPLOOKUP_WILDCARD);
        !           290:                if (inp)
        !           291:                        tcp_last_inpcb = inp;
        !           292:                ++tcppcbcachemiss;
        !           293:        }
        !           294: 
        !           295:        /*
        !           296:         * If the state is CLOSED (i.e., TCB does not exist) then
        !           297:         * all data in the incoming segment is discarded.
        !           298:         * If the TCB exists but is in CLOSED state, it is embryonic,
        !           299:         * but should either do a listen or a connect soon.
        !           300:         */
        !           301:        if (inp == 0)
        !           302:                goto dropwithreset;
        !           303:        tp = intotcpcb(inp);
        !           304:        if (tp == 0)
        !           305:                goto dropwithreset;
        !           306:        if (tp->t_state == TCPS_CLOSED)
        !           307:                goto drop;
        !           308:        so = inp->inp_socket;
        !           309:        if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) {
        !           310:                if (so->so_options & SO_DEBUG) {
        !           311:                        ostate = tp->t_state;
        !           312:                        tcp_saveti = *ti;
        !           313:                }
        !           314:                if (so->so_options & SO_ACCEPTCONN) {
        !           315:                        so = sonewconn(so, 0);
        !           316:                        if (so == 0)
        !           317:                                goto drop;
        !           318:                        /*
        !           319:                         * This is ugly, but ....
        !           320:                         *
        !           321:                         * Mark socket as temporary until we're
        !           322:                         * committed to keeping it.  The code at
        !           323:                         * ``drop'' and ``dropwithreset'' check the
        !           324:                         * flag dropsocket to see if the temporary
        !           325:                         * socket created here should be discarded.
        !           326:                         * We mark the socket as discardable until
        !           327:                         * we're committed to it below in TCPS_LISTEN.
        !           328:                         */
        !           329:                        dropsocket++;
        !           330:                        inp = (struct inpcb *)so->so_pcb;
        !           331:                        inp->inp_laddr = ti->ti_dst;
        !           332:                        inp->inp_lport = ti->ti_dport;
        !           333: #if BSD>=43
        !           334:                        inp->inp_options = ip_srcroute();
        !           335: #endif
        !           336:                        tp = intotcpcb(inp);
        !           337:                        tp->t_state = TCPS_LISTEN;
        !           338:                }
        !           339:        }
        !           340: 
        !           341:        /*
        !           342:         * Segment received on connection.
        !           343:         * Reset idle time and keep-alive timer.
        !           344:         */
        !           345:        tp->t_idle = 0;
        !           346:        tp->t_timer[TCPT_KEEP] = tcp_keepidle;
        !           347: 
        !           348:        /*
        !           349:         * Process options if not in LISTEN state,
        !           350:         * else do it below (after getting remote address).
        !           351:         */
        !           352:        if (om && tp->t_state != TCPS_LISTEN) {
        !           353:                tcp_dooptions(tp, om, ti);
        !           354:                om = 0;
        !           355:        }
        !           356:        /* 
        !           357:         * Header prediction: check for the two common cases
        !           358:         * of a uni-directional data xfer.  If the packet has
        !           359:         * no control flags, is in-sequence, the window didn't
        !           360:         * change and we're not retransmitting, it's a
        !           361:         * candidate.  If the length is zero and the ack moved
        !           362:         * forward, we're the sender side of the xfer.  Just
        !           363:         * free the data acked & wake any higher level process
        !           364:         * that was blocked waiting for space.  If the length
        !           365:         * is non-zero and the ack didn't move, we're the
        !           366:         * receiver side.  If we're getting packets in-order
        !           367:         * (the reassembly queue is empty), add the data to
        !           368:         * the socket buffer and note that we need a delayed ack.
        !           369:         */
        !           370:        if (tp->t_state == TCPS_ESTABLISHED &&
        !           371:            (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
        !           372:            ti->ti_seq == tp->rcv_nxt &&
        !           373:            ti->ti_win && ti->ti_win == tp->snd_wnd &&
        !           374:            tp->snd_nxt == tp->snd_max) {
        !           375:                if (ti->ti_len == 0) {
        !           376:                        if (SEQ_GT(ti->ti_ack, tp->snd_una) &&
        !           377:                            SEQ_LEQ(ti->ti_ack, tp->snd_max) &&
        !           378:                            tp->snd_cwnd >= tp->snd_wnd) {
        !           379:                                /*
        !           380:                                 * this is a pure ack for outstanding data.
        !           381:                                 */
        !           382:                                ++tcppredack;
        !           383:                                if (tp->t_rtt && SEQ_GT(ti->ti_ack,tp->t_rtseq))
        !           384:                                        tcp_xmit_timer(tp);
        !           385:                                acked = ti->ti_ack - tp->snd_una;
        !           386:                                tcpstat.tcps_rcvackpack++;
        !           387:                                tcpstat.tcps_rcvackbyte += acked;
        !           388:                                sbdrop(&so->so_snd, acked);
        !           389:                                tp->snd_una = ti->ti_ack;
        !           390:                                m_freem(m);
        !           391: 
        !           392:                                /*
        !           393:                                 * If all outstanding data are acked, stop
        !           394:                                 * retransmit timer, otherwise restart timer
        !           395:                                 * using current (possibly backed-off) value.
        !           396:                                 * If process is waiting for space,
        !           397:                                 * wakeup/selwakeup/signal.  If data
        !           398:                                 * are ready to send, let tcp_output
        !           399:                                 * decide between more output or persist.
        !           400:                                 */
        !           401:                                if (tp->snd_una == tp->snd_max)
        !           402:                                        tp->t_timer[TCPT_REXMT] = 0;
        !           403:                                else if (tp->t_timer[TCPT_PERSIST] == 0)
        !           404:                                        tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
        !           405: 
        !           406:                                if (so->so_snd.sb_flags & SB_NOTIFY)
        !           407:                                        sowwakeup(so);
        !           408:                                if (so->so_snd.sb_cc)
        !           409:                                        (void) tcp_output(tp);
        !           410:                                return;
        !           411:                        }
        !           412:                } else if (ti->ti_ack == tp->snd_una &&
        !           413:                    tp->seg_next == (struct tcpiphdr *)tp &&
        !           414:                    ti->ti_len <= sbspace(&so->so_rcv)) {
        !           415:                        /*
        !           416:                         * this is a pure, in-sequence data packet
        !           417:                         * with nothing on the reassembly queue and
        !           418:                         * we have enough buffer space to take it.
        !           419:                         */
        !           420:                        ++tcppreddat;
        !           421:                        tp->rcv_nxt += ti->ti_len;
        !           422:                        tcpstat.tcps_rcvpack++;
        !           423:                        tcpstat.tcps_rcvbyte += ti->ti_len;
        !           424:                        /*
        !           425:                         * Drop TCP and IP headers then add data
        !           426:                         * to socket buffer
        !           427:                         */
        !           428:                        m->m_data += sizeof(struct tcpiphdr);
        !           429:                        m->m_len -= sizeof(struct tcpiphdr);
        !           430:                        sbappend(&so->so_rcv, m);
        !           431:                        sorwakeup(so);
        !           432:                        tp->t_flags |= TF_DELACK;
        !           433:                        return;
        !           434:                }
        !           435:        }
        !           436: 
        !           437:        /*
        !           438:         * Drop TCP and IP headers; TCP options were dropped above.
        !           439:         */
        !           440:        m->m_data += sizeof(struct tcpiphdr);
        !           441:        m->m_len -= sizeof(struct tcpiphdr);
        !           442: 
        !           443:        /*
        !           444:         * Calculate amount of space in receive window,
        !           445:         * and then do TCP input processing.
        !           446:         * Receive window is amount of space in rcv queue,
        !           447:         * but not less than advertised window.
        !           448:         */
        !           449:        { int win;
        !           450: 
        !           451:        win = sbspace(&so->so_rcv);
        !           452:        if (win < 0)
        !           453:                win = 0;
        !           454:        tp->rcv_wnd = max(win, (int)(tp->rcv_adv - tp->rcv_nxt));
        !           455:        }
        !           456: 
        !           457:        switch (tp->t_state) {
        !           458: 
        !           459:        /*
        !           460:         * If the state is LISTEN then ignore segment if it contains an RST.
        !           461:         * If the segment contains an ACK then it is bad and send a RST.
        !           462:         * If it does not contain a SYN then it is not interesting; drop it.
        !           463:         * Don't bother responding if the destination was a broadcast.
        !           464:         * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial
        !           465:         * tp->iss, and send a segment:
        !           466:         *     <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
        !           467:         * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.
        !           468:         * Fill in remote peer address fields if not previously specified.
        !           469:         * Enter SYN_RECEIVED state, and process any other fields of this
        !           470:         * segment in this state.
        !           471:         */
        !           472:        case TCPS_LISTEN: {
        !           473:                struct mbuf *am;
        !           474:                register struct sockaddr_in *sin;
        !           475: 
        !           476:                if (tiflags & TH_RST)
        !           477:                        goto drop;
        !           478:                if (tiflags & TH_ACK)
        !           479:                        goto dropwithreset;
        !           480:                if ((tiflags & TH_SYN) == 0)
        !           481:                        goto drop;
        !           482:                if (m->m_flags & M_BCAST)
        !           483:                        goto drop;
        !           484:                am = m_get(M_DONTWAIT, MT_SONAME);      /* XXX */
        !           485:                if (am == NULL)
        !           486:                        goto drop;
        !           487:                am->m_len = sizeof (struct sockaddr_in);
        !           488:                sin = mtod(am, struct sockaddr_in *);
        !           489:                sin->sin_family = AF_INET;
        !           490:                sin->sin_len = sizeof(*sin);
        !           491:                sin->sin_addr = ti->ti_src;
        !           492:                sin->sin_port = ti->ti_sport;
        !           493:                laddr = inp->inp_laddr;
        !           494:                if (inp->inp_laddr.s_addr == INADDR_ANY)
        !           495:                        inp->inp_laddr = ti->ti_dst;
        !           496:                if (in_pcbconnect(inp, am)) {
        !           497:                        inp->inp_laddr = laddr;
        !           498:                        (void) m_free(am);
        !           499:                        goto drop;
        !           500:                }
        !           501:                (void) m_free(am);
        !           502:                tp->t_template = tcp_template(tp);
        !           503:                if (tp->t_template == 0) {
        !           504:                        tp = tcp_drop(tp, ENOBUFS);
        !           505:                        dropsocket = 0;         /* socket is already gone */
        !           506:                        goto drop;
        !           507:                }
        !           508:                if (om) {
        !           509:                        tcp_dooptions(tp, om, ti);
        !           510:                        om = 0;
        !           511:                }
        !           512:                if (iss)
        !           513:                        tp->iss = iss;
        !           514:                else
        !           515:                        tp->iss = tcp_iss;
        !           516:                tcp_iss += TCP_ISSINCR/2;
        !           517:                tp->irs = ti->ti_seq;
        !           518:                tcp_sendseqinit(tp);
        !           519:                tcp_rcvseqinit(tp);
        !           520:                tp->t_flags |= TF_ACKNOW;
        !           521:                tp->t_state = TCPS_SYN_RECEIVED;
        !           522:                tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
        !           523:                dropsocket = 0;         /* committed to socket */
        !           524:                tcpstat.tcps_accepts++;
        !           525:                goto trimthenstep6;
        !           526:                }
        !           527: 
        !           528:        /*
        !           529:         * If the state is SYN_SENT:
        !           530:         *      if seg contains an ACK, but not for our SYN, send a RST and drop it.
        !           531:         *      if seg contains a RST, drop it (and the connection too, if ACK is set).
        !           532:         *      if seg does not contain SYN, then drop it.
        !           533:         * Otherwise this is an acceptable SYN segment
        !           534:         *      initialize tp->rcv_nxt and tp->irs
        !           535:         *      if seg contains ack then advance tp->snd_una
        !           536:         *      if SYN has been acked change to ESTABLISHED else SYN_RCVD state
        !           537:         *      arrange for segment to be acked (eventually)
        !           538:         *      continue processing rest of data/controls, beginning with URG
        !           539:         */
        !           540:        case TCPS_SYN_SENT:
        !           541:                if ((tiflags & TH_ACK) &&
        !           542:                    (SEQ_LEQ(ti->ti_ack, tp->iss) ||
        !           543:                     SEQ_GT(ti->ti_ack, tp->snd_max)))
        !           544:                        goto dropwithreset;
        !           545:                if (tiflags & TH_RST) {
        !           546:                        if (tiflags & TH_ACK)
        !           547:                                tp = tcp_drop(tp, ECONNREFUSED);
        !           548:                        goto drop;
        !           549:                }
        !           550:                if ((tiflags & TH_SYN) == 0)
        !           551:                        goto drop;
        !           552:                if (tiflags & TH_ACK) {
        !           553:                        tp->snd_una = ti->ti_ack;
        !           554:                        if (SEQ_LT(tp->snd_nxt, tp->snd_una))
        !           555:                                tp->snd_nxt = tp->snd_una;
        !           556:                }
        !           557:                tp->t_timer[TCPT_REXMT] = 0;
        !           558:                tp->irs = ti->ti_seq;
        !           559:                tcp_rcvseqinit(tp);
        !           560:                tp->t_flags |= TF_ACKNOW;
        !           561:                if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) {
        !           562:                        tcpstat.tcps_connects++;
        !           563:                        soisconnected(so);
        !           564:                        tp->t_state = TCPS_ESTABLISHED;
        !           565:                        (void) tcp_reass(tp, (struct tcpiphdr *)0,
        !           566:                                (struct mbuf *)0);
        !           567:                        /*
        !           568:                         * if we didn't have to retransmit the SYN,
        !           569:                         * use its rtt as our initial srtt & rtt var.
        !           570:                         */
        !           571:                        if (tp->t_rtt)
        !           572:                                tcp_xmit_timer(tp);
        !           573:                } else
        !           574:                        tp->t_state = TCPS_SYN_RECEIVED;
        !           575: 
        !           576: trimthenstep6:
        !           577:                /*
        !           578:                 * Advance ti->ti_seq to correspond to first data byte.
        !           579:                 * If data, trim to stay within window,
        !           580:                 * dropping FIN if necessary.
        !           581:                 */
        !           582:                ti->ti_seq++;
        !           583:                if (ti->ti_len > tp->rcv_wnd) {
        !           584:                        todrop = ti->ti_len - tp->rcv_wnd;
        !           585:                        m_adj(m, -todrop);
        !           586:                        ti->ti_len = tp->rcv_wnd;
        !           587:                        tiflags &= ~TH_FIN;
        !           588:                        tcpstat.tcps_rcvpackafterwin++;
        !           589:                        tcpstat.tcps_rcvbyteafterwin += todrop;
        !           590:                }
        !           591:                tp->snd_wl1 = ti->ti_seq - 1;
        !           592:                tp->rcv_up = ti->ti_seq;
        !           593:                goto step6;
        !           594:        }
        !           595: 
        !           596:        /*
        !           597:         * States other than LISTEN or SYN_SENT.
        !           598:         * First check that at least some bytes of segment are within 
        !           599:         * receive window.  If segment begins before rcv_nxt,
        !           600:         * drop leading data (and SYN); if nothing left, just ack.
        !           601:         */
        !           602:        todrop = tp->rcv_nxt - ti->ti_seq;
        !           603:        if (todrop > 0) {
        !           604:                if (tiflags & TH_SYN) {
        !           605:                        tiflags &= ~TH_SYN;
        !           606:                        ti->ti_seq++;
        !           607:                        if (ti->ti_urp > 1) 
        !           608:                                ti->ti_urp--;
        !           609:                        else
        !           610:                                tiflags &= ~TH_URG;
        !           611:                        todrop--;
        !           612:                }
        !           613:                if (todrop > ti->ti_len ||
        !           614:                    todrop == ti->ti_len && (tiflags&TH_FIN) == 0) {
        !           615:                        tcpstat.tcps_rcvduppack++;
        !           616:                        tcpstat.tcps_rcvdupbyte += ti->ti_len;
        !           617:                        /*
        !           618:                         * If segment is just one to the left of the window,
        !           619:                         * check two special cases:
        !           620:                         * 1. Don't toss RST in response to 4.2-style keepalive.
        !           621:                         * 2. If the only thing to drop is a FIN, we can drop
        !           622:                         *    it, but check the ACK or we will get into FIN
        !           623:                         *    wars if our FINs crossed (both CLOSING).
        !           624:                         * In either case, send ACK to resynchronize,
        !           625:                         * but keep on processing for RST or ACK.
        !           626:                         */
        !           627:                        if ((tiflags & TH_FIN && todrop == ti->ti_len + 1)
        !           628: #ifdef TCP_COMPAT_42
        !           629:                          || (tiflags & TH_RST && ti->ti_seq == tp->rcv_nxt - 1)
        !           630: #endif
        !           631:                           ) {
        !           632:                                todrop = ti->ti_len;
        !           633:                                tiflags &= ~TH_FIN;
        !           634:                                tp->t_flags |= TF_ACKNOW;
        !           635:                        } else
        !           636:                                goto dropafterack;
        !           637:                } else {
        !           638:                        tcpstat.tcps_rcvpartduppack++;
        !           639:                        tcpstat.tcps_rcvpartdupbyte += todrop;
        !           640:                }
        !           641:                m_adj(m, todrop);
        !           642:                ti->ti_seq += todrop;
        !           643:                ti->ti_len -= todrop;
        !           644:                if (ti->ti_urp > todrop)
        !           645:                        ti->ti_urp -= todrop;
        !           646:                else {
        !           647:                        tiflags &= ~TH_URG;
        !           648:                        ti->ti_urp = 0;
        !           649:                }
        !           650:        }
        !           651: 
        !           652:        /*
        !           653:         * If new data are received on a connection after the
        !           654:         * user processes are gone, then RST the other end.
        !           655:         */
        !           656:        if ((so->so_state & SS_NOFDREF) &&
        !           657:            tp->t_state > TCPS_CLOSE_WAIT && ti->ti_len) {
        !           658:                tp = tcp_close(tp);
        !           659:                tcpstat.tcps_rcvafterclose++;
        !           660:                goto dropwithreset;
        !           661:        }
        !           662: 
        !           663:        /*
        !           664:         * If segment ends after window, drop trailing data
        !           665:         * (and PUSH and FIN); if nothing left, just ACK.
        !           666:         */
        !           667:        todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd);
        !           668:        if (todrop > 0) {
        !           669:                tcpstat.tcps_rcvpackafterwin++;
        !           670:                if (todrop >= ti->ti_len) {
        !           671:                        tcpstat.tcps_rcvbyteafterwin += ti->ti_len;
        !           672:                        /*
        !           673:                         * If a new connection request is received
        !           674:                         * while in TIME_WAIT, drop the old connection
        !           675:                         * and start over if the sequence numbers
        !           676:                         * are above the previous ones.
        !           677:                         */
        !           678:                        if (tiflags & TH_SYN &&
        !           679:                            tp->t_state == TCPS_TIME_WAIT &&
        !           680:                            SEQ_GT(ti->ti_seq, tp->rcv_nxt)) {
        !           681:                                iss = tp->rcv_nxt + TCP_ISSINCR;
        !           682:                                tp = tcp_close(tp);
        !           683:                                goto findpcb;
        !           684:                        }
        !           685:                        /*
        !           686:                         * If window is closed can only take segments at
        !           687:                         * window edge, and have to drop data and PUSH from
        !           688:                         * incoming segments.  Continue processing, but
        !           689:                         * remember to ack.  Otherwise, drop segment
        !           690:                         * and ack.
        !           691:                         */
        !           692:                        if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) {
        !           693:                                tp->t_flags |= TF_ACKNOW;
        !           694:                                tcpstat.tcps_rcvwinprobe++;
        !           695:                        } else
        !           696:                                goto dropafterack;
        !           697:                } else
        !           698:                        tcpstat.tcps_rcvbyteafterwin += todrop;
        !           699:                m_adj(m, -todrop);
        !           700:                ti->ti_len -= todrop;
        !           701:                tiflags &= ~(TH_PUSH|TH_FIN);
        !           702:        }
        !           703: 
        !           704:        /*
        !           705:         * If the RST bit is set examine the state:
        !           706:         *    SYN_RECEIVED STATE:
        !           707:         *      If passive open, return to LISTEN state.
        !           708:         *      If active open, inform user that connection was refused.
        !           709:         *    ESTABLISHED, FIN_WAIT_1, FIN_WAIT_2, CLOSE_WAIT STATES:
        !           710:         *      Inform user that connection was reset, and close tcb.
        !           711:         *    CLOSING, LAST_ACK, TIME_WAIT STATES
        !           712:         *      Close the tcb.
        !           713:         */
        !           714:        if (tiflags&TH_RST) switch (tp->t_state) {
        !           715: 
        !           716:        case TCPS_SYN_RECEIVED:
        !           717:                so->so_error = ECONNREFUSED;
        !           718:                goto close;
        !           719: 
        !           720:        case TCPS_ESTABLISHED:
        !           721:        case TCPS_FIN_WAIT_1:
        !           722:        case TCPS_FIN_WAIT_2:
        !           723:        case TCPS_CLOSE_WAIT:
        !           724:                so->so_error = ECONNRESET;
        !           725:        close:
        !           726:                tp->t_state = TCPS_CLOSED;
        !           727:                tcpstat.tcps_drops++;
        !           728:                tp = tcp_close(tp);
        !           729:                goto drop;
        !           730: 
        !           731:        case TCPS_CLOSING:
        !           732:        case TCPS_LAST_ACK:
        !           733:        case TCPS_TIME_WAIT:
        !           734:                tp = tcp_close(tp);
        !           735:                goto drop;
        !           736:        }
        !           737: 
        !           738:        /*
        !           739:         * If a SYN is in the window, then this is an
        !           740:         * error and we send an RST and drop the connection.
        !           741:         */
        !           742:        if (tiflags & TH_SYN) {
        !           743:                tp = tcp_drop(tp, ECONNRESET);
        !           744:                goto dropwithreset;
        !           745:        }
        !           746: 
        !           747:        /*
        !           748:         * If the ACK bit is off we drop the segment and return.
        !           749:         */
        !           750:        if ((tiflags & TH_ACK) == 0)
        !           751:                goto drop;
        !           752:        
        !           753:        /*
        !           754:         * Ack processing.
        !           755:         */
        !           756:        switch (tp->t_state) {
        !           757: 
        !           758:        /*
        !           759:         * In SYN_RECEIVED state if the ack ACKs our SYN then enter
        !           760:         * ESTABLISHED state and continue processing, otherwise
        !           761:         * send an RST.
        !           762:         */
        !           763:        case TCPS_SYN_RECEIVED:
        !           764:                if (SEQ_GT(tp->snd_una, ti->ti_ack) ||
        !           765:                    SEQ_GT(ti->ti_ack, tp->snd_max))
        !           766:                        goto dropwithreset;
        !           767:                tcpstat.tcps_connects++;
        !           768:                soisconnected(so);
        !           769:                tp->t_state = TCPS_ESTABLISHED;
        !           770:                (void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0);
        !           771:                tp->snd_wl1 = ti->ti_seq - 1;
        !           772:                /* fall into ... */
        !           773: 
        !           774:        /*
        !           775:         * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
        !           776:         * ACKs.  If the ack is in the range
        !           777:         *      tp->snd_una < ti->ti_ack <= tp->snd_max
        !           778:         * then advance tp->snd_una to ti->ti_ack and drop
        !           779:         * data from the retransmission queue.  If this ACK reflects
        !           780:         * more up to date window information we update our window information.
        !           781:         */
        !           782:        case TCPS_ESTABLISHED:
        !           783:        case TCPS_FIN_WAIT_1:
        !           784:        case TCPS_FIN_WAIT_2:
        !           785:        case TCPS_CLOSE_WAIT:
        !           786:        case TCPS_CLOSING:
        !           787:        case TCPS_LAST_ACK:
        !           788:        case TCPS_TIME_WAIT:
        !           789: 
        !           790:                if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) {
        !           791:                        if (ti->ti_len == 0 && ti->ti_win == tp->snd_wnd) {
        !           792:                                tcpstat.tcps_rcvdupack++;
        !           793:                                /*
        !           794:                                 * If we have outstanding data (other than
        !           795:                                 * a window probe), this is a completely
        !           796:                                 * duplicate ack (ie, window info didn't
        !           797:                                 * change), the ack is the biggest we've
        !           798:                                 * seen and we've seen exactly our rexmt
        !           799:                                 * threshold of them, assume a packet
        !           800:                                 * has been dropped and retransmit it.
        !           801:                                 * Kludge snd_nxt & the congestion
        !           802:                                 * window so we send only this one
        !           803:                                 * packet.
        !           804:                                 *
        !           805:                                 * We know we're losing at the current
        !           806:                                 * window size so do congestion avoidance
        !           807:                                 * (set ssthresh to half the current window
        !           808:                                 * and pull our congestion window back to
        !           809:                                 * the new ssthresh).
        !           810:                                 *
        !           811:                                 * Dup acks mean that packets have left the
        !           812:                                 * network (they're now cached at the receiver) 
        !           813:                                 * so bump cwnd by the amount in the receiver
        !           814:                                 * to keep a constant cwnd packets in the
        !           815:                                 * network.
        !           816:                                 */
        !           817:                                if (tp->t_timer[TCPT_REXMT] == 0 ||
        !           818:                                    ti->ti_ack != tp->snd_una)
        !           819:                                        tp->t_dupacks = 0;
        !           820:                                else if (++tp->t_dupacks == tcprexmtthresh) {
        !           821:                                        tcp_seq onxt = tp->snd_nxt;
        !           822:                                        u_int win =
        !           823:                                            min(tp->snd_wnd, tp->snd_cwnd) / 2 /
        !           824:                                                tp->t_maxseg;
        !           825: 
        !           826:                                        if (win < 2)
        !           827:                                                win = 2;
        !           828:                                        tp->snd_ssthresh = win * tp->t_maxseg;
        !           829:                                        tp->t_timer[TCPT_REXMT] = 0;
        !           830:                                        tp->t_rtt = 0;
        !           831:                                        tp->snd_nxt = ti->ti_ack;
        !           832:                                        tp->snd_cwnd = tp->t_maxseg;
        !           833:                                        (void) tcp_output(tp);
        !           834:                                        tp->snd_cwnd = tp->snd_ssthresh +
        !           835:                                               tp->t_maxseg * tp->t_dupacks;
        !           836:                                        if (SEQ_GT(onxt, tp->snd_nxt))
        !           837:                                                tp->snd_nxt = onxt;
        !           838:                                        goto drop;
        !           839:                                } else if (tp->t_dupacks > tcprexmtthresh) {
        !           840:                                        tp->snd_cwnd += tp->t_maxseg;
        !           841:                                        (void) tcp_output(tp);
        !           842:                                        goto drop;
        !           843:                                }
        !           844:                        } else
        !           845:                                tp->t_dupacks = 0;
        !           846:                        break;
        !           847:                }
        !           848:                /*
        !           849:                 * If the congestion window was inflated to account
        !           850:                 * for the other side's cached packets, retract it.
        !           851:                 */
        !           852:                if (tp->t_dupacks > tcprexmtthresh &&
        !           853:                    tp->snd_cwnd > tp->snd_ssthresh)
        !           854:                        tp->snd_cwnd = tp->snd_ssthresh;
        !           855:                tp->t_dupacks = 0;
        !           856:                if (SEQ_GT(ti->ti_ack, tp->snd_max)) {
        !           857:                        tcpstat.tcps_rcvacktoomuch++;
        !           858:                        goto dropafterack;
        !           859:                }
        !           860:                acked = ti->ti_ack - tp->snd_una;
        !           861:                tcpstat.tcps_rcvackpack++;
        !           862:                tcpstat.tcps_rcvackbyte += acked;
        !           863: 
        !           864:                /*
        !           865:                 * If transmit timer is running and timed sequence
        !           866:                 * number was acked, update smoothed round trip time.
        !           867:                 * Since we now have an rtt measurement, cancel the
        !           868:                 * timer backoff (cf., Phil Karn's retransmit alg.).
        !           869:                 * Recompute the initial retransmit timer.
        !           870:                 */
        !           871:                if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq))
        !           872:                        tcp_xmit_timer(tp);
        !           873: 
        !           874:                /*
        !           875:                 * If all outstanding data is acked, stop retransmit
        !           876:                 * timer and remember to restart (more output or persist).
        !           877:                 * If there is more data to be acked, restart retransmit
        !           878:                 * timer, using current (possibly backed-off) value.
        !           879:                 */
        !           880:                if (ti->ti_ack == tp->snd_max) {
        !           881:                        tp->t_timer[TCPT_REXMT] = 0;
        !           882:                        needoutput = 1;
        !           883:                } else if (tp->t_timer[TCPT_PERSIST] == 0)
        !           884:                        tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
        !           885:                /*
        !           886:                 * When new data is acked, open the congestion window.
        !           887:                 * If the window gives us less than ssthresh packets
        !           888:                 * in flight, open exponentially (maxseg per packet).
        !           889:                 * Otherwise open linearly: maxseg per window
        !           890:                 * (maxseg^2 / cwnd per packet), plus a constant
        !           891:                 * fraction of a packet (maxseg/8) to help larger windows
        !           892:                 * open quickly enough.
        !           893:                 */
        !           894:                {
        !           895:                register u_int cw = tp->snd_cwnd;
        !           896:                register u_int incr = tp->t_maxseg;
        !           897: 
        !           898:                if (cw > tp->snd_ssthresh)
        !           899:                        incr = incr * incr / cw + incr / 8;
        !           900:                tp->snd_cwnd = min(cw + incr, TCP_MAXWIN);
        !           901:                }
        !           902:                if (acked > so->so_snd.sb_cc) {
        !           903:                        tp->snd_wnd -= so->so_snd.sb_cc;
        !           904:                        sbdrop(&so->so_snd, (int)so->so_snd.sb_cc);
        !           905:                        ourfinisacked = 1;
        !           906:                } else {
        !           907:                        sbdrop(&so->so_snd, acked);
        !           908:                        tp->snd_wnd -= acked;
        !           909:                        ourfinisacked = 0;
        !           910:                }
        !           911:                if (so->so_snd.sb_flags & SB_NOTIFY)
        !           912:                        sowwakeup(so);
        !           913:                tp->snd_una = ti->ti_ack;
        !           914:                if (SEQ_LT(tp->snd_nxt, tp->snd_una))
        !           915:                        tp->snd_nxt = tp->snd_una;
        !           916: 
        !           917:                switch (tp->t_state) {
        !           918: 
        !           919:                /*
        !           920:                 * In FIN_WAIT_1 STATE in addition to the processing
        !           921:                 * for the ESTABLISHED state if our FIN is now acknowledged
        !           922:                 * then enter FIN_WAIT_2.
        !           923:                 */
        !           924:                case TCPS_FIN_WAIT_1:
        !           925:                        if (ourfinisacked) {
        !           926:                                /*
        !           927:                                 * If we can't receive any more
        !           928:                                 * data, then closing user can proceed.
        !           929:                                 * Starting the timer is contrary to the
        !           930:                                 * specification, but if we don't get a FIN
        !           931:                                 * we'll hang forever.
        !           932:                                 */
        !           933:                                if (so->so_state & SS_CANTRCVMORE) {
        !           934:                                        soisdisconnected(so);
        !           935:                                        tp->t_timer[TCPT_2MSL] = tcp_maxidle;
        !           936:                                }
        !           937:                                tp->t_state = TCPS_FIN_WAIT_2;
        !           938:                        }
        !           939:                        break;
        !           940: 
        !           941:                /*
        !           942:                 * In CLOSING STATE in addition to the processing for
        !           943:                 * the ESTABLISHED state if the ACK acknowledges our FIN
        !           944:                 * then enter the TIME-WAIT state, otherwise ignore
        !           945:                 * the segment.
        !           946:                 */
        !           947:                case TCPS_CLOSING:
        !           948:                        if (ourfinisacked) {
        !           949:                                tp->t_state = TCPS_TIME_WAIT;
        !           950:                                tcp_canceltimers(tp);
        !           951:                                tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
        !           952:                                soisdisconnected(so);
        !           953:                        }
        !           954:                        break;
        !           955: 
        !           956:                /*
        !           957:                 * In LAST_ACK, we may still be waiting for data to drain
        !           958:                 * and/or to be acked, as well as for the ack of our FIN.
        !           959:                 * If our FIN is now acknowledged, delete the TCB,
        !           960:                 * enter the closed state and return.
        !           961:                 */
        !           962:                case TCPS_LAST_ACK:
        !           963:                        if (ourfinisacked) {
        !           964:                                tp = tcp_close(tp);
        !           965:                                goto drop;
        !           966:                        }
        !           967:                        break;
        !           968: 
        !           969:                /*
        !           970:                 * In TIME_WAIT state the only thing that should arrive
        !           971:                 * is a retransmission of the remote FIN.  Acknowledge
        !           972:                 * it and restart the finack timer.
        !           973:                 */
        !           974:                case TCPS_TIME_WAIT:
        !           975:                        tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
        !           976:                        goto dropafterack;
        !           977:                }
        !           978:        }
        !           979: 
        !           980: step6:
        !           981:        /*
        !           982:         * Update window information.
        !           983:         * Don't look at window if no ACK: TAC's send garbage on first SYN.
        !           984:         */
        !           985:        if ((tiflags & TH_ACK) &&
        !           986:            (SEQ_LT(tp->snd_wl1, ti->ti_seq) || tp->snd_wl1 == ti->ti_seq &&
        !           987:            (SEQ_LT(tp->snd_wl2, ti->ti_ack) ||
        !           988:             tp->snd_wl2 == ti->ti_ack && ti->ti_win > tp->snd_wnd))) {
        !           989:                /* keep track of pure window updates */
        !           990:                if (ti->ti_len == 0 &&
        !           991:                    tp->snd_wl2 == ti->ti_ack && ti->ti_win > tp->snd_wnd)
        !           992:                        tcpstat.tcps_rcvwinupd++;
        !           993:                tp->snd_wnd = ti->ti_win;
        !           994:                tp->snd_wl1 = ti->ti_seq;
        !           995:                tp->snd_wl2 = ti->ti_ack;
        !           996:                if (tp->snd_wnd > tp->max_sndwnd)
        !           997:                        tp->max_sndwnd = tp->snd_wnd;
        !           998:                needoutput = 1;
        !           999:        }
        !          1000: 
        !          1001:        /*
        !          1002:         * Process segments with URG.
        !          1003:         */
        !          1004:        if ((tiflags & TH_URG) && ti->ti_urp &&
        !          1005:            TCPS_HAVERCVDFIN(tp->t_state) == 0) {
        !          1006:                /*
        !          1007:                 * This is a kludge, but if we receive and accept
        !          1008:                 * random urgent pointers, we'll crash in
        !          1009:                 * soreceive.  It's hard to imagine someone
        !          1010:                 * actually wanting to send this much urgent data.
        !          1011:                 */
        !          1012:                if (ti->ti_urp + so->so_rcv.sb_cc > SB_MAX) {
        !          1013:                        ti->ti_urp = 0;                 /* XXX */
        !          1014:                        tiflags &= ~TH_URG;             /* XXX */
        !          1015:                        goto dodata;                    /* XXX */
        !          1016:                }
        !          1017:                /*
        !          1018:                 * If this segment advances the known urgent pointer,
        !          1019:                 * then mark the data stream.  This should not happen
        !          1020:                 * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
        !          1021:                 * a FIN has been received from the remote side. 
        !          1022:                 * In these states we ignore the URG.
        !          1023:                 *
        !          1024:                 * According to RFC961 (Assigned Protocols),
        !          1025:                 * the urgent pointer points to the last octet
        !          1026:                 * of urgent data.  We continue, however,
        !          1027:                 * to consider it to indicate the first octet
        !          1028:                 * of data past the urgent section as the original 
        !          1029:                 * spec states (in one of two places).
        !          1030:                 */
        !          1031:                if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) {
        !          1032:                        tp->rcv_up = ti->ti_seq + ti->ti_urp;
        !          1033:                        so->so_oobmark = so->so_rcv.sb_cc +
        !          1034:                            (tp->rcv_up - tp->rcv_nxt) - 1;
        !          1035:                        if (so->so_oobmark == 0)
        !          1036:                                so->so_state |= SS_RCVATMARK;
        !          1037:                        sohasoutofband(so);
        !          1038:                        tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
        !          1039:                }
        !          1040:                /*
        !          1041:                 * Remove out of band data so doesn't get presented to user.
        !          1042:                 * This can happen independent of advancing the URG pointer,
        !          1043:                 * but if two URG's are pending at once, some out-of-band
        !          1044:                 * data may creep in... ick.
        !          1045:                 */
        !          1046:                if (ti->ti_urp <= ti->ti_len
        !          1047: #ifdef SO_OOBINLINE
        !          1048:                     && (so->so_options & SO_OOBINLINE) == 0
        !          1049: #endif
        !          1050:                     )
        !          1051:                        tcp_pulloutofband(so, ti, m);
        !          1052:        } else
        !          1053:                /*
        !          1054:                 * If no out of band data is expected,
        !          1055:                 * pull receive urgent pointer along
        !          1056:                 * with the receive window.
        !          1057:                 */
        !          1058:                if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
        !          1059:                        tp->rcv_up = tp->rcv_nxt;
        !          1060: dodata:                                                        /* XXX */
        !          1061: 
        !          1062:        /*
        !          1063:         * Process the segment text, merging it into the TCP sequencing queue,
        !          1064:         * and arranging for acknowledgment of receipt if necessary.
        !          1065:         * This process logically involves adjusting tp->rcv_wnd as data
        !          1066:         * is presented to the user (this happens in tcp_usrreq.c,
        !          1067:         * case PRU_RCVD).  If a FIN has already been received on this
        !          1068:         * connection then we just ignore the text.
        !          1069:         */
        !          1070:        if ((ti->ti_len || (tiflags&TH_FIN)) &&
        !          1071:            TCPS_HAVERCVDFIN(tp->t_state) == 0) {
        !          1072:                TCP_REASS(tp, ti, m, so, tiflags);
        !          1073:                /*
        !          1074:                 * Note the amount of data that peer has sent into
        !          1075:                 * our window, in order to estimate the sender's
        !          1076:                 * buffer size.
        !          1077:                 */
        !          1078:                len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt);
        !          1079:        } else {
        !          1080:                m_freem(m);
        !          1081:                tiflags &= ~TH_FIN;
        !          1082:        }
        !          1083: 
        !          1084:        /*
        !          1085:         * If FIN is received ACK the FIN and let the user know
        !          1086:         * that the connection is closing.
        !          1087:         */
        !          1088:        if (tiflags & TH_FIN) {
        !          1089:                if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
        !          1090:                        socantrcvmore(so);
        !          1091:                        tp->t_flags |= TF_ACKNOW;
        !          1092:                        tp->rcv_nxt++;
        !          1093:                }
        !          1094:                switch (tp->t_state) {
        !          1095: 
        !          1096:                /*
        !          1097:                 * In SYN_RECEIVED and ESTABLISHED STATES
        !          1098:                 * enter the CLOSE_WAIT state.
        !          1099:                 */
        !          1100:                case TCPS_SYN_RECEIVED:
        !          1101:                case TCPS_ESTABLISHED:
        !          1102:                        tp->t_state = TCPS_CLOSE_WAIT;
        !          1103:                        break;
        !          1104: 
        !          1105:                /*
        !          1106:                 * If still in FIN_WAIT_1 STATE FIN has not been acked so
        !          1107:                 * enter the CLOSING state.
        !          1108:                 */
        !          1109:                case TCPS_FIN_WAIT_1:
        !          1110:                        tp->t_state = TCPS_CLOSING;
        !          1111:                        break;
        !          1112: 
        !          1113:                /*
        !          1114:                 * In FIN_WAIT_2 state enter the TIME_WAIT state,
        !          1115:                 * starting the time-wait timer, turning off the other 
        !          1116:                 * standard timers.
        !          1117:                 */
        !          1118:                case TCPS_FIN_WAIT_2:
        !          1119:                        tp->t_state = TCPS_TIME_WAIT;
        !          1120:                        tcp_canceltimers(tp);
        !          1121:                        tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
        !          1122:                        soisdisconnected(so);
        !          1123:                        break;
        !          1124: 
        !          1125:                /*
        !          1126:                 * In TIME_WAIT state restart the 2 MSL time_wait timer.
        !          1127:                 */
        !          1128:                case TCPS_TIME_WAIT:
        !          1129:                        tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
        !          1130:                        break;
        !          1131:                }
        !          1132:        }
        !          1133:        if (so->so_options & SO_DEBUG)
        !          1134:                tcp_trace(TA_INPUT, ostate, tp, &tcp_saveti, 0);
        !          1135: 
        !          1136:        /*
        !          1137:         * Return any desired output.
        !          1138:         */
        !          1139:        if (needoutput || (tp->t_flags & TF_ACKNOW))
        !          1140:                (void) tcp_output(tp);
        !          1141:        return;
        !          1142: 
        !          1143: dropafterack:
        !          1144:        /*
        !          1145:         * Generate an ACK dropping incoming segment if it occupies
        !          1146:         * sequence space, where the ACK reflects our state.
        !          1147:         */
        !          1148:        if (tiflags & TH_RST)
        !          1149:                goto drop;
        !          1150:        m_freem(m);
        !          1151:        tp->t_flags |= TF_ACKNOW;
        !          1152:        (void) tcp_output(tp);
        !          1153:        return;
        !          1154: 
        !          1155: dropwithreset:
        !          1156:        if (om) {
        !          1157:                (void) m_free(om);
        !          1158:                om = 0;
        !          1159:        }
        !          1160:        /*
        !          1161:         * Generate a RST, dropping incoming segment.
        !          1162:         * Make ACK acceptable to originator of segment.
        !          1163:         * Don't bother to respond if destination was broadcast.
        !          1164:         */
        !          1165:        if ((tiflags & TH_RST) || m->m_flags & M_BCAST)
        !          1166:                goto drop;
        !          1167:        if (tiflags & TH_ACK)
        !          1168:                tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST);
        !          1169:        else {
        !          1170:                if (tiflags & TH_SYN)
        !          1171:                        ti->ti_len++;
        !          1172:                tcp_respond(tp, ti, m, ti->ti_seq+ti->ti_len, (tcp_seq)0,
        !          1173:                    TH_RST|TH_ACK);
        !          1174:        }
        !          1175:        /* destroy temporarily created socket */
        !          1176:        if (dropsocket)
        !          1177:                (void) soabort(so);
        !          1178:        return;
        !          1179: 
        !          1180: drop:
        !          1181:        if (om)
        !          1182:                (void) m_free(om);
        !          1183:        /*
        !          1184:         * Drop space held by incoming segment and return.
        !          1185:         */
        !          1186:        if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
        !          1187:                tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0);
        !          1188:        m_freem(m);
        !          1189:        /* destroy temporarily created socket */
        !          1190:        if (dropsocket)
        !          1191:                (void) soabort(so);
        !          1192:        return;
        !          1193: }
        !          1194: 
        !          1195: tcp_dooptions(tp, om, ti)
        !          1196:        struct tcpcb *tp;
        !          1197:        struct mbuf *om;
        !          1198:        struct tcpiphdr *ti;
        !          1199: {
        !          1200:        register u_char *cp;
        !          1201:        u_short mss;
        !          1202:        int opt, optlen, cnt;
        !          1203: 
        !          1204:        cp = mtod(om, u_char *);
        !          1205:        cnt = om->m_len;
        !          1206:        for (; cnt > 0; cnt -= optlen, cp += optlen) {
        !          1207:                opt = cp[0];
        !          1208:                if (opt == TCPOPT_EOL)
        !          1209:                        break;
        !          1210:                if (opt == TCPOPT_NOP)
        !          1211:                        optlen = 1;
        !          1212:                else {
        !          1213:                        optlen = cp[1];
        !          1214:                        if (optlen <= 0)
        !          1215:                                break;
        !          1216:                }
        !          1217:                switch (opt) {
        !          1218: 
        !          1219:                default:
        !          1220:                        continue;
        !          1221: 
        !          1222:                case TCPOPT_MAXSEG:
        !          1223:                        if (optlen != 4)
        !          1224:                                continue;
        !          1225:                        if (!(ti->ti_flags & TH_SYN))
        !          1226:                                continue;
        !          1227:                        bcopy((char *) cp + 2, (char *) &mss, sizeof(mss));
        !          1228:                        NTOHS(mss);
        !          1229:                        (void) tcp_mss(tp, mss);        /* sets t_maxseg */
        !          1230:                        break;
        !          1231:                }
        !          1232:        }
        !          1233:        (void) m_free(om);
        !          1234: }
        !          1235: 
        !          1236: /*
        !          1237:  * Pull out of band byte out of a segment so
        !          1238:  * it doesn't appear in the user's data queue.
        !          1239:  * It is still reflected in the segment length for
        !          1240:  * sequencing purposes.
        !          1241:  */
        !          1242: tcp_pulloutofband(so, ti, m)
        !          1243:        struct socket *so;
        !          1244:        struct tcpiphdr *ti;
        !          1245:        register struct mbuf *m;
        !          1246: {
        !          1247:        int cnt = ti->ti_urp - 1;
        !          1248:        
        !          1249:        while (cnt >= 0) {
        !          1250:                if (m->m_len > cnt) {
        !          1251:                        char *cp = mtod(m, caddr_t) + cnt;
        !          1252:                        struct tcpcb *tp = sototcpcb(so);
        !          1253: 
        !          1254:                        tp->t_iobc = *cp;
        !          1255:                        tp->t_oobflags |= TCPOOB_HAVEDATA;
        !          1256:                        bcopy(cp+1, cp, (unsigned)(m->m_len - cnt - 1));
        !          1257:                        m->m_len--;
        !          1258:                        return;
        !          1259:                }
        !          1260:                cnt -= m->m_len;
        !          1261:                m = m->m_next;
        !          1262:                if (m == 0)
        !          1263:                        break;
        !          1264:        }
        !          1265:        panic("tcp_pulloutofband");
        !          1266: }
        !          1267: 
        !          1268: /*
        !          1269:  * Collect new round-trip time estimate
        !          1270:  * and update averages and current timeout.
        !          1271:  */
        !          1272: tcp_xmit_timer(tp)
        !          1273:        register struct tcpcb *tp;
        !          1274: {
        !          1275:        register short delta;
        !          1276: 
        !          1277:        tcpstat.tcps_rttupdated++;
        !          1278:        if (tp->t_srtt != 0) {
        !          1279:                /*
        !          1280:                 * srtt is stored as fixed point with 3 bits after the
        !          1281:                 * binary point (i.e., scaled by 8).  The following magic
        !          1282:                 * is equivalent to the smoothing algorithm in rfc793 with
        !          1283:                 * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
        !          1284:                 * point).  Adjust t_rtt to origin 0.
        !          1285:                 */
        !          1286:                delta = tp->t_rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT);
        !          1287:                if ((tp->t_srtt += delta) <= 0)
        !          1288:                        tp->t_srtt = 1;
        !          1289:                /*
        !          1290:                 * We accumulate a smoothed rtt variance (actually, a
        !          1291:                 * smoothed mean difference), then set the retransmit
        !          1292:                 * timer to smoothed rtt + 4 times the smoothed variance.
        !          1293:                 * rttvar is stored as fixed point with 2 bits after the
        !          1294:                 * binary point (scaled by 4).  The following is
        !          1295:                 * equivalent to rfc793 smoothing with an alpha of .75
        !          1296:                 * (rttvar = rttvar*3/4 + |delta| / 4).  This replaces
        !          1297:                 * rfc793's wired-in beta.
        !          1298:                 */
        !          1299:                if (delta < 0)
        !          1300:                        delta = -delta;
        !          1301:                delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT);
        !          1302:                if ((tp->t_rttvar += delta) <= 0)
        !          1303:                        tp->t_rttvar = 1;
        !          1304:        } else {
        !          1305:                /* 
        !          1306:                 * No rtt measurement yet - use the unsmoothed rtt.
        !          1307:                 * Set the variance to half the rtt (so our first
        !          1308:                 * retransmit happens at 2*rtt)
        !          1309:                 */
        !          1310:                tp->t_srtt = tp->t_rtt << TCP_RTT_SHIFT;
        !          1311:                tp->t_rttvar = tp->t_rtt << (TCP_RTTVAR_SHIFT - 1);
        !          1312:        }
        !          1313:        tp->t_rtt = 0;
        !          1314:        tp->t_rxtshift = 0;
        !          1315: 
        !          1316:        /*
        !          1317:         * the retransmit should happen at rtt + 4 * rttvar.
        !          1318:         * Because of the way we do the smoothing, srtt and rttvar
        !          1319:         * will each average +1/2 tick of bias.  When we compute
        !          1320:         * the retransmit timer, we want 1/2 tick of rounding and
        !          1321:         * 1 extra tick because of +-1/2 tick uncertainty in the
        !          1322:         * firing of the timer.  The bias will give us exactly the
        !          1323:         * 1.5 tick we need.  But, because the bias is
        !          1324:         * statistical, we have to test that we don't drop below
        !          1325:         * the minimum feasible timer (which is 2 ticks).
        !          1326:         */
        !          1327:        TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
        !          1328:            tp->t_rttmin, TCPTV_REXMTMAX);
        !          1329:        
        !          1330:        /*
        !          1331:         * We received an ack for a packet that wasn't retransmitted;
        !          1332:         * it is probably safe to discard any error indications we've
        !          1333:         * received recently.  This isn't quite right, but close enough
        !          1334:         * for now (a route might have failed after we sent a segment,
        !          1335:         * and the return path might not be symmetrical).
        !          1336:         */
        !          1337:        tp->t_softerror = 0;
        !          1338: }
        !          1339: 
        !          1340: /*
        !          1341:  * Determine a reasonable value for maxseg size.
        !          1342:  * If the route is known, check route for mtu.
        !          1343:  * If none, use an mss that can be handled on the outgoing
        !          1344:  * interface without forcing IP to fragment; if bigger than
        !          1345:  * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES
        !          1346:  * to utilize large mbufs.  If no route is found, route has no mtu,
        !          1347:  * or the destination isn't local, use a default, hopefully conservative
        !          1348:  * size (usually 512 or the default IP max size, but no more than the mtu
        !          1349:  * of the interface), as we can't discover anything about intervening
        !          1350:  * gateways or networks.  We also initialize the congestion/slow start
        !          1351:  * window to be a single segment if the destination isn't local.
        !          1352:  * While looking at the routing entry, we also initialize other path-dependent
        !          1353:  * parameters from pre-set or cached values in the routing entry.
        !          1354:  */
        !          1355: 
        !          1356: tcp_mss(tp, offer)
        !          1357:        register struct tcpcb *tp;
        !          1358:        u_short offer;
        !          1359: {
        !          1360:        struct route *ro;
        !          1361:        register struct rtentry *rt;
        !          1362:        struct ifnet *ifp;
        !          1363:        register int rtt, mss;
        !          1364:        u_long bufsize;
        !          1365:        struct inpcb *inp;
        !          1366:        struct socket *so;
        !          1367:        extern int tcp_mssdflt, tcp_rttdflt;
        !          1368: 
        !          1369:        inp = tp->t_inpcb;
        !          1370:        ro = &inp->inp_route;
        !          1371: 
        !          1372:        if ((rt = ro->ro_rt) == (struct rtentry *)0) {
        !          1373:                /* No route yet, so try to acquire one */
        !          1374:                if (inp->inp_faddr.s_addr != INADDR_ANY) {
        !          1375:                        ro->ro_dst.sa_family = AF_INET;
        !          1376:                        ro->ro_dst.sa_len = sizeof(ro->ro_dst);
        !          1377:                        ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
        !          1378:                                inp->inp_faddr;
        !          1379:                        rtalloc(ro);
        !          1380:                }
        !          1381:                if ((rt = ro->ro_rt) == (struct rtentry *)0)
        !          1382:                        return (tcp_mssdflt);
        !          1383:        }
        !          1384:        ifp = rt->rt_ifp;
        !          1385:        so = inp->inp_socket;
        !          1386: 
        !          1387: #ifdef RTV_MTU /* if route characteristics exist ... */
        !          1388:        /*
        !          1389:         * While we're here, check if there's an initial rtt
        !          1390:         * or rttvar.  Convert from the route-table units
        !          1391:         * to scaled multiples of the slow timeout timer.
        !          1392:         */
        !          1393:        if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) {
        !          1394:                if (rt->rt_rmx.rmx_locks & RTV_MTU)
        !          1395:                        tp->t_rttmin = rtt / (RTM_RTTUNIT / PR_SLOWHZ);
        !          1396:                tp->t_srtt = rtt / (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
        !          1397:                if (rt->rt_rmx.rmx_rttvar)
        !          1398:                        tp->t_rttvar = rt->rt_rmx.rmx_rttvar /
        !          1399:                            (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
        !          1400:                else
        !          1401:                        /* default variation is +- 1 rtt */
        !          1402:                        tp->t_rttvar =
        !          1403:                            tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
        !          1404:                TCPT_RANGESET(tp->t_rxtcur,
        !          1405:                    ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
        !          1406:                    tp->t_rttmin, TCPTV_REXMTMAX);
        !          1407:        }
        !          1408:        /*
        !          1409:         * if there's an mtu associated with the route, use it
        !          1410:         */
        !          1411:        if (rt->rt_rmx.rmx_mtu)
        !          1412:                mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr);
        !          1413:        else
        !          1414: #endif /* RTV_MTU */
        !          1415:        {
        !          1416:                mss = ifp->if_mtu - sizeof(struct tcpiphdr);
        !          1417: #if    (MCLBYTES & (MCLBYTES - 1)) == 0
        !          1418:                if (mss > MCLBYTES)
        !          1419:                        mss &= ~(MCLBYTES-1);
        !          1420: #else
        !          1421:                if (mss > MCLBYTES)
        !          1422:                        mss = mss / MCLBYTES * MCLBYTES;
        !          1423: #endif
        !          1424:                if (!in_localaddr(inp->inp_faddr))
        !          1425:                        mss = min(mss, tcp_mssdflt);
        !          1426:        }
        !          1427:        /*
        !          1428:         * The current mss, t_maxseg, is initialized to the default value.
        !          1429:         * If we compute a smaller value, reduce the current mss.
        !          1430:         * If we compute a larger value, return it for use in sending
        !          1431:         * a max seg size option, but don't store it for use
        !          1432:         * unless we received an offer at least that large from peer.
        !          1433:         * However, do not accept offers under 32 bytes.
        !          1434:         */
        !          1435:        if (offer)
        !          1436:                mss = min(mss, offer);
        !          1437:        mss = max(mss, 32);             /* sanity */
        !          1438:        if (mss < tp->t_maxseg || offer != 0) {
        !          1439:                /*
        !          1440:                 * If there's a pipesize, change the socket buffer
        !          1441:                 * to that size.  Make the socket buffers an integral
        !          1442:                 * number of mss units; if the mss is larger than
        !          1443:                 * the socket buffer, decrease the mss.
        !          1444:                 */
        !          1445: #ifdef RTV_SPIPE
        !          1446:                if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0)
        !          1447: #endif
        !          1448:                        bufsize = so->so_snd.sb_hiwat;
        !          1449:                if (bufsize < mss)
        !          1450:                        mss = bufsize;
        !          1451:                else {
        !          1452:                        bufsize = min(bufsize, SB_MAX) / mss * mss;
        !          1453:                        (void) sbreserve(&so->so_snd, bufsize);
        !          1454:                }
        !          1455:                tp->t_maxseg = mss;
        !          1456: 
        !          1457: #ifdef RTV_RPIPE
        !          1458:                if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0)
        !          1459: #endif
        !          1460:                        bufsize = so->so_rcv.sb_hiwat;
        !          1461:                if (bufsize > mss) {
        !          1462:                        bufsize = min(bufsize, SB_MAX) / mss * mss;
        !          1463:                        (void) sbreserve(&so->so_rcv, bufsize);
        !          1464:                }
        !          1465:        }
        !          1466:        tp->snd_cwnd = mss;
        !          1467: 
        !          1468: #ifdef RTV_SSTHRESH
        !          1469:        if (rt->rt_rmx.rmx_ssthresh) {
        !          1470:                /*
        !          1471:                 * There's some sort of gateway or interface
        !          1472:                 * buffer limit on the path.  Use this to set
        !          1473:                 * the slow start threshhold, but set the
        !          1474:                 * threshold to no less than 2*mss.
        !          1475:                 */
        !          1476:                tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh);
        !          1477:        }
        !          1478: #endif /* RTV_MTU */
        !          1479:        return (mss);
        !          1480: }

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.