Annotation of qemu/slirp/tcp_input.c, revision 1.1.1.4

1.1       root        1: /*
                      2:  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994
                      3:  *     The Regents of the University of California.  All rights reserved.
                      4:  *
                      5:  * Redistribution and use in source and binary forms, with or without
                      6:  * modification, are permitted provided that the following conditions
                      7:  * are met:
                      8:  * 1. Redistributions of source code must retain the above copyright
                      9:  *    notice, this list of conditions and the following disclaimer.
                     10:  * 2. Redistributions in binary form must reproduce the above copyright
                     11:  *    notice, this list of conditions and the following disclaimer in the
                     12:  *    documentation and/or other materials provided with the distribution.
1.1.1.4 ! root       13:  * 3. Neither the name of the University nor the names of its contributors
1.1       root       14:  *    may be used to endorse or promote products derived from this software
                     15:  *    without specific prior written permission.
                     16:  *
                     17:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     18:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     19:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     20:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     21:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     22:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     23:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     24:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     25:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     26:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     27:  * SUCH DAMAGE.
                     28:  *
                     29:  *     @(#)tcp_input.c 8.5 (Berkeley) 4/10/94
                     30:  * tcp_input.c,v 1.10 1994/10/13 18:36:32 wollman Exp
                     31:  */
                     32: 
                     33: /*
                     34:  * Changes and additions relating to SLiRP
                     35:  * Copyright (c) 1995 Danny Gasparovski.
1.1.1.3   root       36:  *
                     37:  * Please read the file COPYRIGHT for the
1.1       root       38:  * terms and conditions of the copyright.
                     39:  */
                     40: 
                     41: #include <slirp.h>
                     42: #include "ip_icmp.h"
                     43: 
                     44: struct socket tcb;      /* socket list that tcp_input() hands to solookup() */
                     45: 
                         /*
                          * NOTE(review): presumably the number of duplicate ACKs that triggers a
                          * fast retransmit; confirm against the code that uses it.
                          */
1.1.1.3   root       46: #define        TCPREXMTTHRESH 3
                         /* Last socket matched by tcp_input(); it is tried before solookup(). */
1.1       root       47: struct socket *tcp_last_so = &tcb;
                     48: 
                     49: tcp_seq tcp_iss;                /* tcp initial send seq # */
                     50: 
                         /*
                          * 24 * (24 * 60 * 60) is 24 days expressed in seconds, scaled by
                          * PR_SLOWHZ (presumably slow-timer ticks per second -- confirm).
                          */
                     51: #define TCP_PAWS_IDLE  (24 * 24 * 60 * 60 * PR_SLOWHZ)
                     52: 
                     53: /* for modulo comparisons of timestamps: the difference is cast to int and its sign decides the order */
                     54: #define TSTMP_LT(a,b)  ((int)((a)-(b)) < 0)
                     55: #define TSTMP_GEQ(a,b) ((int)((a)-(b)) >= 0)
                     56: 
                     57: /*
                     58:  * Insert segment ti into reassembly queue of tcp with
                     59:  * control block tp.  Return TH_FIN if reassembly now includes
                     60:  * a segment with FIN.  The macro form does the common case inline
                     61:  * (segment is the next to be received on an established connection,
                     62:  * and the queue is empty), avoiding linkage into and removal
                     63:  * from the queue and repetition of various conversions.
                     64:  * Set DELACK for segments received in order, but ack immediately
                     65:  * when segments are out of order (so fast retransmit can work).
                          *
                          * Arguments:
                          *   tp    - TCP control block of the connection.
                          *   ti    - header of the segment being processed.
                          *   m     - mbuf carrying the segment data.
                          *   so    - socket the data is appended to.
                          *   flags - lvalue that receives the result: the segment's TH_FIN
                          *           bit on the inline path, else the value of tcp_reass().
                          *
                          * On the inline path the data goes through tcp_emu() first when
                          * so->so_emu is set, and is appended with sbappend() only if
                          * tcp_emu() returns non-zero.  With TCP_ACK_HACK defined, an
                          * in-order segment carrying TH_PUSH sets TF_ACKNOW instead of
                          * TF_DELACK.
                          *
                          * Every parameter is parenthesised at each use so that expression
                          * arguments expand safely.  The macro expands to a brace block, not
                          * a single statement.
                     66:  */
                     67: #ifdef TCP_ACK_HACK
                     68: #define TCP_REASS(tp, ti, m, so, flags) {\
                     69:        if ((ti)->ti_seq == (tp)->rcv_nxt && \
1.1.1.4 ! root       70:            tcpfrag_list_empty(tp) && \
1.1       root       71:            (tp)->t_state == TCPS_ESTABLISHED) {\
                     72:                if ((ti)->ti_flags & TH_PUSH) \
                     73:                        (tp)->t_flags |= TF_ACKNOW; \
                     74:                else \
                     75:                        (tp)->t_flags |= TF_DELACK; \
                     76:                (tp)->rcv_nxt += (ti)->ti_len; \
                     77:                (flags) = (ti)->ti_flags & TH_FIN; \
1.1.1.3   root       78:                STAT(tcpstat.tcps_rcvpack++);         \
                     79:                STAT(tcpstat.tcps_rcvbyte += (ti)->ti_len);   \
1.1       root       80:                if ((so)->so_emu) { \
                     81:                        if (tcp_emu((so),(m))) sbappend((so), (m)); \
                     82:                } else \
                     83:                        sbappend((so), (m)); \
                     84: /*               sorwakeup(so); */ \
                     85:        } else {\
                     86:                (flags) = tcp_reass((tp), (ti), (m)); \
                     87:                (tp)->t_flags |= TF_ACKNOW; \
                     88:        } \
                     89: }
                     90: #else
                     91: #define        TCP_REASS(tp, ti, m, so, flags) { \
                     92:        if ((ti)->ti_seq == (tp)->rcv_nxt && \
1.1.1.4 ! root       93:            tcpfrag_list_empty(tp) && \
1.1       root       94:            (tp)->t_state == TCPS_ESTABLISHED) { \
                     95:                (tp)->t_flags |= TF_DELACK; \
                     96:                (tp)->rcv_nxt += (ti)->ti_len; \
                     97:                (flags) = (ti)->ti_flags & TH_FIN; \
1.1.1.3   root       98:                STAT(tcpstat.tcps_rcvpack++);        \
                     99:                STAT(tcpstat.tcps_rcvbyte += (ti)->ti_len);  \
1.1       root      100:                if ((so)->so_emu) { \
                    101:                        if (tcp_emu((so),(m))) sbappend((so), (m)); \
                    102:                } else \
                    103:                        sbappend((so), (m)); \
                    104: /*             sorwakeup(so); */ \
                    105:        } else { \
                    106:                (flags) = tcp_reass((tp), (ti), (m)); \
                    107:                (tp)->t_flags |= TF_ACKNOW; \
                    108:        } \
                    109: }
                    110: #endif
1.1.1.3   root      111: static void tcp_dooptions(struct tcpcb *tp, u_char *cp, int cnt,
                    112:                           struct tcpiphdr *ti); /* file-local; tcp_input() passes it the option bytes and their length */
                    113: static void tcp_xmit_timer(register struct tcpcb *tp, int rtt); /* file-local; tcp_input() calls it with tp->t_rtt when an ACK passes tp->t_rtseq */
                    114: 
                         /*
                          * tcp_reass: link segment ti (carried in mbuf m) into tp's reassembly
                          * list, trimming any overlap with its neighbours, then hand every
                          * segment that is now in sequence to the socket.
                          *
                          *   tp - control block owning the reassembly list.
                          *   ti - header of the incoming segment; ti == 0 skips the insertion
                          *        and runs only the delivery step.
                          *   m  - mbuf holding the segment.  It is freed here when the segment
                          *        is entirely duplicate data; otherwise it is remembered in
                          *        ti->ti_mbuf until the segment is delivered or superseded.
                          *
                          * Delivery happens only once the connection has reached an
                          * established state and the first queued segment starts at
                          * tp->rcv_nxt.  Each delivered mbuf is freed if the socket has
                          * SS_FCANTSENDMORE set; otherwise it goes through tcp_emu() when
                          * so->so_emu is set and is appended with sbappend().
                          *
                          * Returns the TH_FIN bit of the last segment delivered, or 0 when
                          * nothing was delivered.
                          */
                    115: static int
                    116: tcp_reass(register struct tcpcb *tp, register struct tcpiphdr *ti,
                    117:           struct mbuf *m)
1.1       root      118: {
                    119:        register struct tcpiphdr *q;
                    120:        struct socket *so = tp->t_socket;
                    121:        int flags;
1.1.1.3   root      122: 
1.1       root      123:        /*
                    124:         * Call with ti==0 after becoming established to
                    125:         * force pre-ESTABLISHED data up to user socket.
                    126:         */
                    127:        if (ti == 0)
                    128:                goto present;
                    129: 
                    130:        /*
                    131:         * Find a segment which begins after this one does.
                    132:         */
1.1.1.4 ! root      133:        for (q = tcpfrag_list_first(tp); !tcpfrag_list_end(q, tp);
        !           134:             q = tcpiphdr_next(q))
1.1       root      135:                if (SEQ_GT(q->ti_seq, ti->ti_seq))
                    136:                        break;
                    137: 
                    138:        /*
                    139:         * If there is a preceding segment, it may provide some of
                    140:         * our data already.  If so, drop the data from the incoming
                    141:         * segment.  If it provides all of our data, drop us.
                    142:         */
1.1.1.4 ! root      143:        if (!tcpfrag_list_end(tcpiphdr_prev(q), tp)) {
1.1       root      144:                register int i;
1.1.1.4 ! root      145:                q = tcpiphdr_prev(q);
1.1       root      146:                /* conversion to int (in i) handles seq wraparound */
                    147:                i = q->ti_seq + q->ti_len - ti->ti_seq;
                    148:                if (i > 0) {
                    149:                        if (i >= ti->ti_len) {
1.1.1.3   root      150:                                STAT(tcpstat.tcps_rcvduppack++);
                    151:                                STAT(tcpstat.tcps_rcvdupbyte += ti->ti_len);
1.1       root      152:                                m_freem(m);
                    153:                                /*
                    154:                                 * Try to present any queued data
                    155:                                 * at the left window edge to the user.
                    156:                                 * This is needed after the 3-WHS
                    157:                                 * completes.
                    158:                                 */
                    159:                                goto present;   /* ??? */
                    160:                        }
                                                /* Partial overlap: drop the first i bytes of the new segment. */
                    161:                        m_adj(m, i);
                    162:                        ti->ti_len -= i;
                    163:                        ti->ti_seq += i;
                    164:                }
                                        /* Step forward again so q is the segment that follows ti. */
1.1.1.4 ! root      165:                q = tcpiphdr_next(q);
1.1       root      166:        }
1.1.1.3   root      167:        STAT(tcpstat.tcps_rcvoopack++);
                    168:        STAT(tcpstat.tcps_rcvoobyte += ti->ti_len);
                                /* Remember the mbuf in the header; m is reused as scratch below. */
1.1.1.4 ! root      169:        ti->ti_mbuf = m;
1.1       root      170: 
                    171:        /*
                    172:         * While we overlap succeeding segments trim them or,
                    173:         * if they are completely covered, dequeue them.
                    174:         */
1.1.1.4 ! root      175:        while (!tcpfrag_list_end(q, tp)) {
1.1       root      176:                register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq;
                    177:                if (i <= 0)
                    178:                        break;
                    179:                if (i < q->ti_len) {
                    180:                        q->ti_seq += i;
                    181:                        q->ti_len -= i;
1.1.1.4 ! root      182:                        m_adj(q->ti_mbuf, i);
1.1       root      183:                        break;
                    184:                }
                                        /* q is fully covered: advance first, then unlink and free the one left behind. */
1.1.1.4 ! root      185:                q = tcpiphdr_next(q);
        !           186:                m = tcpiphdr_prev(q)->ti_mbuf;
        !           187:                remque(tcpiphdr2qlink(tcpiphdr_prev(q)));
1.1       root      188:                m_freem(m);
                    189:        }
                    190: 
                    191:        /*
                    192:         * Stick new segment in its place.
                    193:         */
1.1.1.4 ! root      194:        insque(tcpiphdr2qlink(ti), tcpiphdr2qlink(tcpiphdr_prev(q)));
1.1       root      195: 
                    196: present:
                    197:        /*
                    198:         * Present data to user, advancing rcv_nxt through
                    199:         * completed sequence space.
                    200:         */
                    201:        if (!TCPS_HAVEESTABLISHED(tp->t_state))
                    202:                return (0);
1.1.1.4 ! root      203:        ti = tcpfrag_list_first(tp);
        !           204:        if (tcpfrag_list_end(ti, tp) || ti->ti_seq != tp->rcv_nxt)
1.1       root      205:                return (0);
                    206:        if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len)
                    207:                return (0);
                    208:        do {
                    209:                tp->rcv_nxt += ti->ti_len;
                    210:                flags = ti->ti_flags & TH_FIN;
1.1.1.4 ! root      211:                remque(tcpiphdr2qlink(ti));
        !           212:                m = ti->ti_mbuf;
        !           213:                ti = tcpiphdr_next(ti);
1.1       root      214: /*             if (so->so_state & SS_FCANTRCVMORE) */
                    215:                if (so->so_state & SS_FCANTSENDMORE)
                    216:                        m_freem(m);
                    217:                else {
                    218:                        if (so->so_emu) {
                    219:                                if (tcp_emu(so,m)) sbappend(so, m);
                    220:                        } else
                    221:                                sbappend(so, m);
                    222:                }
                                        /*
                                         * Stop at the list head with the same end-of-list test the
                                         * walks above use, so the head is never read as a segment.
                                         */
                    223:        } while (!tcpfrag_list_end(ti, tp) && ti->ti_seq == tp->rcv_nxt);
                    224: /*     sorwakeup(so); */
                    225:        return (flags);
                    226: }
                    227: 
                    228: /*
                    229:  * TCP input routine, follows pages 65-76 of the
                    230:  * protocol specification dated September, 1981 very closely.
                    231:  */
                    232: void
                    233: tcp_input(m, iphlen, inso)
                    234:        register struct mbuf *m;
                    235:        int iphlen;
                    236:        struct socket *inso;
                    237: {
                    238:        struct ip save_ip, *ip;
                    239:        register struct tcpiphdr *ti;
                    240:        caddr_t optp = NULL;
                    241:        int optlen = 0;
                    242:        int len, tlen, off;
                    243:        register struct tcpcb *tp = 0;
                    244:        register int tiflags;
                    245:        struct socket *so = 0;
                    246:        int todrop, acked, ourfinisacked, needoutput = 0;
                    247: /*     int dropsocket = 0; */
                    248:        int iss = 0;
                    249:        u_long tiwin;
                    250:        int ret;
                    251: /*     int ts_present = 0; */
1.1.1.4 ! root      252:     struct ex_list *ex_ptr;
1.1       root      253: 
                    254:        DEBUG_CALL("tcp_input");
1.1.1.3   root      255:        DEBUG_ARGS((dfd," m = %8lx  iphlen = %2d  inso = %lx\n",
1.1       root      256:                    (long )m, iphlen, (long )inso ));
1.1.1.3   root      257: 
1.1       root      258:        /*
                    259:         * If called with m == 0, then we're continuing the connect
                    260:         */
                    261:        if (m == NULL) {
                    262:                so = inso;
1.1.1.3   root      263: 
1.1       root      264:                /* Re-set a few variables */
                    265:                tp = sototcpcb(so);
                    266:                m = so->so_m;
                    267:                so->so_m = 0;
                    268:                ti = so->so_ti;
                    269:                tiwin = ti->ti_win;
                    270:                tiflags = ti->ti_flags;
1.1.1.3   root      271: 
1.1       root      272:                goto cont_conn;
                    273:        }
1.1.1.3   root      274: 
                    275: 
                    276:        STAT(tcpstat.tcps_rcvtotal++);
1.1       root      277:        /*
                    278:         * Get IP and TCP header together in first mbuf.
                    279:         * Note: IP leaves IP header in first mbuf.
                    280:         */
                    281:        ti = mtod(m, struct tcpiphdr *);
                    282:        if (iphlen > sizeof(struct ip )) {
                    283:          ip_stripoptions(m, (struct mbuf *)0);
                    284:          iphlen=sizeof(struct ip );
                    285:        }
                    286:        /* XXX Check if too short */
1.1.1.3   root      287: 
1.1       root      288: 
                    289:        /*
                    290:         * Save a copy of the IP header in case we want restore it
                    291:         * for sending an ICMP error message in response.
                    292:         */
                    293:        ip=mtod(m, struct ip *);
1.1.1.3   root      294:        save_ip = *ip;
1.1       root      295:        save_ip.ip_len+= iphlen;
                    296: 
                    297:        /*
                    298:         * Checksum extended TCP header and data.
                    299:         */
                    300:        tlen = ((struct ip *)ti)->ip_len;
1.1.1.4 ! root      301:        tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = 0;
        !           302:     memset(&ti->ti_i.ih_mbuf, 0 , sizeof(struct mbuf_ptr));
1.1       root      303:        ti->ti_x1 = 0;
                    304:        ti->ti_len = htons((u_int16_t)tlen);
                    305:        len = sizeof(struct ip ) + tlen;
                    306:        /* keep checksum for ICMP reply
1.1.1.3   root      307:         * ti->ti_sum = cksum(m, len);
1.1       root      308:         * if (ti->ti_sum) { */
                    309:        if(cksum(m, len)) {
1.1.1.3   root      310:          STAT(tcpstat.tcps_rcvbadsum++);
1.1       root      311:          goto drop;
                    312:        }
                    313: 
                    314:        /*
                    315:         * Check that TCP offset makes sense,
                    316:         * pull out TCP options and adjust length.              XXX
                    317:         */
                    318:        off = ti->ti_off << 2;
                    319:        if (off < sizeof (struct tcphdr) || off > tlen) {
1.1.1.3   root      320:          STAT(tcpstat.tcps_rcvbadoff++);
1.1       root      321:          goto drop;
                    322:        }
                    323:        tlen -= off;
                    324:        ti->ti_len = tlen;
                    325:        if (off > sizeof (struct tcphdr)) {
                    326:          optlen = off - sizeof (struct tcphdr);
                    327:          optp = mtod(m, caddr_t) + sizeof (struct tcpiphdr);
                    328: 
1.1.1.3   root      329:                /*
1.1       root      330:                 * Do quick retrieval of timestamp options ("options
                    331:                 * prediction?").  If timestamp is the only option and it's
                    332:                 * formatted as recommended in RFC 1323 appendix A, we
                    333:                 * quickly get the values now and not bother calling
                    334:                 * tcp_dooptions(), etc.
                    335:                 */
                    336: /*             if ((optlen == TCPOLEN_TSTAMP_APPA ||
                    337:  *                  (optlen > TCPOLEN_TSTAMP_APPA &&
                    338:  *                     optp[TCPOLEN_TSTAMP_APPA] == TCPOPT_EOL)) &&
                    339:  *                  *(u_int32_t *)optp == htonl(TCPOPT_TSTAMP_HDR) &&
                    340:  *                  (ti->ti_flags & TH_SYN) == 0) {
                    341:  *                     ts_present = 1;
                    342:  *                     ts_val = ntohl(*(u_int32_t *)(optp + 4));
                    343:  *                     ts_ecr = ntohl(*(u_int32_t *)(optp + 8));
                    344:  *                     optp = NULL;   / * we've parsed the options * /
                    345:  *             }
                    346:  */
                    347:        }
                    348:        tiflags = ti->ti_flags;
1.1.1.3   root      349: 
1.1       root      350:        /*
                    351:         * Convert TCP protocol specific fields to host format.
                    352:         */
                    353:        NTOHL(ti->ti_seq);
                    354:        NTOHL(ti->ti_ack);
                    355:        NTOHS(ti->ti_win);
                    356:        NTOHS(ti->ti_urp);
                    357: 
                    358:        /*
                    359:         * Drop TCP, IP headers and TCP options.
                    360:         */
                    361:        m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
                    362:        m->m_len  -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
1.1.1.3   root      363: 
1.1.1.4 ! root      364:     if (slirp_restrict) {
        !           365:         for (ex_ptr = exec_list; ex_ptr; ex_ptr = ex_ptr->ex_next)
        !           366:             if (ex_ptr->ex_fport == ti->ti_dport &&
        !           367:                     (ntohl(ti->ti_dst.s_addr) & 0xff) == ex_ptr->ex_addr)
        !           368:                 break;
        !           369: 
        !           370:         if (!ex_ptr)
        !           371:             goto drop;
        !           372:     }
1.1       root      373:        /*
                    374:         * Locate pcb for segment.
                    375:         */
                    376: findso:
                    377:        so = tcp_last_so;
                    378:        if (so->so_fport != ti->ti_dport ||
                    379:            so->so_lport != ti->ti_sport ||
                    380:            so->so_laddr.s_addr != ti->ti_src.s_addr ||
                    381:            so->so_faddr.s_addr != ti->ti_dst.s_addr) {
                    382:                so = solookup(&tcb, ti->ti_src, ti->ti_sport,
                    383:                               ti->ti_dst, ti->ti_dport);
                    384:                if (so)
                    385:                        tcp_last_so = so;
1.1.1.3   root      386:                STAT(tcpstat.tcps_socachemiss++);
1.1       root      387:        }
                    388: 
                    389:        /*
                    390:         * If the state is CLOSED (i.e., TCB does not exist) then
                    391:         * all data in the incoming segment is discarded.
                    392:         * If the TCB exists but is in CLOSED state, it is embryonic,
                    393:         * but should either do a listen or a connect soon.
                    394:         *
                    395:         * state == CLOSED means we've done socreate() but haven't
1.1.1.3   root      396:         * attached it to a protocol yet...
                    397:         *
1.1       root      398:         * XXX If a TCB does not exist, and the TH_SYN flag is
                    399:         * the only flag set, then create a session, mark it
                    400:         * as if it was LISTENING, and continue...
                    401:         */
                    402:        if (so == 0) {
                    403:          if ((tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) != TH_SYN)
                    404:            goto dropwithreset;
1.1.1.3   root      405: 
1.1       root      406:          if ((so = socreate()) == NULL)
                    407:            goto dropwithreset;
                    408:          if (tcp_attach(so) < 0) {
                    409:            free(so); /* Not sofree (if it failed, it's not insqued) */
                    410:            goto dropwithreset;
                    411:          }
1.1.1.3   root      412: 
                    413:          sbreserve(&so->so_snd, TCP_SNDSPACE);
                    414:          sbreserve(&so->so_rcv, TCP_RCVSPACE);
                    415: 
1.1       root      416:          /*            tcp_last_so = so; */  /* XXX ? */
                    417:          /*            tp = sototcpcb(so);    */
1.1.1.3   root      418: 
1.1       root      419:          so->so_laddr = ti->ti_src;
                    420:          so->so_lport = ti->ti_sport;
                    421:          so->so_faddr = ti->ti_dst;
                    422:          so->so_fport = ti->ti_dport;
1.1.1.3   root      423: 
1.1       root      424:          if ((so->so_iptos = tcp_tos(so)) == 0)
                    425:            so->so_iptos = ((struct ip *)ti)->ip_tos;
1.1.1.3   root      426: 
1.1       root      427:          tp = sototcpcb(so);
                    428:          tp->t_state = TCPS_LISTEN;
                    429:        }
1.1.1.3   root      430: 
1.1       root      431:         /*
                    432:          * If this is a still-connecting socket, this probably
                    433:          * a retransmit of the SYN.  Whether it's a retransmit SYN
                    434:         * or something else, we nuke it.
                    435:          */
                    436:         if (so->so_state & SS_ISFCONNECTING)
                    437:                 goto drop;
                    438: 
                    439:        tp = sototcpcb(so);
1.1.1.3   root      440: 
1.1       root      441:        /* XXX Should never fail */
                    442:        if (tp == 0)
                    443:                goto dropwithreset;
                    444:        if (tp->t_state == TCPS_CLOSED)
                    445:                goto drop;
1.1.1.3   root      446: 
1.1       root      447:        /* Unscale the window into a 32-bit value. */
                    448: /*     if ((tiflags & TH_SYN) == 0)
                    449:  *             tiwin = ti->ti_win << tp->snd_scale;
                    450:  *     else
                    451:  */
                    452:                tiwin = ti->ti_win;
                    453: 
                    454:        /*
                    455:         * Segment received on connection.
                    456:         * Reset idle time and keep-alive timer.
                    457:         */
                    458:        tp->t_idle = 0;
1.1.1.3   root      459:        if (SO_OPTIONS)
                    460:           tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL;
1.1       root      461:        else
1.1.1.3   root      462:           tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE;
1.1       root      463: 
                    464:        /*
                    465:         * Process options if not in LISTEN state,
                    466:         * else do it below (after getting remote address).
                    467:         */
                    468:        if (optp && tp->t_state != TCPS_LISTEN)
1.1.1.3   root      469:                tcp_dooptions(tp, (u_char *)optp, optlen, ti);
1.1       root      470: /* , */
                    471: /*                     &ts_present, &ts_val, &ts_ecr); */
                    472: 
1.1.1.3   root      473:        /*
1.1       root      474:         * Header prediction: check for the two common cases
                    475:         * of a uni-directional data xfer.  If the packet has
                    476:         * no control flags, is in-sequence, the window didn't
                    477:         * change and we're not retransmitting, it's a
                    478:         * candidate.  If the length is zero and the ack moved
                    479:         * forward, we're the sender side of the xfer.  Just
                    480:         * free the data acked & wake any higher level process
                    481:         * that was blocked waiting for space.  If the length
                    482:         * is non-zero and the ack didn't move, we're the
                    483:         * receiver side.  If we're getting packets in-order
                    484:         * (the reassembly queue is empty), add the data to
                    485:         * the socket buffer and note that we need a delayed ack.
                    486:         *
                    487:         * XXX Some of these tests are not needed
                    488:         * eg: the tiwin == tp->snd_wnd prevents many more
                    489:         * predictions.. with no *real* advantage..
                    490:         */
                    491:        if (tp->t_state == TCPS_ESTABLISHED &&
                    492:            (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
                    493: /*         (!ts_present || TSTMP_GEQ(ts_val, tp->ts_recent)) && */
                    494:            ti->ti_seq == tp->rcv_nxt &&
                    495:            tiwin && tiwin == tp->snd_wnd &&
                    496:            tp->snd_nxt == tp->snd_max) {
1.1.1.3   root      497:                /*
1.1       root      498:                 * If last ACK falls within this segment's sequence numbers,
                    499:                 *  record the timestamp.
                    500:                 */
                    501: /*             if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) &&
                    502:  *                SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len)) {
                    503:  *                     tp->ts_recent_age = tcp_now;
                    504:  *                     tp->ts_recent = ts_val;
                    505:  *             }
                    506:  */
                    507:                if (ti->ti_len == 0) {
                    508:                        if (SEQ_GT(ti->ti_ack, tp->snd_una) &&
                    509:                            SEQ_LEQ(ti->ti_ack, tp->snd_max) &&
                    510:                            tp->snd_cwnd >= tp->snd_wnd) {
                    511:                                /*
                    512:                                 * this is a pure ack for outstanding data.
                    513:                                 */
1.1.1.3   root      514:                                STAT(tcpstat.tcps_predack++);
1.1       root      515: /*                             if (ts_present)
                    516:  *                                     tcp_xmit_timer(tp, tcp_now-ts_ecr+1);
1.1.1.3   root      517:  *                             else
1.1       root      518:  */                                 if (tp->t_rtt &&
                    519:                                            SEQ_GT(ti->ti_ack, tp->t_rtseq))
                    520:                                        tcp_xmit_timer(tp, tp->t_rtt);
                    521:                                acked = ti->ti_ack - tp->snd_una;
1.1.1.3   root      522:                                STAT(tcpstat.tcps_rcvackpack++);
                    523:                                STAT(tcpstat.tcps_rcvackbyte += acked);
1.1       root      524:                                sbdrop(&so->so_snd, acked);
                    525:                                tp->snd_una = ti->ti_ack;
                    526:                                m_freem(m);
                    527: 
                    528:                                /*
                    529:                                 * If all outstanding data are acked, stop
                    530:                                 * retransmit timer, otherwise restart timer
                    531:                                 * using current (possibly backed-off) value.
                    532:                                 * If process is waiting for space,
                    533:                                 * wakeup/selwakeup/signal.  If data
                    534:                                 * are ready to send, let tcp_output
                    535:                                 * decide between more output or persist.
                    536:                                 */
                    537:                                if (tp->snd_una == tp->snd_max)
                    538:                                        tp->t_timer[TCPT_REXMT] = 0;
                    539:                                else if (tp->t_timer[TCPT_PERSIST] == 0)
                    540:                                        tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
                    541: 
1.1.1.3   root      542:                                /*
1.1       root      543:                                 * There's room in so_snd, sowwakeup will read()
                    544:                                 * from the socket if we can
                    545:                                 */
                    546: /*                             if (so->so_snd.sb_flags & SB_NOTIFY)
                    547:  *                                     sowwakeup(so);
                    548:  */
1.1.1.3   root      549:                                /*
1.1       root      550:                                 * This is called because sowwakeup might have
                    551:                                 * put data into so_snd.  Since we don't do sowwakeup,
                    552:                                 * we don't need this.. XXX???
                    553:                                 */
                    554:                                if (so->so_snd.sb_cc)
                    555:                                        (void) tcp_output(tp);
                    556: 
                    557:                                return;
                    558:                        }
                    559:                } else if (ti->ti_ack == tp->snd_una &&
1.1.1.4 ! root      560:                    tcpfrag_list_empty(tp) &&
1.1       root      561:                    ti->ti_len <= sbspace(&so->so_rcv)) {
                    562:                        /*
                    563:                         * this is a pure, in-sequence data packet
                    564:                         * with nothing on the reassembly queue and
                    565:                         * we have enough buffer space to take it.
                    566:                         */
1.1.1.3   root      567:                        STAT(tcpstat.tcps_preddat++);
1.1       root      568:                        tp->rcv_nxt += ti->ti_len;
1.1.1.3   root      569:                        STAT(tcpstat.tcps_rcvpack++);
                    570:                        STAT(tcpstat.tcps_rcvbyte += ti->ti_len);
1.1       root      571:                        /*
                    572:                         * Add data to socket buffer.
                    573:                         */
                    574:                        if (so->so_emu) {
                    575:                                if (tcp_emu(so,m)) sbappend(so, m);
                    576:                        } else
                    577:                                sbappend(so, m);
1.1.1.3   root      578: 
                    579:                        /*
1.1       root      580:                         * XXX This is called when data arrives.  Later, check
                    581:                         * if we can actually write() to the socket
                    582:                         * XXX Need to check? It's be NON_BLOCKING
                    583:                         */
                    584: /*                     sorwakeup(so); */
1.1.1.3   root      585: 
1.1       root      586:                        /*
                    587:                         * If this is a short packet, then ACK now - with Nagle
                    588:                         *      congestion avoidance sender won't send more until
                    589:                         *      he gets an ACK.
1.1.1.3   root      590:                         *
1.1.1.2   root      591:                         * It is better to not delay acks at all to maximize
                    592:                         * TCP throughput.  See RFC 2581.
1.1.1.3   root      593:                         */
1.1.1.2   root      594:                        tp->t_flags |= TF_ACKNOW;
                    595:                        tcp_output(tp);
1.1       root      596:                        return;
                    597:                }
                    598:        } /* header prediction */
                    599:        /*
                    600:         * Calculate amount of space in receive window,
                    601:         * and then do TCP input processing.
                    602:         * Receive window is amount of space in rcv queue,
                    603:         * but not less than advertised window.
                    604:         */
                    605:        { int win;
                    606:           win = sbspace(&so->so_rcv);
                    607:          if (win < 0)
                    608:            win = 0;
                    609:          tp->rcv_wnd = max(win, (int)(tp->rcv_adv - tp->rcv_nxt));
                    610:        }
                    611: 
                    612:        switch (tp->t_state) {
                    613: 
                    614:        /*
                    615:         * If the state is LISTEN then ignore segment if it contains an RST.
                    616:         * If the segment contains an ACK then it is bad and send a RST.
                    617:         * If it does not contain a SYN then it is not interesting; drop it.
                    618:         * Don't bother responding if the destination was a broadcast.
                    619:         * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial
                    620:         * tp->iss, and send a segment:
                    621:         *     <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
                    622:         * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.
                    623:         * Fill in remote peer address fields if not previously specified.
                    624:         * Enter SYN_RECEIVED state, and process any other fields of this
                    625:         * segment in this state.
                    626:         */
                    627:        case TCPS_LISTEN: {
                    628: 
                    629:          if (tiflags & TH_RST)
                    630:            goto drop;
                    631:          if (tiflags & TH_ACK)
                    632:            goto dropwithreset;
                    633:          if ((tiflags & TH_SYN) == 0)
                    634:            goto drop;
1.1.1.3   root      635: 
1.1       root      636:          /*
                    637:           * This has way too many gotos...
                    638:           * But a bit of spaghetti code never hurt anybody :)
                    639:           */
1.1.1.3   root      640: 
1.1       root      641:          /*
                    642:           * If this is destined for the control address, then flag to
                    643:           * tcp_ctl once connected, otherwise connect
                    644:           */
                    645:          if ((so->so_faddr.s_addr&htonl(0xffffff00)) == special_addr.s_addr) {
                    646:            int lastbyte=ntohl(so->so_faddr.s_addr) & 0xff;
                    647:            if (lastbyte!=CTL_ALIAS && lastbyte!=CTL_DNS) {
                    648: #if 0
                    649:              if(lastbyte==CTL_CMD || lastbyte==CTL_EXEC) {
                    650:                /* Command or exec address */
                    651:                so->so_state |= SS_CTL;
1.1.1.3   root      652:              } else
1.1       root      653: #endif
                    654:               {
                    655:                /* May be an add exec */
                    656:                for(ex_ptr = exec_list; ex_ptr; ex_ptr = ex_ptr->ex_next) {
1.1.1.3   root      657:                  if(ex_ptr->ex_fport == so->so_fport &&
1.1       root      658:                     lastbyte == ex_ptr->ex_addr) {
                    659:                    so->so_state |= SS_CTL;
                    660:                    break;
                    661:                  }
                    662:                }
                    663:              }
                    664:              if(so->so_state & SS_CTL) goto cont_input;
                    665:            }
                    666:            /* CTL_ALIAS: Do nothing, tcp_fconnect will be called on it */
                    667:          }
1.1.1.3   root      668: 
1.1       root      669:          if (so->so_emu & EMU_NOCONNECT) {
                    670:            so->so_emu &= ~EMU_NOCONNECT;
                    671:            goto cont_input;
                    672:          }
1.1.1.3   root      673: 
1.1       root      674:          if((tcp_fconnect(so) == -1) && (errno != EINPROGRESS) && (errno != EWOULDBLOCK)) {
                    675:            u_char code=ICMP_UNREACH_NET;
                    676:            DEBUG_MISC((dfd," tcp fconnect errno = %d-%s\n",
                    677:                        errno,strerror(errno)));
                    678:            if(errno == ECONNREFUSED) {
                    679:              /* ACK the SYN, send RST to refuse the connection */
                    680:              tcp_respond(tp, ti, m, ti->ti_seq+1, (tcp_seq)0,
1.1.1.3   root      681:                          TH_RST|TH_ACK);
1.1       root      682:            } else {
                    683:              if(errno == EHOSTUNREACH) code=ICMP_UNREACH_HOST;
                    684:              HTONL(ti->ti_seq);             /* restore tcp header */
                    685:              HTONL(ti->ti_ack);
                    686:              HTONS(ti->ti_win);
                    687:              HTONS(ti->ti_urp);
                    688:              m->m_data -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
                    689:              m->m_len  += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
                    690:              *ip=save_ip;
                    691:              icmp_error(m, ICMP_UNREACH,code, 0,strerror(errno));
                    692:            }
                    693:            tp = tcp_close(tp);
                    694:            m_free(m);
                    695:          } else {
                    696:            /*
                    697:             * Haven't connected yet, save the current mbuf
                    698:             * and ti, and return
                    699:             * XXX Some OS's don't tell us whether the connect()
                    700:             * succeeded or not.  So we must time it out.
                    701:             */
                    702:            so->so_m = m;
                    703:            so->so_ti = ti;
                    704:            tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
                    705:            tp->t_state = TCPS_SYN_RECEIVED;
                    706:          }
                    707:          return;
                    708: 
1.1.1.3   root      709:        cont_conn:
                    710:          /* m==NULL
1.1       root      711:           * Check if the connect succeeded
                    712:           */
                    713:          if (so->so_state & SS_NOFDREF) {
                    714:            tp = tcp_close(tp);
                    715:            goto dropwithreset;
                    716:          }
1.1.1.3   root      717:        cont_input:
1.1       root      718:          tcp_template(tp);
1.1.1.3   root      719: 
1.1       root      720:          if (optp)
                    721:            tcp_dooptions(tp, (u_char *)optp, optlen, ti);
                    722:          /* , */
                    723:          /*                            &ts_present, &ts_val, &ts_ecr); */
1.1.1.3   root      724: 
1.1       root      725:          if (iss)
                    726:            tp->iss = iss;
1.1.1.3   root      727:          else
1.1       root      728:            tp->iss = tcp_iss;
                    729:          tcp_iss += TCP_ISSINCR/2;
                    730:          tp->irs = ti->ti_seq;
                    731:          tcp_sendseqinit(tp);
                    732:          tcp_rcvseqinit(tp);
                    733:          tp->t_flags |= TF_ACKNOW;
                    734:          tp->t_state = TCPS_SYN_RECEIVED;
                    735:          tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
1.1.1.3   root      736:          STAT(tcpstat.tcps_accepts++);
1.1       root      737:          goto trimthenstep6;
                    738:        } /* case TCPS_LISTEN */
1.1.1.3   root      739: 
1.1       root      740:        /*
                    741:         * If the state is SYN_SENT:
                    742:         *      if seg contains an ACK, but not for our SYN, drop the input.
                    743:         *      if seg contains a RST, then drop the connection.
                    744:         *      if seg does not contain SYN, then drop it.
                    745:         * Otherwise this is an acceptable SYN segment
                    746:         *      initialize tp->rcv_nxt and tp->irs
                    747:         *      if seg contains ack then advance tp->snd_una
                    748:         *      if SYN has been acked change to ESTABLISHED else SYN_RCVD state
                    749:         *      arrange for segment to be acked (eventually)
                    750:         *      continue processing rest of data/controls, beginning with URG
                    751:         */
                    752:        case TCPS_SYN_SENT:
                    753:                if ((tiflags & TH_ACK) &&
                    754:                    (SEQ_LEQ(ti->ti_ack, tp->iss) ||
                    755:                     SEQ_GT(ti->ti_ack, tp->snd_max)))
                    756:                        goto dropwithreset;
                    757: 
                    758:                if (tiflags & TH_RST) {
                    759:                        if (tiflags & TH_ACK)
                    760:                                tp = tcp_drop(tp,0); /* XXX Check t_softerror! */
                    761:                        goto drop;
                    762:                }
                    763: 
                    764:                if ((tiflags & TH_SYN) == 0)
                    765:                        goto drop;
                    766:                if (tiflags & TH_ACK) {
                    767:                        tp->snd_una = ti->ti_ack;
                    768:                        if (SEQ_LT(tp->snd_nxt, tp->snd_una))
                    769:                                tp->snd_nxt = tp->snd_una;
                    770:                }
                    771: 
                    772:                tp->t_timer[TCPT_REXMT] = 0;
                    773:                tp->irs = ti->ti_seq;
                    774:                tcp_rcvseqinit(tp);
                    775:                tp->t_flags |= TF_ACKNOW;
                    776:                if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) {
1.1.1.3   root      777:                        STAT(tcpstat.tcps_connects++);
1.1       root      778:                        soisfconnected(so);
                    779:                        tp->t_state = TCPS_ESTABLISHED;
1.1.1.3   root      780: 
1.1       root      781:                        /* Do window scaling on this connection? */
                    782: /*                     if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
                    783:  *                             (TF_RCVD_SCALE|TF_REQ_SCALE)) {
                    784:  *                             tp->snd_scale = tp->requested_s_scale;
                    785:  *                             tp->rcv_scale = tp->request_r_scale;
                    786:  *                     }
                    787:  */
                    788:                        (void) tcp_reass(tp, (struct tcpiphdr *)0,
                    789:                                (struct mbuf *)0);
                    790:                        /*
                    791:                         * if we didn't have to retransmit the SYN,
                    792:                         * use its rtt as our initial srtt & rtt var.
                    793:                         */
                    794:                        if (tp->t_rtt)
                    795:                                tcp_xmit_timer(tp, tp->t_rtt);
                    796:                } else
                    797:                        tp->t_state = TCPS_SYN_RECEIVED;
                    798: 
                    799: trimthenstep6:
                    800:                /*
                    801:                 * Advance ti->ti_seq to correspond to first data byte.
                    802:                 * If data, trim to stay within window,
                    803:                 * dropping FIN if necessary.
                    804:                 */
                    805:                ti->ti_seq++;
                    806:                if (ti->ti_len > tp->rcv_wnd) {
                    807:                        todrop = ti->ti_len - tp->rcv_wnd;
                    808:                        m_adj(m, -todrop);
                    809:                        ti->ti_len = tp->rcv_wnd;
                    810:                        tiflags &= ~TH_FIN;
1.1.1.3   root      811:                        STAT(tcpstat.tcps_rcvpackafterwin++);
                    812:                        STAT(tcpstat.tcps_rcvbyteafterwin += todrop);
1.1       root      813:                }
                    814:                tp->snd_wl1 = ti->ti_seq - 1;
                    815:                tp->rcv_up = ti->ti_seq;
                    816:                goto step6;
                    817:        } /* switch tp->t_state */
                    818:        /*
                    819:         * States other than LISTEN or SYN_SENT.
                    820:         * First check timestamp, if present.
1.1.1.3   root      821:         * Then check that at least some bytes of segment are within
1.1       root      822:         * receive window.  If segment begins before rcv_nxt,
                    823:         * drop leading data (and SYN); if nothing left, just ack.
1.1.1.3   root      824:         *
1.1       root      825:         * RFC 1323 PAWS: If we have a timestamp reply on this segment
                    826:         * and it's less than ts_recent, drop it.
                    827:         */
                    828: /*     if (ts_present && (tiflags & TH_RST) == 0 && tp->ts_recent &&
                    829:  *         TSTMP_LT(ts_val, tp->ts_recent)) {
                    830:  *
                    831:  */            /* Check to see if ts_recent is over 24 days old.  */
                    832: /*             if ((int)(tcp_now - tp->ts_recent_age) > TCP_PAWS_IDLE) {
                    833:  */                    /*
                    834:  *                      * Invalidate ts_recent.  If this segment updates
                    835:  *                      * ts_recent, the age will be reset later and ts_recent
                    836:  *                      * will get a valid value.  If it does not, setting
                    837:  *                      * ts_recent to zero will at least satisfy the
                    838:  *                      * requirement that zero be placed in the timestamp
                    839:  *                      * echo reply when ts_recent isn't valid.  The
                    840:  *                      * age isn't reset until we get a valid ts_recent
                    841:  *                      * because we don't want out-of-order segments to be
                    842:  *                      * dropped when ts_recent is old.
                    843:  *                      */
                    844: /*                     tp->ts_recent = 0;
                    845:  *             } else {
                    846:  *                     tcpstat.tcps_rcvduppack++;
                    847:  *                     tcpstat.tcps_rcvdupbyte += ti->ti_len;
                    848:  *                     tcpstat.tcps_pawsdrop++;
                    849:  *                     goto dropafterack;
                    850:  *             }
                    851:  *     }
                    852:  */
                    853: 
                    854:        todrop = tp->rcv_nxt - ti->ti_seq;
                    855:        if (todrop > 0) {
                    856:                if (tiflags & TH_SYN) {
                    857:                        tiflags &= ~TH_SYN;
                    858:                        ti->ti_seq++;
1.1.1.3   root      859:                        if (ti->ti_urp > 1)
1.1       root      860:                                ti->ti_urp--;
                    861:                        else
                    862:                                tiflags &= ~TH_URG;
                    863:                        todrop--;
                    864:                }
                    865:                /*
                    866:                 * Following if statement from Stevens, vol. 2, p. 960.
                    867:                 */
                    868:                if (todrop > ti->ti_len
                    869:                    || (todrop == ti->ti_len && (tiflags & TH_FIN) == 0)) {
                    870:                        /*
                    871:                         * Any valid FIN must be to the left of the window.
                    872:                         * At this point the FIN must be a duplicate or out
                    873:                         * of sequence; drop it.
                    874:                         */
                    875:                        tiflags &= ~TH_FIN;
1.1.1.3   root      876: 
1.1       root      877:                        /*
                    878:                         * Send an ACK to resynchronize and drop any data.
                    879:                         * But keep on processing for RST or ACK.
                    880:                         */
                    881:                        tp->t_flags |= TF_ACKNOW;
                    882:                        todrop = ti->ti_len;
1.1.1.3   root      883:                        STAT(tcpstat.tcps_rcvduppack++);
                    884:                        STAT(tcpstat.tcps_rcvdupbyte += todrop);
1.1       root      885:                } else {
1.1.1.3   root      886:                        STAT(tcpstat.tcps_rcvpartduppack++);
                    887:                        STAT(tcpstat.tcps_rcvpartdupbyte += todrop);
1.1       root      888:                }
                    889:                m_adj(m, todrop);
                    890:                ti->ti_seq += todrop;
                    891:                ti->ti_len -= todrop;
                    892:                if (ti->ti_urp > todrop)
                    893:                        ti->ti_urp -= todrop;
                    894:                else {
                    895:                        tiflags &= ~TH_URG;
                    896:                        ti->ti_urp = 0;
                    897:                }
                    898:        }
                    899:        /*
                    900:         * If new data are received on a connection after the
                    901:         * user processes are gone, then RST the other end.
                    902:         */
                    903:        if ((so->so_state & SS_NOFDREF) &&
                    904:            tp->t_state > TCPS_CLOSE_WAIT && ti->ti_len) {
                    905:                tp = tcp_close(tp);
1.1.1.3   root      906:                STAT(tcpstat.tcps_rcvafterclose++);
1.1       root      907:                goto dropwithreset;
                    908:        }
                    909: 
                    910:        /*
                    911:         * If segment ends after window, drop trailing data
                    912:         * (and PUSH and FIN); if nothing left, just ACK.
                    913:         */
                    914:        todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd);
                    915:        if (todrop > 0) {
1.1.1.3   root      916:                STAT(tcpstat.tcps_rcvpackafterwin++);
1.1       root      917:                if (todrop >= ti->ti_len) {
1.1.1.3   root      918:                        STAT(tcpstat.tcps_rcvbyteafterwin += ti->ti_len);
1.1       root      919:                        /*
                    920:                         * If a new connection request is received
                    921:                         * while in TIME_WAIT, drop the old connection
                    922:                         * and start over if the sequence numbers
                    923:                         * are above the previous ones.
                    924:                         */
                    925:                        if (tiflags & TH_SYN &&
                    926:                            tp->t_state == TCPS_TIME_WAIT &&
                    927:                            SEQ_GT(ti->ti_seq, tp->rcv_nxt)) {
                    928:                                iss = tp->rcv_nxt + TCP_ISSINCR;
                    929:                                tp = tcp_close(tp);
                    930:                                goto findso;
                    931:                        }
                    932:                        /*
                    933:                         * If window is closed can only take segments at
                    934:                         * window edge, and have to drop data and PUSH from
                    935:                         * incoming segments.  Continue processing, but
                    936:                         * remember to ack.  Otherwise, drop segment
                    937:                         * and ack.
                    938:                         */
                    939:                        if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) {
                    940:                                tp->t_flags |= TF_ACKNOW;
1.1.1.3   root      941:                                STAT(tcpstat.tcps_rcvwinprobe++);
1.1       root      942:                        } else
                    943:                                goto dropafterack;
                    944:                } else
1.1.1.3   root      945:                        STAT(tcpstat.tcps_rcvbyteafterwin += todrop);
1.1       root      946:                m_adj(m, -todrop);
                    947:                ti->ti_len -= todrop;
                    948:                tiflags &= ~(TH_PUSH|TH_FIN);
                    949:        }
                    950: 
                    951:        /*
                    952:         * If last ACK falls within this segment's sequence numbers,
                    953:         * record its timestamp.
                    954:         */
                    955: /*     if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) &&
                    956:  *         SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len +
                    957:  *                ((tiflags & (TH_SYN|TH_FIN)) != 0))) {
                    958:  *             tp->ts_recent_age = tcp_now;
                    959:  *             tp->ts_recent = ts_val;
                    960:  *     }
                    961:  */
                    962: 
                    963:        /*
                    964:         * If the RST bit is set examine the state:
                    965:         *    SYN_RECEIVED STATE:
                    966:         *      If passive open, return to LISTEN state.
                    967:         *      If active open, inform user that connection was refused.
                    968:         *    ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES:
                    969:         *      Inform user that connection was reset, and close tcb.
                    970:         *    CLOSING, LAST_ACK, TIME_WAIT STATES
                    971:         *      Close the tcb.
                    972:         */
                    973:        if (tiflags&TH_RST) switch (tp->t_state) {
                    974: 
                    975:        case TCPS_SYN_RECEIVED:
                    976: /*             so->so_error = ECONNREFUSED; */
                    977:                goto close;
                    978: 
                    979:        case TCPS_ESTABLISHED:
                    980:        case TCPS_FIN_WAIT_1:
                    981:        case TCPS_FIN_WAIT_2:
                    982:        case TCPS_CLOSE_WAIT:
                    983: /*             so->so_error = ECONNRESET; */
                    984:        close:
                    985:                tp->t_state = TCPS_CLOSED;
1.1.1.3   root      986:                STAT(tcpstat.tcps_drops++);
1.1       root      987:                tp = tcp_close(tp);
                    988:                goto drop;
                    989: 
                    990:        case TCPS_CLOSING:
                    991:        case TCPS_LAST_ACK:
                    992:        case TCPS_TIME_WAIT:
                    993:                tp = tcp_close(tp);
                    994:                goto drop;
                    995:        }
                    996: 
                    997:        /*
                    998:         * If a SYN is in the window, then this is an
                    999:         * error and we send an RST and drop the connection.
                   1000:         */
                   1001:        if (tiflags & TH_SYN) {
                   1002:                tp = tcp_drop(tp,0);
                   1003:                goto dropwithreset;
                   1004:        }
                   1005: 
                   1006:        /*
                   1007:         * If the ACK bit is off we drop the segment and return.
                   1008:         */
                   1009:        if ((tiflags & TH_ACK) == 0) goto drop;
                   1010: 
                   1011:        /*
                   1012:         * Ack processing.
                   1013:         */
                   1014:        switch (tp->t_state) {
                   1015:        /*
                   1016:         * In SYN_RECEIVED state if the ack ACKs our SYN then enter
                   1017:         * ESTABLISHED state and continue processing, otherwise
                   1018:         * send an RST.  una<=ack<=max
                   1019:         */
                   1020:        case TCPS_SYN_RECEIVED:
                   1021: 
                   1022:                if (SEQ_GT(tp->snd_una, ti->ti_ack) ||
                   1023:                    SEQ_GT(ti->ti_ack, tp->snd_max))
                   1024:                        goto dropwithreset;
1.1.1.3   root     1025:                STAT(tcpstat.tcps_connects++);
1.1       root     1026:                tp->t_state = TCPS_ESTABLISHED;
1.1.1.3   root     1027:                /*
                   1028:                 * The sent SYN is ack'ed with our sequence number +1
                   1029:                 * The first data byte already in the buffer will get
1.1       root     1030:                 * lost if no correction is made.  This is only needed for
                   1031:                 * SS_CTL since the buffer is empty otherwise.
1.1.1.3   root     1032:                 * tp->snd_una++; or:
1.1       root     1033:                 */
                   1034:                tp->snd_una=ti->ti_ack;
                   1035:                if (so->so_state & SS_CTL) {
                   1036:                  /* So tcp_ctl reports the right state */
                   1037:                  ret = tcp_ctl(so);
                   1038:                  if (ret == 1) {
                   1039:                    soisfconnected(so);
                   1040:                    so->so_state &= ~SS_CTL;   /* success XXX */
                   1041:                  } else if (ret == 2) {
                   1042:                    so->so_state = SS_NOFDREF; /* CTL_CMD */
                   1043:                  } else {
                   1044:                    needoutput = 1;
                   1045:                    tp->t_state = TCPS_FIN_WAIT_1;
                   1046:                  }
                   1047:                } else {
                   1048:                  soisfconnected(so);
                   1049:                }
1.1.1.3   root     1050: 
1.1       root     1051:                /* Do window scaling? */
                   1052: /*             if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
                   1053:  *                     (TF_RCVD_SCALE|TF_REQ_SCALE)) {
                   1054:  *                     tp->snd_scale = tp->requested_s_scale;
                   1055:  *                     tp->rcv_scale = tp->request_r_scale;
                   1056:  *             }
                   1057:  */
                   1058:                (void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0);
                   1059:                tp->snd_wl1 = ti->ti_seq - 1;
                   1060:                /* Avoid ack processing; snd_una==ti_ack  =>  dup ack */
                   1061:                goto synrx_to_est;
                   1062:                /* fall into ... */
                   1063: 
                   1064:        /*
                   1065:         * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
                   1066:         * ACKs.  If the ack is in the range
                   1067:         *      tp->snd_una < ti->ti_ack <= tp->snd_max
                   1068:         * then advance tp->snd_una to ti->ti_ack and drop
                   1069:         * data from the retransmission queue.  If this ACK reflects
                   1070:         * more up to date window information we update our window information.
                   1071:         */
                   1072:        case TCPS_ESTABLISHED:
                   1073:        case TCPS_FIN_WAIT_1:
                   1074:        case TCPS_FIN_WAIT_2:
                   1075:        case TCPS_CLOSE_WAIT:
                   1076:        case TCPS_CLOSING:
                   1077:        case TCPS_LAST_ACK:
                   1078:        case TCPS_TIME_WAIT:
                   1079: 
                   1080:                if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) {
                   1081:                        if (ti->ti_len == 0 && tiwin == tp->snd_wnd) {
1.1.1.3   root     1082:                          STAT(tcpstat.tcps_rcvdupack++);
1.1       root     1083:                          DEBUG_MISC((dfd," dup ack  m = %lx  so = %lx \n",
                   1084:                                      (long )m, (long )so));
                   1085:                                /*
                   1086:                                 * If we have outstanding data (other than
                   1087:                                 * a window probe), this is a completely
                   1088:                                 * duplicate ack (ie, window info didn't
                   1089:                                 * change), the ack is the biggest we've
                   1090:                                 * seen and we've seen exactly our rexmt
                   1091:                                 * threshold of them, assume a packet
                   1092:                                 * has been dropped and retransmit it.
                   1093:                                 * Kludge snd_nxt & the congestion
                   1094:                                 * window so we send only this one
                   1095:                                 * packet.
                   1096:                                 *
                   1097:                                 * We know we're losing at the current
                   1098:                                 * window size so do congestion avoidance
                   1099:                                 * (set ssthresh to half the current window
                   1100:                                 * and pull our congestion window back to
                   1101:                                 * the new ssthresh).
                   1102:                                 *
                   1103:                                 * Dup acks mean that packets have left the
1.1.1.3   root     1104:                                 * network (they're now cached at the receiver)
1.1       root     1105:                                 * so bump cwnd by the amount in the receiver
                   1106:                                 * to keep a constant cwnd packets in the
                   1107:                                 * network.
                   1108:                                 */
                   1109:                                if (tp->t_timer[TCPT_REXMT] == 0 ||
                   1110:                                    ti->ti_ack != tp->snd_una)
                   1111:                                        tp->t_dupacks = 0;
1.1.1.3   root     1112:                                else if (++tp->t_dupacks == TCPREXMTTHRESH) {
1.1       root     1113:                                        tcp_seq onxt = tp->snd_nxt;
                   1114:                                        u_int win =
                   1115:                                            min(tp->snd_wnd, tp->snd_cwnd) / 2 /
                   1116:                                                tp->t_maxseg;
                   1117: 
                   1118:                                        if (win < 2)
                   1119:                                                win = 2;
                   1120:                                        tp->snd_ssthresh = win * tp->t_maxseg;
                   1121:                                        tp->t_timer[TCPT_REXMT] = 0;
                   1122:                                        tp->t_rtt = 0;
                   1123:                                        tp->snd_nxt = ti->ti_ack;
                   1124:                                        tp->snd_cwnd = tp->t_maxseg;
                   1125:                                        (void) tcp_output(tp);
                   1126:                                        tp->snd_cwnd = tp->snd_ssthresh +
                   1127:                                               tp->t_maxseg * tp->t_dupacks;
                   1128:                                        if (SEQ_GT(onxt, tp->snd_nxt))
                   1129:                                                tp->snd_nxt = onxt;
                   1130:                                        goto drop;
1.1.1.3   root     1131:                                } else if (tp->t_dupacks > TCPREXMTTHRESH) {
1.1       root     1132:                                        tp->snd_cwnd += tp->t_maxseg;
                   1133:                                        (void) tcp_output(tp);
                   1134:                                        goto drop;
                   1135:                                }
                   1136:                        } else
                   1137:                                tp->t_dupacks = 0;
                   1138:                        break;
                   1139:                }
                   1140:        synrx_to_est:
                   1141:                /*
                   1142:                 * If the congestion window was inflated to account
                   1143:                 * for the other side's cached packets, retract it.
                   1144:                 */
1.1.1.3   root     1145:                if (tp->t_dupacks > TCPREXMTTHRESH &&
1.1       root     1146:                    tp->snd_cwnd > tp->snd_ssthresh)
                   1147:                        tp->snd_cwnd = tp->snd_ssthresh;
                   1148:                tp->t_dupacks = 0;
                   1149:                if (SEQ_GT(ti->ti_ack, tp->snd_max)) {
1.1.1.3   root     1150:                        STAT(tcpstat.tcps_rcvacktoomuch++);
1.1       root     1151:                        goto dropafterack;
                   1152:                }
                   1153:                acked = ti->ti_ack - tp->snd_una;
1.1.1.3   root     1154:                STAT(tcpstat.tcps_rcvackpack++);
                   1155:                STAT(tcpstat.tcps_rcvackbyte += acked);
1.1       root     1156: 
                   1157:                /*
                   1158:                 * If we have a timestamp reply, update smoothed
                   1159:                 * round trip time.  If no timestamp is present but
                   1160:                 * transmit timer is running and timed sequence
                   1161:                 * number was acked, update smoothed round trip time.
                   1162:                 * Since we now have an rtt measurement, cancel the
                   1163:                 * timer backoff (cf., Phil Karn's retransmit alg.).
                   1164:                 * Recompute the initial retransmit timer.
                   1165:                 */
                   1166: /*             if (ts_present)
                   1167:  *                     tcp_xmit_timer(tp, tcp_now-ts_ecr+1);
                   1168:  *             else
1.1.1.3   root     1169:  */
1.1       root     1170:                     if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq))
                   1171:                        tcp_xmit_timer(tp,tp->t_rtt);
                   1172: 
                   1173:                /*
                   1174:                 * If all outstanding data is acked, stop retransmit
                   1175:                 * timer and remember to restart (more output or persist).
                   1176:                 * If there is more data to be acked, restart retransmit
                   1177:                 * timer, using current (possibly backed-off) value.
                   1178:                 */
                   1179:                if (ti->ti_ack == tp->snd_max) {
                   1180:                        tp->t_timer[TCPT_REXMT] = 0;
                   1181:                        needoutput = 1;
                   1182:                } else if (tp->t_timer[TCPT_PERSIST] == 0)
                   1183:                        tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
                   1184:                /*
                   1185:                 * When new data is acked, open the congestion window.
                   1186:                 * If the window gives us less than ssthresh packets
                   1187:                 * in flight, open exponentially (maxseg per packet).
                   1188:                 * Otherwise open linearly: maxseg per window
                   1189:                 * (maxseg^2 / cwnd per packet).
                   1190:                 */
                   1191:                {
                   1192:                  register u_int cw = tp->snd_cwnd;
                   1193:                  register u_int incr = tp->t_maxseg;
                   1194: 
                   1195:                  if (cw > tp->snd_ssthresh)
                   1196:                    incr = incr * incr / cw;
                   1197:                  tp->snd_cwnd = min(cw + incr, TCP_MAXWIN<<tp->snd_scale);
                   1198:                }
                   1199:                if (acked > so->so_snd.sb_cc) {
                   1200:                        tp->snd_wnd -= so->so_snd.sb_cc;
                   1201:                        sbdrop(&so->so_snd, (int )so->so_snd.sb_cc);
                   1202:                        ourfinisacked = 1;
                   1203:                } else {
                   1204:                        sbdrop(&so->so_snd, acked);
                   1205:                        tp->snd_wnd -= acked;
                   1206:                        ourfinisacked = 0;
                   1207:                }
                   1208:                /*
                   1209:                 * XXX sowwakeup is called when data is acked and there's room for
1.1.1.3   root     1210:                 * more data... it should read() the socket
1.1       root     1211:                 */
                   1212: /*             if (so->so_snd.sb_flags & SB_NOTIFY)
                   1213:  *                     sowwakeup(so);
                   1214:  */
                   1215:                tp->snd_una = ti->ti_ack;
                   1216:                if (SEQ_LT(tp->snd_nxt, tp->snd_una))
                   1217:                        tp->snd_nxt = tp->snd_una;
                   1218: 
                   1219:                switch (tp->t_state) {
                   1220: 
                   1221:                /*
                   1222:                 * In FIN_WAIT_1 STATE in addition to the processing
                   1223:                 * for the ESTABLISHED state if our FIN is now acknowledged
                   1224:                 * then enter FIN_WAIT_2.
                   1225:                 */
                   1226:                case TCPS_FIN_WAIT_1:
                   1227:                        if (ourfinisacked) {
                   1228:                                /*
                   1229:                                 * If we can't receive any more
                   1230:                                 * data, then closing user can proceed.
                   1231:                                 * Starting the timer is contrary to the
                   1232:                                 * specification, but if we don't get a FIN
                   1233:                                 * we'll hang forever.
                   1234:                                 */
                   1235:                                if (so->so_state & SS_FCANTRCVMORE) {
                   1236:                                        soisfdisconnected(so);
1.1.1.3   root     1237:                                        tp->t_timer[TCPT_2MSL] = TCP_MAXIDLE;
1.1       root     1238:                                }
                   1239:                                tp->t_state = TCPS_FIN_WAIT_2;
                   1240:                        }
                   1241:                        break;
                   1242: 
                   1243:                /*
                   1244:                 * In CLOSING STATE in addition to the processing for
                   1245:                 * the ESTABLISHED state if the ACK acknowledges our FIN
                   1246:                 * then enter the TIME-WAIT state, otherwise ignore
                   1247:                 * the segment.
                   1248:                 */
                   1249:                case TCPS_CLOSING:
                   1250:                        if (ourfinisacked) {
                   1251:                                tp->t_state = TCPS_TIME_WAIT;
                   1252:                                tcp_canceltimers(tp);
                   1253:                                tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
                   1254:                                soisfdisconnected(so);
                   1255:                        }
                   1256:                        break;
                   1257: 
                   1258:                /*
                   1259:                 * In LAST_ACK, we may still be waiting for data to drain
                   1260:                 * and/or to be acked, as well as for the ack of our FIN.
                   1261:                 * If our FIN is now acknowledged, delete the TCB,
                   1262:                 * enter the closed state and return.
                   1263:                 */
                   1264:                case TCPS_LAST_ACK:
                   1265:                        if (ourfinisacked) {
                   1266:                                tp = tcp_close(tp);
                   1267:                                goto drop;
                   1268:                        }
                   1269:                        break;
                   1270: 
                   1271:                /*
                   1272:                 * In TIME_WAIT state the only thing that should arrive
                   1273:                 * is a retransmission of the remote FIN.  Acknowledge
                   1274:                 * it and restart the finack timer.
                   1275:                 */
                   1276:                case TCPS_TIME_WAIT:
                   1277:                        tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
                   1278:                        goto dropafterack;
                   1279:                }
                   1280:        } /* switch(tp->t_state) */
                   1281: 
                   1282: step6:
                   1283:        /*
                   1284:         * Update window information.
                   1285:         * Don't look at window if no ACK: TAC's send garbage on first SYN.
                   1286:         */
                   1287:        if ((tiflags & TH_ACK) &&
1.1.1.3   root     1288:            (SEQ_LT(tp->snd_wl1, ti->ti_seq) ||
1.1       root     1289:            (tp->snd_wl1 == ti->ti_seq && (SEQ_LT(tp->snd_wl2, ti->ti_ack) ||
                   1290:            (tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))))) {
                   1291:                /* keep track of pure window updates */
                   1292:                if (ti->ti_len == 0 &&
                   1293:                    tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd)
1.1.1.3   root     1294:                        STAT(tcpstat.tcps_rcvwinupd++);
1.1       root     1295:                tp->snd_wnd = tiwin;
                   1296:                tp->snd_wl1 = ti->ti_seq;
                   1297:                tp->snd_wl2 = ti->ti_ack;
                   1298:                if (tp->snd_wnd > tp->max_sndwnd)
                   1299:                        tp->max_sndwnd = tp->snd_wnd;
                   1300:                needoutput = 1;
                   1301:        }
                   1302: 
                   1303:        /*
                   1304:         * Process segments with URG.
                   1305:         */
                   1306:        if ((tiflags & TH_URG) && ti->ti_urp &&
                   1307:            TCPS_HAVERCVDFIN(tp->t_state) == 0) {
                   1308:                /*
                   1309:                 * This is a kludge, but if we receive and accept
                   1310:                 * random urgent pointers, we'll crash in
                   1311:                 * soreceive.  It's hard to imagine someone
                   1312:                 * actually wanting to send this much urgent data.
                   1313:                 */
                   1314:                if (ti->ti_urp + so->so_rcv.sb_cc > so->so_rcv.sb_datalen) {
                   1315:                        ti->ti_urp = 0;
                   1316:                        tiflags &= ~TH_URG;
                   1317:                        goto dodata;
                   1318:                }
                   1319:                /*
                   1320:                 * If this segment advances the known urgent pointer,
                   1321:                 * then mark the data stream.  This should not happen
                   1322:                 * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
1.1.1.3   root     1323:                 * a FIN has been received from the remote side.
1.1       root     1324:                 * In these states we ignore the URG.
                   1325:                 *
                   1326:                 * According to RFC961 (Assigned Protocols),
                   1327:                 * the urgent pointer points to the last octet
                   1328:                 * of urgent data.  We continue, however,
                   1329:                 * to consider it to indicate the first octet
1.1.1.3   root     1330:                 * of data past the urgent section as the original
1.1       root     1331:                 * spec states (in one of two places).
                   1332:                 */
                   1333:                if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) {
                   1334:                        tp->rcv_up = ti->ti_seq + ti->ti_urp;
                   1335:                        so->so_urgc =  so->so_rcv.sb_cc +
                   1336:                                (tp->rcv_up - tp->rcv_nxt); /* -1; */
                   1337:                        tp->rcv_up = ti->ti_seq + ti->ti_urp;
1.1.1.3   root     1338: 
1.1       root     1339:                }
                   1340:        } else
                   1341:                /*
                   1342:                 * If no out of band data is expected,
                   1343:                 * pull receive urgent pointer along
                   1344:                 * with the receive window.
                   1345:                 */
                   1346:                if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
                   1347:                        tp->rcv_up = tp->rcv_nxt;
                   1348: dodata:
                   1349: 
                   1350:        /*
                   1351:         * Process the segment text, merging it into the TCP sequencing queue,
                   1352:         * and arranging for acknowledgment of receipt if necessary.
                   1353:         * This process logically involves adjusting tp->rcv_wnd as data
                   1354:         * is presented to the user (this happens in tcp_usrreq.c,
                   1355:         * case PRU_RCVD).  If a FIN has already been received on this
                   1356:         * connection then we just ignore the text.
                   1357:         */
                   1358:        if ((ti->ti_len || (tiflags&TH_FIN)) &&
                   1359:            TCPS_HAVERCVDFIN(tp->t_state) == 0) {
                   1360:                TCP_REASS(tp, ti, m, so, tiflags);
                   1361:                /*
                   1362:                 * Note the amount of data that peer has sent into
                   1363:                 * our window, in order to estimate the sender's
                   1364:                 * buffer size.
                   1365:                 */
                   1366:                len = so->so_rcv.sb_datalen - (tp->rcv_adv - tp->rcv_nxt);
                   1367:        } else {
                   1368:                m_free(m);
                   1369:                tiflags &= ~TH_FIN;
                   1370:        }
                   1371: 
                   1372:        /*
                   1373:         * If FIN is received ACK the FIN and let the user know
                   1374:         * that the connection is closing.
                   1375:         */
                   1376:        if (tiflags & TH_FIN) {
                   1377:                if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
                   1378:                        /*
                   1379:                         * If we receive a FIN we can't send more data,
                   1380:                         * set it SS_FDRAIN
                   1381:                          * Shutdown the socket if there is no rx data in the
                   1382:                         * buffer.
                   1383:                         * soread() is called on completion of shutdown() and
                   1384:                         * will go to TCPS_LAST_ACK, and use tcp_output()
                   1385:                         * to send the FIN.
                   1386:                         */
                   1387: /*                     sofcantrcvmore(so); */
                   1388:                        sofwdrain(so);
1.1.1.3   root     1389: 
1.1       root     1390:                        tp->t_flags |= TF_ACKNOW;
                   1391:                        tp->rcv_nxt++;
                   1392:                }
                   1393:                switch (tp->t_state) {
                   1394: 
                   1395:                /*
                   1396:                 * In SYN_RECEIVED and ESTABLISHED STATES
                   1397:                 * enter the CLOSE_WAIT state.
                   1398:                 */
                   1399:                case TCPS_SYN_RECEIVED:
                   1400:                case TCPS_ESTABLISHED:
                   1401:                  if(so->so_emu == EMU_CTL)        /* no shutdown on socket */
                   1402:                    tp->t_state = TCPS_LAST_ACK;
1.1.1.3   root     1403:                  else
1.1       root     1404:                    tp->t_state = TCPS_CLOSE_WAIT;
                   1405:                  break;
                   1406: 
                   1407:                /*
                   1408:                 * If still in FIN_WAIT_1 STATE FIN has not been acked so
                   1409:                 * enter the CLOSING state.
                   1410:                 */
                   1411:                case TCPS_FIN_WAIT_1:
                   1412:                        tp->t_state = TCPS_CLOSING;
                   1413:                        break;
                   1414: 
                   1415:                /*
                   1416:                 * In FIN_WAIT_2 state enter the TIME_WAIT state,
1.1.1.3   root     1417:                 * starting the time-wait timer, turning off the other
1.1       root     1418:                 * standard timers.
                   1419:                 */
                   1420:                case TCPS_FIN_WAIT_2:
                   1421:                        tp->t_state = TCPS_TIME_WAIT;
                   1422:                        tcp_canceltimers(tp);
                   1423:                        tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
                   1424:                        soisfdisconnected(so);
                   1425:                        break;
                   1426: 
                   1427:                /*
                   1428:                 * In TIME_WAIT state restart the 2 MSL time_wait timer.
                   1429:                 */
                   1430:                case TCPS_TIME_WAIT:
                   1431:                        tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
                   1432:                        break;
                   1433:                }
                   1434:        }
                   1435: 
                   1436:        /*
                   1437:         * If this is a small packet, then ACK now - with Nagle
                   1438:         *      congestion avoidance sender won't send more until
                   1439:         *      he gets an ACK.
1.1.1.3   root     1440:         *
1.1       root     1441:         * See above.
                   1442:         */
                   1443: /*     if (ti->ti_len && (unsigned)ti->ti_len < tp->t_maxseg) {
                   1444:  */
                   1445: /*     if ((ti->ti_len && (unsigned)ti->ti_len < tp->t_maxseg &&
                   1446:  *             (so->so_iptos & IPTOS_LOWDELAY) == 0) ||
                   1447:  *            ((so->so_iptos & IPTOS_LOWDELAY) &&
                   1448:  *            ((struct tcpiphdr_2 *)ti)->first_char == (char)27)) {
                   1449:  */
                   1450:        if (ti->ti_len && (unsigned)ti->ti_len <= 5 &&
                   1451:            ((struct tcpiphdr_2 *)ti)->first_char == (char)27) {
                   1452:                tp->t_flags |= TF_ACKNOW;
                   1453:        }
                   1454: 
                   1455:        /*
                   1456:         * Return any desired output.
                   1457:         */
                   1458:        if (needoutput || (tp->t_flags & TF_ACKNOW)) {
                   1459:                (void) tcp_output(tp);
                   1460:        }
                   1461:        return;
                   1462: 
                   1463: dropafterack:
                   1464:        /*
                   1465:         * Generate an ACK dropping incoming segment if it occupies
                   1466:         * sequence space, where the ACK reflects our state.
                   1467:         */
                   1468:        if (tiflags & TH_RST)
                   1469:                goto drop;
                   1470:        m_freem(m);
                   1471:        tp->t_flags |= TF_ACKNOW;
                   1472:        (void) tcp_output(tp);
                   1473:        return;
                   1474: 
                   1475: dropwithreset:
                   1476:        /* reuses m if m!=NULL, m_free() unnecessary */
                   1477:        if (tiflags & TH_ACK)
                   1478:                tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST);
                   1479:        else {
                   1480:                if (tiflags & TH_SYN) ti->ti_len++;
                   1481:                tcp_respond(tp, ti, m, ti->ti_seq+ti->ti_len, (tcp_seq)0,
                   1482:                    TH_RST|TH_ACK);
                   1483:        }
                   1484: 
                   1485:        return;
                   1486: 
                   1487: drop:
                   1488:        /*
                   1489:         * Drop space held by incoming segment and return.
                   1490:         */
                   1491:        m_free(m);
                   1492: 
                   1493:        return;
                   1494: }
                   1495: 
                   1496:  /* , ts_present, ts_val, ts_ecr) */
                   1497: /*     int *ts_present;
                   1498:  *     u_int32_t *ts_val, *ts_ecr;
                   1499:  */
1.1.1.3   root     1500: static void
                   1501: tcp_dooptions(struct tcpcb *tp, u_char *cp, int cnt, struct tcpiphdr *ti)
1.1       root     1502: {
                   1503:        u_int16_t mss;
                   1504:        int opt, optlen;
                   1505: 
                   1506:        DEBUG_CALL("tcp_dooptions");
                   1507:        DEBUG_ARGS((dfd," tp = %lx  cnt=%i \n", (long )tp, cnt));
                   1508: 
                   1509:        for (; cnt > 0; cnt -= optlen, cp += optlen) {
                   1510:                opt = cp[0];
                   1511:                if (opt == TCPOPT_EOL)
                   1512:                        break;
                   1513:                if (opt == TCPOPT_NOP)
                   1514:                        optlen = 1;
                   1515:                else {
                   1516:                        optlen = cp[1];
                   1517:                        if (optlen <= 0)
                   1518:                                break;
                   1519:                }
                   1520:                switch (opt) {
                   1521: 
                   1522:                default:
                   1523:                        continue;
                   1524: 
                   1525:                case TCPOPT_MAXSEG:
                   1526:                        if (optlen != TCPOLEN_MAXSEG)
                   1527:                                continue;
                   1528:                        if (!(ti->ti_flags & TH_SYN))
                   1529:                                continue;
                   1530:                        memcpy((char *) &mss, (char *) cp + 2, sizeof(mss));
                   1531:                        NTOHS(mss);
                   1532:                        (void) tcp_mss(tp, mss);        /* sets t_maxseg */
                   1533:                        break;
                   1534: 
                   1535: /*             case TCPOPT_WINDOW:
                   1536:  *                     if (optlen != TCPOLEN_WINDOW)
                   1537:  *                             continue;
                   1538:  *                     if (!(ti->ti_flags & TH_SYN))
                   1539:  *                             continue;
                   1540:  *                     tp->t_flags |= TF_RCVD_SCALE;
                   1541:  *                     tp->requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT);
                   1542:  *                     break;
                   1543:  */
                   1544: /*             case TCPOPT_TIMESTAMP:
                   1545:  *                     if (optlen != TCPOLEN_TIMESTAMP)
                   1546:  *                             continue;
                   1547:  *                     *ts_present = 1;
                   1548:  *                     memcpy((char *) ts_val, (char *)cp + 2, sizeof(*ts_val));
                   1549:  *                     NTOHL(*ts_val);
                   1550:  *                     memcpy((char *) ts_ecr, (char *)cp + 6, sizeof(*ts_ecr));
                   1551:  *                     NTOHL(*ts_ecr);
                   1552:  *
1.1.1.3   root     1553:  */                    /*
1.1       root     1554:  *                      * A timestamp received in a SYN makes
                   1555:  *                      * it ok to send timestamp requests and replies.
                   1556:  *                      */
                   1557: /*                     if (ti->ti_flags & TH_SYN) {
                   1558:  *                             tp->t_flags |= TF_RCVD_TSTMP;
                   1559:  *                             tp->ts_recent = *ts_val;
                   1560:  *                             tp->ts_recent_age = tcp_now;
                   1561:  *                     }
                   1562:  */                    break;
                   1563:                }
                   1564:        }
                   1565: }
                   1566: 
                   1567: 
                   1568: /*
                   1569:  * Pull out of band byte out of a segment so
                   1570:  * it doesn't appear in the user's data queue.
                   1571:  * It is still reflected in the segment length for
                   1572:  * sequencing purposes.
                   1573:  */
                   1574: 
                   1575: #ifdef notdef
                   1576: 
                   1577: void
                   1578: tcp_pulloutofband(so, ti, m)
                   1579:        struct socket *so;
                   1580:        struct tcpiphdr *ti;
                   1581:        register struct mbuf *m;
                   1582: {
                   1583:        int cnt = ti->ti_urp - 1;
1.1.1.3   root     1584: 
1.1       root     1585:        while (cnt >= 0) {
                   1586:                if (m->m_len > cnt) {
                   1587:                        char *cp = mtod(m, caddr_t) + cnt;
                   1588:                        struct tcpcb *tp = sototcpcb(so);
                   1589: 
                   1590:                        tp->t_iobc = *cp;
                   1591:                        tp->t_oobflags |= TCPOOB_HAVEDATA;
                   1592:                        memcpy(sp, cp+1, (unsigned)(m->m_len - cnt - 1));
                   1593:                        m->m_len--;
                   1594:                        return;
                   1595:                }
                   1596:                cnt -= m->m_len;
                   1597:                m = m->m_next; /* XXX WRONG! Fix it! */
                   1598:                if (m == 0)
                   1599:                        break;
                   1600:        }
                   1601:        panic("tcp_pulloutofband");
                   1602: }
                   1603: 
                   1604: #endif /* notdef */
                   1605: 
                   1606: /*
                   1607:  * Collect new round-trip time estimate
                   1608:  * and update averages and current timeout.
                   1609:  */
                   1610: 
1.1.1.3   root     1611: static void
                   1612: tcp_xmit_timer(register struct tcpcb *tp, int rtt)
1.1       root     1613: {
                   1614:        register short delta;
                   1615: 
                   1616:        DEBUG_CALL("tcp_xmit_timer");
                   1617:        DEBUG_ARG("tp = %lx", (long)tp);
                   1618:        DEBUG_ARG("rtt = %d", rtt);
1.1.1.3   root     1619: 
                   1620:        STAT(tcpstat.tcps_rttupdated++);
1.1       root     1621:        if (tp->t_srtt != 0) {
                   1622:                /*
                   1623:                 * srtt is stored as fixed point with 3 bits after the
                   1624:                 * binary point (i.e., scaled by 8).  The following magic
                   1625:                 * is equivalent to the smoothing algorithm in rfc793 with
                   1626:                 * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
                   1627:                 * point).  Adjust rtt to origin 0.
                   1628:                 */
                   1629:                delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT);
                   1630:                if ((tp->t_srtt += delta) <= 0)
                   1631:                        tp->t_srtt = 1;
                   1632:                /*
                   1633:                 * We accumulate a smoothed rtt variance (actually, a
                   1634:                 * smoothed mean difference), then set the retransmit
                   1635:                 * timer to smoothed rtt + 4 times the smoothed variance.
                   1636:                 * rttvar is stored as fixed point with 2 bits after the
                   1637:                 * binary point (scaled by 4).  The following is
                   1638:                 * equivalent to rfc793 smoothing with an alpha of .75
                   1639:                 * (rttvar = rttvar*3/4 + |delta| / 4).  This replaces
                   1640:                 * rfc793's wired-in beta.
                   1641:                 */
                   1642:                if (delta < 0)
                   1643:                        delta = -delta;
                   1644:                delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT);
                   1645:                if ((tp->t_rttvar += delta) <= 0)
                   1646:                        tp->t_rttvar = 1;
                   1647:        } else {
1.1.1.3   root     1648:                /*
1.1       root     1649:                 * No rtt measurement yet - use the unsmoothed rtt.
                   1650:                 * Set the variance to half the rtt (so our first
                   1651:                 * retransmit happens at 3*rtt).
                   1652:                 */
                   1653:                tp->t_srtt = rtt << TCP_RTT_SHIFT;
                   1654:                tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
                   1655:        }
                   1656:        tp->t_rtt = 0;
                   1657:        tp->t_rxtshift = 0;
                   1658: 
                   1659:        /*
                   1660:         * the retransmit should happen at rtt + 4 * rttvar.
                   1661:         * Because of the way we do the smoothing, srtt and rttvar
                   1662:         * will each average +1/2 tick of bias.  When we compute
                   1663:         * the retransmit timer, we want 1/2 tick of rounding and
                   1664:         * 1 extra tick because of +-1/2 tick uncertainty in the
                   1665:         * firing of the timer.  The bias will give us exactly the
                   1666:         * 1.5 tick we need.  But, because the bias is
                   1667:         * statistical, we have to test that we don't drop below
                   1668:         * the minimum feasible timer (which is 2 ticks).
                   1669:         */
                   1670:        TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
                   1671:            (short)tp->t_rttmin, TCPTV_REXMTMAX); /* XXX */
1.1.1.3   root     1672: 
1.1       root     1673:        /*
                   1674:         * We received an ack for a packet that wasn't retransmitted;
                   1675:         * it is probably safe to discard any error indications we've
                   1676:         * received recently.  This isn't quite right, but close enough
                   1677:         * for now (a route might have failed after we sent a segment,
                   1678:         * and the return path might not be symmetrical).
                   1679:         */
                   1680:        tp->t_softerror = 0;
                   1681: }
                   1682: 
                   1683: /*
                   1684:  * Determine a reasonable value for maxseg size.
                   1685:  * If the route is known, check route for mtu.
                   1686:  * If none, use an mss that can be handled on the outgoing
                   1687:  * interface without forcing IP to fragment; if bigger than
                   1688:  * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES
                   1689:  * to utilize large mbufs.  If no route is found, route has no mtu,
                   1690:  * or the destination isn't local, use a default, hopefully conservative
                   1691:  * size (usually 512 or the default IP max size, but no more than the mtu
                   1692:  * of the interface), as we can't discover anything about intervening
                   1693:  * gateways or networks.  We also initialize the congestion/slow start
                   1694:  * window to be a single segment if the destination isn't local.
                   1695:  * While looking at the routing entry, we also initialize other path-dependent
                   1696:  * parameters from pre-set or cached values in the routing entry.
                   1697:  */
                   1698: 
                   1699: int
                   1700: tcp_mss(tp, offer)
                   1701:         register struct tcpcb *tp;
                   1702:         u_int offer;
                   1703: {
                   1704:        struct socket *so = tp->t_socket;
                   1705:        int mss;
1.1.1.3   root     1706: 
1.1       root     1707:        DEBUG_CALL("tcp_mss");
                   1708:        DEBUG_ARG("tp = %lx", (long)tp);
                   1709:        DEBUG_ARG("offer = %d", offer);
1.1.1.3   root     1710: 
                   1711:        mss = min(IF_MTU, IF_MRU) - sizeof(struct tcpiphdr);
1.1       root     1712:        if (offer)
                   1713:                mss = min(mss, offer);
                   1714:        mss = max(mss, 32);
                   1715:        if (mss < tp->t_maxseg || offer != 0)
                   1716:           tp->t_maxseg = mss;
1.1.1.3   root     1717: 
1.1       root     1718:        tp->snd_cwnd = mss;
1.1.1.3   root     1719: 
                   1720:        sbreserve(&so->so_snd, TCP_SNDSPACE + ((TCP_SNDSPACE % mss) ?
                   1721:                                                (mss - (TCP_SNDSPACE % mss)) :
                   1722:                                                0));
                   1723:        sbreserve(&so->so_rcv, TCP_RCVSPACE + ((TCP_RCVSPACE % mss) ?
                   1724:                                                (mss - (TCP_RCVSPACE % mss)) :
                   1725:                                                0));
                   1726: 
1.1       root     1727:        DEBUG_MISC((dfd, " returning mss = %d\n", mss));
1.1.1.3   root     1728: 
1.1       root     1729:        return mss;
                   1730: }

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.