|
|
1.1 ! root 1: /* ! 2: * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. ! 3: * ! 4: * @APPLE_LICENSE_HEADER_START@ ! 5: * ! 6: * The contents of this file constitute Original Code as defined in and ! 7: * are subject to the Apple Public Source License Version 1.1 (the ! 8: * "License"). You may not use this file except in compliance with the ! 9: * License. Please obtain a copy of the License at ! 10: * http://www.apple.com/publicsource and read it before using this file. ! 11: * ! 12: * This Original Code and all software distributed under the License are ! 13: * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER ! 14: * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, ! 15: * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, ! 16: * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the ! 17: * License for the specific language governing rights and limitations ! 18: * under the License. ! 19: * ! 20: * @APPLE_LICENSE_HEADER_END@ ! 21: */ ! 22: /* ! 23: * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 ! 24: * The Regents of the University of California. All rights reserved. ! 25: * ! 26: * Redistribution and use in source and binary forms, with or without ! 27: * modification, are permitted provided that the following conditions ! 28: * are met: ! 29: * 1. Redistributions of source code must retain the above copyright ! 30: * notice, this list of conditions and the following disclaimer. ! 31: * 2. Redistributions in binary form must reproduce the above copyright ! 32: * notice, this list of conditions and the following disclaimer in the ! 33: * documentation and/or other materials provided with the distribution. ! 34: * 3. All advertising materials mentioning features or use of this software ! 35: * must display the following acknowledgement: ! 36: * This product includes software developed by the University of ! 37: * California, Berkeley and its contributors. ! 38: * 4. Neither the name of the University nor the names of its contributors ! 39: * may be used to endorse or promote products derived from this software ! 40: * without specific prior written permission. ! 41: * ! 42: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ! 43: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ! 44: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ! 45: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ! 46: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ! 47: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ! 48: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ! 49: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ! 50: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ! 51: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ! 52: * SUCH DAMAGE. ! 53: * ! 54: * @(#)tcp_input.c 8.12 (Berkeley) 5/24/95 ! 55: */ ! 56: ! 57: #if ISFB31 ! 58: #include "opt_ipfw.h" /* for ipfw_fwd */ ! 59: #include "opt_tcpdebug.h" ! 60: #endif ! 61: ! 62: #include <sys/param.h> ! 63: #include <sys/systm.h> ! 64: #include <sys/kernel.h> ! 65: #include <sys/sysctl.h> ! 66: #include <sys/malloc.h> ! 67: #include <sys/mbuf.h> ! 68: #include <sys/proc.h> /* for proc0 declaration */ ! 69: #include <sys/protosw.h> ! 70: #include <sys/socket.h> ! 71: #include <sys/socketvar.h> ! 72: #include <sys/syslog.h> ! 73: ! 74: #include <kern/cpu_number.h> /* before tcp_seq.h, for tcp_random18() */ ! 75: ! 76: #include <net/if.h> ! 77: #include <net/route.h> ! 78: ! 79: #include <netinet/in.h> ! 80: #include <netinet/in_systm.h> ! 81: #include <netinet/ip.h> ! 82: #include <netinet/ip_icmp.h> /* for ICMP_BANDLIM */ ! 83: #include <netinet/in_pcb.h> ! 84: #include <netinet/ip_var.h> ! 85: #include <netinet/icmp_var.h> /* for ICMP_BANDLIM */ ! 86: #include <netinet/tcp.h> ! 87: #include <netinet/tcp_fsm.h> ! 88: #include <netinet/tcp_seq.h> ! 89: #include <netinet/tcp_timer.h> ! 90: #include <netinet/tcp_var.h> ! 91: #include <netinet/tcpip.h> ! 92: #if TCPDEBUG ! 93: #include <netinet/tcp_debug.h> ! 94: static struct tcpiphdr tcp_saveti; ! 95: #endif ! 96: ! 97: static int tcprexmtthresh = 3; ! 98: tcp_seq tcp_iss; ! 99: tcp_cc tcp_ccgen; ! 100: ! 101: struct tcpstat tcpstat; ! 102: SYSCTL_STRUCT(_net_inet_tcp, TCPCTL_STATS, stats, ! 103: CTLFLAG_RD, &tcpstat , tcpstat, ""); ! 104: ! 105: static int log_in_vain = 0; ! 106: SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_RW, ! 107: &log_in_vain, 0, ""); ! 108: ! 109: int tcp_delack_enabled = 1; ! 110: SYSCTL_INT(_net_inet_tcp, OID_AUTO, delayed_ack, CTLFLAG_RW, ! 111: &tcp_delack_enabled, 0, ""); ! 112: ! 113: u_long tcp_now; ! 114: struct inpcbhead tcb; ! 115: struct inpcbinfo tcbinfo; ! 116: ! 117: static void tcp_dooptions __P((struct tcpcb *, ! 118: u_char *, int, struct tcpiphdr *, struct tcpopt *)); ! 119: static void tcp_pulloutofband __P((struct socket *, ! 120: struct tcpiphdr *, struct mbuf *)); ! 121: static int tcp_reass __P((struct tcpcb *, struct tcpiphdr *, struct mbuf *)); ! 122: static void tcp_xmit_timer __P((struct tcpcb *, int)); ! 123: ! 124: ! 125: /* ! 126: * Insert segment ti into reassembly queue of tcp with ! 127: * control block tp. Return TH_FIN if reassembly now includes ! 128: * a segment with FIN. The macro form does the common case inline ! 129: * (segment is the next to be received on an established connection, ! 130: * and the queue is empty), avoiding linkage into and removal ! 131: * from the queue and repetition of various conversions. ! 132: * Set DELACK for segments received in order, but ack immediately ! 133: * when segments are out of order (so fast retransmit can work). ! 134: */ ! 135: #define TCP_REASS(tp, ti, m, so, flags) { \ ! 136: if ((ti)->ti_seq == (tp)->rcv_nxt && \ ! 137: (tp)->t_segq == NULL && \ ! 138: (tp)->t_state == TCPS_ESTABLISHED) { \ ! 139: if (tcp_delack_enabled) \ ! 140: tp->t_flags |= TF_DELACK; \ ! 141: else \ ! 142: tp->t_flags |= TF_ACKNOW; \ ! 143: (tp)->rcv_nxt += (ti)->ti_len; \ ! 144: flags = (ti)->ti_flags & TH_FIN; \ ! 145: tcpstat.tcps_rcvpack++;\ ! 146: tcpstat.tcps_rcvbyte += (ti)->ti_len;\ ! 147: sbappend(&(so)->so_rcv, (m)); \ ! 148: sorwakeup(so); \ ! 149: } else { \ ! 150: (flags) = tcp_reass((tp), (ti), (m)); \ ! 151: tp->t_flags |= TF_ACKNOW; \ ! 152: } \ ! 153: } ! 154: ! 155: static int ! 156: tcp_reass(tp, ti, m) ! 157: register struct tcpcb *tp; ! 158: register struct tcpiphdr *ti; ! 159: struct mbuf *m; ! 160: { ! 161: struct mbuf *q; ! 162: struct mbuf *p; ! 163: struct mbuf *nq; ! 164: struct socket *so = tp->t_inpcb->inp_socket; ! 165: int flags; ! 166: ! 167: #define GETTCP(m) ((struct tcpiphdr *)m->m_pkthdr.header) ! 168: ! 169: /* ! 170: * Call with ti==0 after become established to ! 171: * force pre-ESTABLISHED data up to user socket. ! 172: */ ! 173: if (ti == 0) ! 174: goto present; ! 175: ! 176: m->m_pkthdr.header = ti; ! 177: ! 178: /* ! 179: * Find a segment which begins after this one does. ! 180: */ ! 181: for (q = tp->t_segq, p = NULL; q; p = q, q = q->m_nextpkt) ! 182: if (SEQ_GT(GETTCP(q)->ti_seq, ti->ti_seq)) ! 183: break; ! 184: ! 185: /* ! 186: * If there is a preceding segment, it may provide some of ! 187: * our data already. If so, drop the data from the incoming ! 188: * segment. If it provides all of our data, drop us. ! 189: */ ! 190: if (p != NULL) { ! 191: register int i; ! 192: /* conversion to int (in i) handles seq wraparound */ ! 193: i = GETTCP(p)->ti_seq + GETTCP(p)->ti_len - ti->ti_seq; ! 194: if (i > 0) { ! 195: if (i >= ti->ti_len) { ! 196: tcpstat.tcps_rcvduppack++; ! 197: tcpstat.tcps_rcvdupbyte += ti->ti_len; ! 198: m_freem(m); ! 199: /* ! 200: * Try to present any queued data ! 201: * at the left window edge to the user. ! 202: * This is needed after the 3-WHS ! 203: * completes. ! 204: */ ! 205: goto present; /* ??? */ ! 206: } ! 207: m_adj(m, i); ! 208: ti->ti_len -= i; ! 209: ti->ti_seq += i; ! 210: } ! 211: } ! 212: tcpstat.tcps_rcvoopack++; ! 213: tcpstat.tcps_rcvoobyte += ti->ti_len; ! 214: ! 215: /* ! 216: * While we overlap succeeding segments trim them or, ! 217: * if they are completely covered, dequeue them. ! 218: */ ! 219: while (q) { ! 220: register int i = (ti->ti_seq + ti->ti_len) - GETTCP(q)->ti_seq; ! 221: if (i <= 0) ! 222: break; ! 223: if (i < GETTCP(q)->ti_len) { ! 224: GETTCP(q)->ti_seq += i; ! 225: GETTCP(q)->ti_len -= i; ! 226: m_adj(q, i); ! 227: break; ! 228: } ! 229: ! 230: nq = q->m_nextpkt; ! 231: if (p) ! 232: p->m_nextpkt = nq; ! 233: else ! 234: tp->t_segq = nq; ! 235: m_freem(q); ! 236: q = nq; ! 237: } ! 238: ! 239: if (p == NULL) { ! 240: m->m_nextpkt = tp->t_segq; ! 241: tp->t_segq = m; ! 242: } else { ! 243: m->m_nextpkt = p->m_nextpkt; ! 244: p->m_nextpkt = m; ! 245: } ! 246: ! 247: present: ! 248: /* ! 249: * Present data to user, advancing rcv_nxt through ! 250: * completed sequence space. ! 251: */ ! 252: if (!TCPS_HAVEESTABLISHED(tp->t_state)) ! 253: return (0); ! 254: q = tp->t_segq; ! 255: if (!q || GETTCP(q)->ti_seq != tp->rcv_nxt) ! 256: return (0); ! 257: do { ! 258: tp->rcv_nxt += GETTCP(q)->ti_len; ! 259: flags = GETTCP(q)->ti_flags & TH_FIN; ! 260: nq = q->m_nextpkt; ! 261: tp->t_segq = nq; ! 262: q->m_nextpkt = NULL; ! 263: if (so->so_state & SS_CANTRCVMORE) ! 264: m_freem(q); ! 265: else ! 266: sbappend(&so->so_rcv, q); ! 267: q = nq; ! 268: } while (q && GETTCP(q)->ti_seq == tp->rcv_nxt); ! 269: sorwakeup(so); ! 270: return (flags); ! 271: ! 272: #undef GETTCP ! 273: } ! 274: ! 275: /* ! 276: * TCP input routine, follows pages 65-76 of the ! 277: * protocol specification dated September, 1981 very closely. ! 278: */ ! 279: void ! 280: tcp_input(m, iphlen) ! 281: register struct mbuf *m; ! 282: int iphlen; ! 283: { ! 284: register struct tcpiphdr *ti; ! 285: register struct inpcb *inp; ! 286: u_char *optp = NULL; ! 287: int optlen = 0; ! 288: int len, tlen, off; ! 289: register struct tcpcb *tp = 0; ! 290: register int tiflags; ! 291: struct socket *so = 0; ! 292: int todrop, acked, ourfinisacked, needoutput = 0; ! 293: struct in_addr laddr; ! 294: int dropsocket = 0; ! 295: int iss = 0; ! 296: u_long tiwin; ! 297: struct tcpopt to; /* options in this segment */ ! 298: struct rmxp_tao *taop; /* pointer to our TAO cache entry */ ! 299: struct rmxp_tao tao_noncached; /* in case there's no cached entry */ ! 300: #if TCPDEBUG ! 301: short ostate = 0; ! 302: #endif ! 303: struct proc *proc0=current_proc(); ! 304: ! 305: bzero((char *)&to, sizeof(to)); ! 306: ! 307: tcpstat.tcps_rcvtotal++; ! 308: /* ! 309: * Get IP and TCP header together in first mbuf. ! 310: * Note: IP leaves IP header in first mbuf. ! 311: */ ! 312: ti = mtod(m, struct tcpiphdr *); ! 313: if (iphlen > sizeof (struct ip)) ! 314: ip_stripoptions(m, (struct mbuf *)0); ! 315: if (m->m_len < sizeof (struct tcpiphdr)) { ! 316: if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) { ! 317: tcpstat.tcps_rcvshort++; ! 318: return; ! 319: } ! 320: ti = mtod(m, struct tcpiphdr *); ! 321: } ! 322: ! 323: /* ! 324: * Checksum extended TCP header and data. ! 325: */ ! 326: tlen = ((struct ip *)ti)->ip_len; ! 327: len = sizeof (struct ip) + tlen; ! 328: bzero(ti->ti_x1, sizeof(ti->ti_x1)); ! 329: ti->ti_len = (u_short)tlen; ! 330: HTONS(ti->ti_len); ! 331: ti->ti_sum = in_cksum(m, len); ! 332: if (ti->ti_sum) { ! 333: tcpstat.tcps_rcvbadsum++; ! 334: goto drop; ! 335: } ! 336: ! 337: /* ! 338: * Check that TCP offset makes sense, ! 339: * pull out TCP options and adjust length. XXX ! 340: */ ! 341: off = ti->ti_off << 2; ! 342: if (off < sizeof (struct tcphdr) || off > tlen) { ! 343: tcpstat.tcps_rcvbadoff++; ! 344: goto drop; ! 345: } ! 346: tlen -= off; ! 347: ti->ti_len = tlen; ! 348: if (off > sizeof (struct tcphdr)) { ! 349: if (m->m_len < sizeof(struct ip) + off) { ! 350: if ((m = m_pullup(m, sizeof (struct ip) + off)) == 0) { ! 351: tcpstat.tcps_rcvshort++; ! 352: return; ! 353: } ! 354: ti = mtod(m, struct tcpiphdr *); ! 355: } ! 356: optlen = off - sizeof (struct tcphdr); ! 357: optp = mtod(m, u_char *) + sizeof (struct tcpiphdr); ! 358: } ! 359: tiflags = ti->ti_flags; ! 360: ! 361: /* ! 362: * Convert TCP protocol specific fields to host format. ! 363: */ ! 364: NTOHL(ti->ti_seq); ! 365: NTOHL(ti->ti_ack); ! 366: NTOHS(ti->ti_win); ! 367: NTOHS(ti->ti_urp); ! 368: ! 369: /* ! 370: * Drop TCP, IP headers and TCP options. ! 371: */ ! 372: m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); ! 373: m->m_len -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); ! 374: ! 375: /* ! 376: * Locate pcb for segment. ! 377: */ ! 378: findpcb: ! 379: #if IPFIREWALL_FORWARD ! 380: if (ip_fw_fwd_addr != NULL) { ! 381: /* ! 382: * Diverted. Pretend to be the destination. ! 383: * already got one like this? ! 384: */ ! 385: inp = in_pcblookup_hash(&tcbinfo, ti->ti_src, ti->ti_sport, ! 386: ti->ti_dst, ti->ti_dport, 0); ! 387: if (!inp) { ! 388: /* ! 389: * No, then it's new. Try find the ambushing socket ! 390: */ ! 391: if (!ip_fw_fwd_addr->sin_port) { ! 392: inp = in_pcblookup_hash(&tcbinfo, ti->ti_src, ! 393: ti->ti_sport, ip_fw_fwd_addr->sin_addr, ! 394: ti->ti_dport, 1); ! 395: } else { ! 396: inp = in_pcblookup_hash(&tcbinfo, ! 397: ti->ti_src, ti->ti_sport, ! 398: ip_fw_fwd_addr->sin_addr, ! 399: ntohs(ip_fw_fwd_addr->sin_port), 1); ! 400: } ! 401: } ! 402: ip_fw_fwd_addr = NULL; ! 403: } else ! 404: #endif /* IPFIREWALL_FORWARD */ ! 405: ! 406: inp = in_pcblookup_hash(&tcbinfo, ti->ti_src, ti->ti_sport, ! 407: ti->ti_dst, ti->ti_dport, 1); ! 408: ! 409: /* ! 410: * If the state is CLOSED (i.e., TCB does not exist) then ! 411: * all data in the incoming segment is discarded. ! 412: * If the TCB exists but is in CLOSED state, it is embryonic, ! 413: * but should either do a listen or a connect soon. ! 414: */ ! 415: if (inp == NULL) { ! 416: if (log_in_vain && tiflags & TH_SYN) { ! 417: char buf[4*sizeof "123"]; ! 418: ! 419: strcpy(buf, inet_ntoa(ti->ti_dst)); ! 420: log(LOG_INFO, ! 421: "Connection attempt to TCP %s:%d from %s:%d\n", ! 422: buf, ntohs(ti->ti_dport), inet_ntoa(ti->ti_src), ! 423: ntohs(ti->ti_sport)); ! 424: } ! 425: #if ICMP_BANDLIM ! 426: if (badport_bandlim(1) < 0) ! 427: goto drop; ! 428: #endif ! 429: goto dropwithreset; ! 430: } ! 431: tp = intotcpcb(inp); ! 432: if (tp == 0) ! 433: goto dropwithreset; ! 434: if (tp->t_state == TCPS_CLOSED) ! 435: goto drop; ! 436: ! 437: /* Unscale the window into a 32-bit value. */ ! 438: if ((tiflags & TH_SYN) == 0) ! 439: tiwin = ti->ti_win << tp->snd_scale; ! 440: else ! 441: tiwin = ti->ti_win; ! 442: ! 443: so = inp->inp_socket; ! 444: if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) { ! 445: #if TCPDEBUG ! 446: if (so->so_options & SO_DEBUG) { ! 447: ostate = tp->t_state; ! 448: tcp_saveti = *ti; ! 449: } ! 450: #endif ! 451: if (so->so_options & SO_ACCEPTCONN) { ! 452: register struct tcpcb *tp0 = tp; ! 453: struct socket *so2; ! 454: if ((tiflags & (TH_RST|TH_ACK|TH_SYN)) != TH_SYN) { ! 455: /* ! 456: * Note: dropwithreset makes sure we don't ! 457: * send a RST in response to a RST. ! 458: */ ! 459: if (tiflags & TH_ACK) { ! 460: tcpstat.tcps_badsyn++; ! 461: goto dropwithreset; ! 462: } ! 463: goto drop; ! 464: } ! 465: so2 = sonewconn(so, 0); ! 466: if (so2 == 0) { ! 467: tcpstat.tcps_listendrop++; ! 468: so2 = sodropablereq(so); ! 469: if (so2) { ! 470: tcp_drop(sototcpcb(so2), ETIMEDOUT); ! 471: so2 = sonewconn(so, 0); ! 472: } ! 473: if (!so2) ! 474: goto drop; ! 475: } ! 476: so = so2; ! 477: /* ! 478: * This is ugly, but .... ! 479: * ! 480: * Mark socket as temporary until we're ! 481: * committed to keeping it. The code at ! 482: * ``drop'' and ``dropwithreset'' check the ! 483: * flag dropsocket to see if the temporary ! 484: * socket created here should be discarded. ! 485: * We mark the socket as discardable until ! 486: * we're committed to it below in TCPS_LISTEN. ! 487: */ ! 488: dropsocket++; ! 489: inp = (struct inpcb *)so->so_pcb; ! 490: inp->inp_laddr = ti->ti_dst; ! 491: inp->inp_lport = ti->ti_dport; ! 492: if (in_pcbinshash(inp) != 0) { ! 493: /* ! 494: * Undo the assignments above if we failed to put ! 495: * the PCB on the hash lists. ! 496: */ ! 497: inp->inp_laddr.s_addr = INADDR_ANY; ! 498: inp->inp_lport = 0; ! 499: goto drop; ! 500: } ! 501: inp->inp_options = ip_srcroute(); ! 502: tp = intotcpcb(inp); ! 503: tp->t_state = TCPS_LISTEN; ! 504: tp->t_flags |= tp0->t_flags & (TF_NOPUSH|TF_NOOPT); ! 505: ! 506: /* Compute proper scaling value from buffer space */ ! 507: while (tp->request_r_scale < TCP_MAX_WINSHIFT && ! 508: TCP_MAXWIN << tp->request_r_scale < so->so_rcv.sb_hiwat) ! 509: tp->request_r_scale++; ! 510: } ! 511: } ! 512: ! 513: /* ! 514: * Segment received on connection. ! 515: * Reset idle time and keep-alive timer. ! 516: */ ! 517: tp->t_idle = 0; ! 518: if (TCPS_HAVEESTABLISHED(tp->t_state)) ! 519: tp->t_timer[TCPT_KEEP] = tcp_keepidle; ! 520: ! 521: /* ! 522: * Process options if not in LISTEN state, ! 523: * else do it below (after getting remote address). ! 524: */ ! 525: if (tp->t_state != TCPS_LISTEN) ! 526: tcp_dooptions(tp, optp, optlen, ti, &to); ! 527: ! 528: /* ! 529: * Header prediction: check for the two common cases ! 530: * of a uni-directional data xfer. If the packet has ! 531: * no control flags, is in-sequence, the window didn't ! 532: * change and we're not retransmitting, it's a ! 533: * candidate. If the length is zero and the ack moved ! 534: * forward, we're the sender side of the xfer. Just ! 535: * free the data acked & wake any higher level process ! 536: * that was blocked waiting for space. If the length ! 537: * is non-zero and the ack didn't move, we're the ! 538: * receiver side. If we're getting packets in-order ! 539: * (the reassembly queue is empty), add the data to ! 540: * the socket buffer and note that we need a delayed ack. ! 541: * Make sure that the hidden state-flags are also off. ! 542: * Since we check for TCPS_ESTABLISHED above, it can only ! 543: * be TH_NEEDSYN. ! 544: */ ! 545: if (tp->t_state == TCPS_ESTABLISHED && ! 546: (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK && ! 547: ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) && ! 548: ((to.to_flag & TOF_TS) == 0 || ! 549: TSTMP_GEQ(to.to_tsval, tp->ts_recent)) && ! 550: /* ! 551: * Using the CC option is compulsory if once started: ! 552: * the segment is OK if no T/TCP was negotiated or ! 553: * if the segment has a CC option equal to CCrecv ! 554: */ ! 555: ((tp->t_flags & (TF_REQ_CC|TF_RCVD_CC)) != (TF_REQ_CC|TF_RCVD_CC) || ! 556: (to.to_flag & TOF_CC) != 0 && to.to_cc == tp->cc_recv) && ! 557: ti->ti_seq == tp->rcv_nxt && ! 558: tiwin && tiwin == tp->snd_wnd && ! 559: tp->snd_nxt == tp->snd_max) { ! 560: ! 561: /* ! 562: * If last ACK falls within this segment's sequence numbers, ! 563: * record the timestamp. ! 564: * NOTE that the test is modified according to the latest ! 565: * proposal of the [email protected] list (Braden 1993/04/26). ! 566: */ ! 567: if ((to.to_flag & TOF_TS) != 0 && ! 568: SEQ_LEQ(ti->ti_seq, tp->last_ack_sent)) { ! 569: tp->ts_recent_age = tcp_now; ! 570: tp->ts_recent = to.to_tsval; ! 571: } ! 572: ! 573: if (ti->ti_len == 0) { ! 574: if (SEQ_GT(ti->ti_ack, tp->snd_una) && ! 575: SEQ_LEQ(ti->ti_ack, tp->snd_max) && ! 576: tp->snd_cwnd >= tp->snd_wnd && ! 577: tp->t_dupacks < tcprexmtthresh) { ! 578: /* ! 579: * this is a pure ack for outstanding data. ! 580: */ ! 581: ++tcpstat.tcps_predack; ! 582: if ((to.to_flag & TOF_TS) != 0) ! 583: tcp_xmit_timer(tp, ! 584: tcp_now - to.to_tsecr + 1); ! 585: else if (tp->t_rtt && ! 586: SEQ_GT(ti->ti_ack, tp->t_rtseq)) ! 587: tcp_xmit_timer(tp, tp->t_rtt); ! 588: acked = ti->ti_ack - tp->snd_una; ! 589: tcpstat.tcps_rcvackpack++; ! 590: tcpstat.tcps_rcvackbyte += acked; ! 591: sbdrop(&so->so_snd, acked); ! 592: tp->snd_una = ti->ti_ack; ! 593: m_freem(m); ! 594: ! 595: /* ! 596: * If all outstanding data are acked, stop ! 597: * retransmit timer, otherwise restart timer ! 598: * using current (possibly backed-off) value. ! 599: * If process is waiting for space, ! 600: * wakeup/selwakeup/signal. If data ! 601: * are ready to send, let tcp_output ! 602: * decide between more output or persist. ! 603: */ ! 604: if (tp->snd_una == tp->snd_max) ! 605: tp->t_timer[TCPT_REXMT] = 0; ! 606: else if (tp->t_timer[TCPT_PERSIST] == 0) ! 607: tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; ! 608: ! 609: sowwakeup(so); ! 610: if (so->so_snd.sb_cc) ! 611: (void) tcp_output(tp); ! 612: return; ! 613: } ! 614: } else if (ti->ti_ack == tp->snd_una && ! 615: tp->t_segq == NULL && ! 616: ti->ti_len <= sbspace(&so->so_rcv)) { ! 617: /* ! 618: * this is a pure, in-sequence data packet ! 619: * with nothing on the reassembly queue and ! 620: * we have enough buffer space to take it. ! 621: */ ! 622: ++tcpstat.tcps_preddat; ! 623: tp->rcv_nxt += ti->ti_len; ! 624: tcpstat.tcps_rcvpack++; ! 625: tcpstat.tcps_rcvbyte += ti->ti_len; ! 626: /* ! 627: * Add data to socket buffer. ! 628: */ ! 629: sbappend(&so->so_rcv, m); ! 630: sorwakeup(so); ! 631: if (tcp_delack_enabled) { ! 632: tp->t_flags |= TF_DELACK; ! 633: } else { ! 634: tp->t_flags |= TF_ACKNOW; ! 635: tcp_output(tp); ! 636: } ! 637: return; ! 638: } ! 639: } ! 640: ! 641: /* ! 642: * Calculate amount of space in receive window, ! 643: * and then do TCP input processing. ! 644: * Receive window is amount of space in rcv queue, ! 645: * but not less than advertised window. ! 646: */ ! 647: { int win; ! 648: ! 649: win = sbspace(&so->so_rcv); ! 650: if (win < 0) ! 651: win = 0; ! 652: tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt)); ! 653: } ! 654: ! 655: switch (tp->t_state) { ! 656: ! 657: /* ! 658: * If the state is LISTEN then ignore segment if it contains an RST. ! 659: * If the segment contains an ACK then it is bad and send a RST. ! 660: * If it does not contain a SYN then it is not interesting; drop it. ! 661: * If it is from this socket, drop it, it must be forged. ! 662: * Don't bother responding if the destination was a broadcast. ! 663: * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial ! 664: * tp->iss, and send a segment: ! 665: * <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK> ! 666: * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss. ! 667: * Fill in remote peer address fields if not previously specified. ! 668: * Enter SYN_RECEIVED state, and process any other fields of this ! 669: * segment in this state. ! 670: */ ! 671: case TCPS_LISTEN: { ! 672: register struct sockaddr_in *sin; ! 673: ! 674: if (tiflags & TH_RST) ! 675: goto drop; ! 676: if (tiflags & TH_ACK) ! 677: goto dropwithreset; ! 678: if ((tiflags & TH_SYN) == 0) ! 679: goto drop; ! 680: if ((ti->ti_dport == ti->ti_sport) && ! 681: (ti->ti_dst.s_addr == ti->ti_src.s_addr)) ! 682: goto drop; ! 683: /* ! 684: * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN ! 685: * in_broadcast() should never return true on a received ! 686: * packet with M_BCAST not set. ! 687: */ ! 688: if (m->m_flags & (M_BCAST|M_MCAST) || ! 689: IN_MULTICAST(ntohl(ti->ti_dst.s_addr))) ! 690: goto drop; ! 691: MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, ! 692: M_NOWAIT); ! 693: if (sin == NULL) ! 694: goto drop; ! 695: sin->sin_family = AF_INET; ! 696: sin->sin_len = sizeof(*sin); ! 697: sin->sin_addr = ti->ti_src; ! 698: sin->sin_port = ti->ti_sport; ! 699: bzero((caddr_t)sin->sin_zero, sizeof(sin->sin_zero)); ! 700: laddr = inp->inp_laddr; ! 701: if (inp->inp_laddr.s_addr == INADDR_ANY) ! 702: inp->inp_laddr = ti->ti_dst; ! 703: if (in_pcbconnect(inp, (struct sockaddr *)sin, proc0)) { ! 704: inp->inp_laddr = laddr; ! 705: FREE(sin, M_SONAME); ! 706: goto drop; ! 707: } ! 708: FREE(sin, M_SONAME); ! 709: tp->t_template = tcp_template(tp); ! 710: if (tp->t_template == 0) { ! 711: tp = tcp_drop(tp, ENOBUFS); ! 712: dropsocket = 0; /* socket is already gone */ ! 713: goto drop; ! 714: } ! 715: if ((taop = tcp_gettaocache(inp)) == NULL) { ! 716: taop = &tao_noncached; ! 717: bzero(taop, sizeof(*taop)); ! 718: } ! 719: tcp_dooptions(tp, optp, optlen, ti, &to); ! 720: if (iss) ! 721: tp->iss = iss; ! 722: else ! 723: tp->iss = tcp_iss; ! 724: tcp_iss += TCP_ISSINCR/4; ! 725: tp->irs = ti->ti_seq; ! 726: tcp_sendseqinit(tp); ! 727: tcp_rcvseqinit(tp); ! 728: /* ! 729: * Initialization of the tcpcb for transaction; ! 730: * set SND.WND = SEG.WND, ! 731: * initialize CCsend and CCrecv. ! 732: */ ! 733: tp->snd_wnd = tiwin; /* initial send-window */ ! 734: tp->cc_send = CC_INC(tcp_ccgen); ! 735: tp->cc_recv = to.to_cc; ! 736: /* ! 737: * Perform TAO test on incoming CC (SEG.CC) option, if any. ! 738: * - compare SEG.CC against cached CC from the same host, ! 739: * if any. ! 740: * - if SEG.CC > chached value, SYN must be new and is accepted ! 741: * immediately: save new CC in the cache, mark the socket ! 742: * connected, enter ESTABLISHED state, turn on flag to ! 743: * send a SYN in the next segment. ! 744: * A virtual advertised window is set in rcv_adv to ! 745: * initialize SWS prevention. Then enter normal segment ! 746: * processing: drop SYN, process data and FIN. ! 747: * - otherwise do a normal 3-way handshake. ! 748: */ ! 749: if ((to.to_flag & TOF_CC) != 0) { ! 750: if (((tp->t_flags & TF_NOPUSH) != 0) && ! 751: taop->tao_cc != 0 && CC_GT(to.to_cc, taop->tao_cc)) { ! 752: ! 753: taop->tao_cc = to.to_cc; ! 754: tp->t_state = TCPS_ESTABLISHED; ! 755: ! 756: /* ! 757: * If there is a FIN, or if there is data and the ! 758: * connection is local, then delay SYN,ACK(SYN) in ! 759: * the hope of piggy-backing it on a response ! 760: * segment. Otherwise must send ACK now in case ! 761: * the other side is slow starting. ! 762: */ ! 763: if (tcp_delack_enabled && ((tiflags & TH_FIN) || (ti->ti_len != 0 && ! 764: in_localaddr(inp->inp_faddr)))) ! 765: tp->t_flags |= (TF_DELACK | TF_NEEDSYN); ! 766: else ! 767: tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN); ! 768: ! 769: /* ! 770: * Limit the `virtual advertised window' to TCP_MAXWIN ! 771: * here. Even if we requested window scaling, it will ! 772: * become effective only later when our SYN is acked. ! 773: */ ! 774: tp->rcv_adv += min(tp->rcv_wnd, TCP_MAXWIN); ! 775: tcpstat.tcps_connects++; ! 776: soisconnected(so); ! 777: tp->t_timer[TCPT_KEEP] = tcp_keepinit; ! 778: dropsocket = 0; /* committed to socket */ ! 779: tcpstat.tcps_accepts++; ! 780: goto trimthenstep6; ! 781: } ! 782: /* else do standard 3-way handshake */ ! 783: } else { ! 784: /* ! 785: * No CC option, but maybe CC.NEW: ! 786: * invalidate cached value. ! 787: */ ! 788: taop->tao_cc = 0; ! 789: } ! 790: /* ! 791: * TAO test failed or there was no CC option, ! 792: * do a standard 3-way handshake. ! 793: */ ! 794: tp->t_flags |= TF_ACKNOW; ! 795: tp->t_state = TCPS_SYN_RECEIVED; ! 796: tp->t_timer[TCPT_KEEP] = tcp_keepinit; ! 797: dropsocket = 0; /* committed to socket */ ! 798: tcpstat.tcps_accepts++; ! 799: goto trimthenstep6; ! 800: } ! 801: ! 802: /* ! 803: * If the state is SYN_RECEIVED: ! 804: * if seg contains an ACK, but not for our SYN/ACK, send a RST. ! 805: */ ! 806: case TCPS_SYN_RECEIVED: ! 807: if ((tiflags & TH_ACK) && ! 808: (SEQ_LEQ(ti->ti_ack, tp->snd_una) || ! 809: SEQ_GT(ti->ti_ack, tp->snd_max))) ! 810: goto dropwithreset; ! 811: break; ! 812: ! 813: /* ! 814: * If the state is SYN_SENT: ! 815: * if seg contains an ACK, but not for our SYN, drop the input. ! 816: * if seg contains a RST, then drop the connection. ! 817: * if seg does not contain SYN, then drop it. ! 818: * Otherwise this is an acceptable SYN segment ! 819: * initialize tp->rcv_nxt and tp->irs ! 820: * if seg contains ack then advance tp->snd_una ! 821: * if SYN has been acked change to ESTABLISHED else SYN_RCVD state ! 822: * arrange for segment to be acked (eventually) ! 823: * continue processing rest of data/controls, beginning with URG ! 824: */ ! 825: case TCPS_SYN_SENT: ! 826: if ((taop = tcp_gettaocache(inp)) == NULL) { ! 827: taop = &tao_noncached; ! 828: bzero(taop, sizeof(*taop)); ! 829: } ! 830: ! 831: if ((tiflags & TH_ACK) && ! 832: (SEQ_LEQ(ti->ti_ack, tp->iss) || ! 833: SEQ_GT(ti->ti_ack, tp->snd_max))) { ! 834: /* ! 835: * If we have a cached CCsent for the remote host, ! 836: * hence we haven't just crashed and restarted, ! 837: * do not send a RST. This may be a retransmission ! 838: * from the other side after our earlier ACK was lost. ! 839: * Our new SYN, when it arrives, will serve as the ! 840: * needed ACK. ! 841: */ ! 842: if (taop->tao_ccsent != 0) ! 843: goto drop; ! 844: else ! 845: goto dropwithreset; ! 846: } ! 847: if (tiflags & TH_RST) { ! 848: if (tiflags & TH_ACK) { ! 849: tp = tcp_drop(tp, ECONNREFUSED); ! 850: postevent(so, 0, EV_RESET); ! 851: } ! 852: goto drop; ! 853: } ! 854: if ((tiflags & TH_SYN) == 0) ! 855: goto drop; ! 856: tp->snd_wnd = ti->ti_win; /* initial send window */ ! 857: tp->cc_recv = to.to_cc; /* foreign CC */ ! 858: ! 859: tp->irs = ti->ti_seq; ! 860: tcp_rcvseqinit(tp); ! 861: if (tiflags & TH_ACK) { ! 862: /* ! 863: * Our SYN was acked. If segment contains CC.ECHO ! 864: * option, check it to make sure this segment really ! 865: * matches our SYN. If not, just drop it as old ! 866: * duplicate, but send an RST if we're still playing ! 867: * by the old rules. If no CC.ECHO option, make sure ! 868: * we don't get fooled into using T/TCP. ! 869: */ ! 870: if (to.to_flag & TOF_CCECHO) { ! 871: if (tp->cc_send != to.to_ccecho) ! 872: if (taop->tao_ccsent != 0) ! 873: goto drop; ! 874: else ! 875: goto dropwithreset; ! 876: } else ! 877: tp->t_flags &= ~TF_RCVD_CC; ! 878: tcpstat.tcps_connects++; ! 879: soisconnected(so); ! 880: /* Do window scaling on this connection? */ ! 881: if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == ! 882: (TF_RCVD_SCALE|TF_REQ_SCALE)) { ! 883: tp->snd_scale = tp->requested_s_scale; ! 884: tp->rcv_scale = tp->request_r_scale; ! 885: } ! 886: /* Segment is acceptable, update cache if undefined. */ ! 887: if (taop->tao_ccsent == 0) ! 888: taop->tao_ccsent = to.to_ccecho; ! 889: ! 890: tp->rcv_adv += tp->rcv_wnd; ! 891: tp->snd_una++; /* SYN is acked */ ! 892: /* ! 893: * If there's data, delay ACK; if there's also a FIN ! 894: * ACKNOW will be turned on later. ! 895: */ ! 896: if (tcp_delack_enabled && ti->ti_len != 0) ! 897: tp->t_flags |= TF_DELACK; ! 898: else ! 899: tp->t_flags |= TF_ACKNOW; ! 900: /* ! 901: * Received <SYN,ACK> in SYN_SENT[*] state. ! 902: * Transitions: ! 903: * SYN_SENT --> ESTABLISHED ! 904: * SYN_SENT* --> FIN_WAIT_1 ! 905: */ ! 906: if (tp->t_flags & TF_NEEDFIN) { ! 907: tp->t_state = TCPS_FIN_WAIT_1; ! 908: tp->t_flags &= ~TF_NEEDFIN; ! 909: tiflags &= ~TH_SYN; ! 910: } else { ! 911: tp->t_state = TCPS_ESTABLISHED; ! 912: tp->t_timer[TCPT_KEEP] = tcp_keepidle; ! 913: } ! 914: } else { ! 915: /* ! 916: * Received initial SYN in SYN-SENT[*] state => simul- ! 917: * taneous open. If segment contains CC option and there is ! 918: * a cached CC, apply TAO test; if it succeeds, connection is ! 919: * half-synchronized. Otherwise, do 3-way handshake: ! 920: * SYN-SENT -> SYN-RECEIVED ! 921: * SYN-SENT* -> SYN-RECEIVED* ! 922: * If there was no CC option, clear cached CC value. ! 923: */ ! 924: tp->t_flags |= TF_ACKNOW; ! 925: tp->t_timer[TCPT_REXMT] = 0; ! 926: if (to.to_flag & TOF_CC) { ! 927: if (taop->tao_cc != 0 && ! 928: CC_GT(to.to_cc, taop->tao_cc)) { ! 929: /* ! 930: * update cache and make transition: ! 931: * SYN-SENT -> ESTABLISHED* ! 932: * SYN-SENT* -> FIN-WAIT-1* ! 933: */ ! 934: taop->tao_cc = to.to_cc; ! 935: if (tp->t_flags & TF_NEEDFIN) { ! 936: tp->t_state = TCPS_FIN_WAIT_1; ! 937: tp->t_flags &= ~TF_NEEDFIN; ! 938: } else { ! 939: tp->t_state = TCPS_ESTABLISHED; ! 940: tp->t_timer[TCPT_KEEP] = tcp_keepidle; ! 941: } ! 942: tp->t_flags |= TF_NEEDSYN; ! 943: } else ! 944: tp->t_state = TCPS_SYN_RECEIVED; ! 945: } else { ! 946: /* CC.NEW or no option => invalidate cache */ ! 947: taop->tao_cc = 0; ! 948: tp->t_state = TCPS_SYN_RECEIVED; ! 949: } ! 950: } ! 951: ! 952: trimthenstep6: ! 953: /* ! 954: * Advance ti->ti_seq to correspond to first data byte. ! 955: * If data, trim to stay within window, ! 956: * dropping FIN if necessary. ! 957: */ ! 958: ti->ti_seq++; ! 959: if (ti->ti_len > tp->rcv_wnd) { ! 960: todrop = ti->ti_len - tp->rcv_wnd; ! 961: m_adj(m, -todrop); ! 962: ti->ti_len = tp->rcv_wnd; ! 963: tiflags &= ~TH_FIN; ! 964: tcpstat.tcps_rcvpackafterwin++; ! 965: tcpstat.tcps_rcvbyteafterwin += todrop; ! 966: } ! 967: tp->snd_wl1 = ti->ti_seq - 1; ! 968: tp->rcv_up = ti->ti_seq; ! 969: /* ! 970: * Client side of transaction: already sent SYN and data. ! 971: * If the remote host used T/TCP to validate the SYN, ! 972: * our data will be ACK'd; if so, enter normal data segment ! 973: * processing in the middle of step 5, ack processing. ! 974: * Otherwise, goto step 6. ! 975: */ ! 976: if (tiflags & TH_ACK) ! 977: goto process_ACK; ! 978: goto step6; ! 979: /* ! 980: * If the state is LAST_ACK or CLOSING or TIME_WAIT: ! 981: * if segment contains a SYN and CC [not CC.NEW] option: ! 982: * if state == TIME_WAIT and connection duration > MSL, ! 983: * drop packet and send RST; ! 984: * ! 985: * if SEG.CC > CCrecv then is new SYN, and can implicitly ! 986: * ack the FIN (and data) in retransmission queue. ! 987: * Complete close and delete TCPCB. Then reprocess ! 988: * segment, hoping to find new TCPCB in LISTEN state; ! 989: * ! 990: * else must be old SYN; drop it. ! 991: * else do normal processing. ! 992: */ ! 993: case TCPS_LAST_ACK: ! 994: case TCPS_CLOSING: ! 995: case TCPS_TIME_WAIT: ! 996: if ((tiflags & TH_SYN) && ! 997: (to.to_flag & TOF_CC) && tp->cc_recv != 0) { ! 998: if (tp->t_state == TCPS_TIME_WAIT && ! 999: tp->t_duration > TCPTV_MSL) ! 1000: goto dropwithreset; ! 1001: if (CC_GT(to.to_cc, tp->cc_recv)) { ! 1002: tp = tcp_close(tp); ! 1003: goto findpcb; ! 1004: } ! 1005: else ! 1006: goto drop; ! 1007: } ! 1008: break; /* continue normal processing */ ! 1009: } ! 1010: ! 1011: /* ! 1012: * States other than LISTEN or SYN_SENT. ! 1013: * First check the RST flag and sequence number since reset segments ! 1014: * are exempt from the timestamp and connection count tests. This ! 1015: * fixes a bug introduced by the Stevens, vol. 2, p. 960 bugfix ! 1016: * below which allowed reset segments in half the sequence space ! 1017: * to fall though and be processed (which gives forged reset ! 1018: * segments with a random sequence number a 50 percent chance of ! 1019: * killing a connection). ! 1020: * Then check timestamp, if present. ! 1021: * Then check the connection count, if present. ! 1022: * Then check that at least some bytes of segment are within ! 1023: * receive window. If segment begins before rcv_nxt, ! 1024: * drop leading data (and SYN); if nothing left, just ack. ! 1025: * ! 1026: * ! 1027: * If the RST bit is set, check the sequence number to see ! 1028: * if this is a valid reset segment. ! 1029: * RFC 793 page 37: ! 1030: * In all states except SYN-SENT, all reset (RST) segments ! 1031: * are validated by checking their SEQ-fields. A reset is ! 1032: * valid if its sequence number is in the window. ! 1033: * Note: this does not take into account delayed ACKs, so ! 1034: * we should test against last_ack_sent instead of rcv_nxt. ! 1035: * Also, it does not make sense to allow reset segments with ! 1036: * sequence numbers greater than last_ack_sent to be processed ! 1037: * since these sequence numbers are just the acknowledgement ! 1038: * numbers in our outgoing packets being echoed back at us, ! 1039: * and these acknowledgement numbers are monotonically ! 1040: * increasing. ! 1041: * If we have multiple segments in flight, the intial reset ! 1042: * segment sequence numbers will be to the left of last_ack_sent, ! 1043: * but they will eventually catch up. ! 1044: * In any case, it never made sense to trim reset segments to ! 1045: * fit the receive window since RFC 1122 says: ! 1046: * 4.2.2.12 RST Segment: RFC-793 Section 3.4 ! 1047: * ! 1048: * A TCP SHOULD allow a received RST segment to include data. ! 1049: * ! 1050: * DISCUSSION ! 1051: * It has been suggested that a RST segment could contain ! 1052: * ASCII text that encoded and explained the cause of the ! 1053: * RST. No standard has yet been established for such ! 1054: * data. ! 1055: * ! 1056: * If the reset segment passes the sequence number test examine ! 1057: * the state: ! 1058: * SYN_RECEIVED STATE: ! 1059: * If passive open, return to LISTEN state. ! 1060: * If active open, inform user that connection was refused. ! 1061: * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES: ! 1062: * Inform user that connection was reset, and close tcb. ! 1063: * CLOSING, LAST_ACK, TIME_WAIT STATES ! 1064: * Close the tcb. ! 1065: * TIME_WAIT state: ! 1066: * Drop the segment - see Stevens, vol. 2, p. 964 and ! 1067: * RFC 1337. ! 1068: */ ! 1069: if (tiflags & TH_RST) { ! 1070: if (tp->last_ack_sent == ti->ti_seq) { ! 1071: switch (tp->t_state) { ! 1072: ! 1073: case TCPS_SYN_RECEIVED: ! 1074: so->so_error = ECONNREFUSED; ! 1075: goto close; ! 1076: ! 1077: case TCPS_ESTABLISHED: ! 1078: case TCPS_FIN_WAIT_1: ! 1079: case TCPS_FIN_WAIT_2: ! 1080: case TCPS_CLOSE_WAIT: ! 1081: so->so_error = ECONNRESET; ! 1082: close: ! 1083: postevent(so, 0, EV_RESET); ! 1084: tp->t_state = TCPS_CLOSED; ! 1085: tcpstat.tcps_drops++; ! 1086: tp = tcp_close(tp); ! 1087: break; ! 1088: ! 1089: case TCPS_CLOSING: ! 1090: case TCPS_LAST_ACK: ! 1091: tp = tcp_close(tp); ! 1092: break; ! 1093: ! 1094: case TCPS_TIME_WAIT: ! 1095: break; ! 1096: } ! 1097: } ! 1098: goto drop; ! 1099: } ! 1100: ! 1101: /* ! 1102: * RFC 1323 PAWS: If we have a timestamp reply on this segment ! 1103: * and it's less than ts_recent, drop it. ! 1104: */ ! 1105: if ((to.to_flag & TOF_TS) != 0 && tp->ts_recent && ! 1106: TSTMP_LT(to.to_tsval, tp->ts_recent)) { ! 1107: ! 1108: /* Check to see if ts_recent is over 24 days old. */ ! 1109: if ((int)(tcp_now - tp->ts_recent_age) > TCP_PAWS_IDLE) { ! 1110: /* ! 1111: * Invalidate ts_recent. If this segment updates ! 1112: * ts_recent, the age will be reset later and ts_recent ! 1113: * will get a valid value. If it does not, setting ! 1114: * ts_recent to zero will at least satisfy the ! 1115: * requirement that zero be placed in the timestamp ! 1116: * echo reply when ts_recent isn't valid. The ! 1117: * age isn't reset until we get a valid ts_recent ! 1118: * because we don't want out-of-order segments to be ! 1119: * dropped when ts_recent is old. ! 1120: */ ! 1121: tp->ts_recent = 0; ! 1122: } else { ! 1123: tcpstat.tcps_rcvduppack++; ! 1124: tcpstat.tcps_rcvdupbyte += ti->ti_len; ! 1125: tcpstat.tcps_pawsdrop++; ! 1126: goto dropafterack; ! 1127: } ! 1128: } ! 1129: ! 1130: /* ! 1131: * T/TCP mechanism ! 1132: * If T/TCP was negotiated and the segment doesn't have CC, ! 1133: * or if its CC is wrong then drop the segment. ! 1134: * RST segments do not have to comply with this. ! 1135: */ ! 1136: if ((tp->t_flags & (TF_REQ_CC|TF_RCVD_CC)) == (TF_REQ_CC|TF_RCVD_CC) && ! 1137: ((to.to_flag & TOF_CC) == 0 || tp->cc_recv != to.to_cc)) ! 1138: goto dropafterack; ! 1139: ! 1140: /* ! 1141: * In the SYN-RECEIVED state, validate that the packet belongs to ! 1142: * this connection before trimming the data to fit the receive ! 1143: * window. Check the sequence number versus IRS since we know ! 1144: * the sequence numbers haven't wrapped. This is a partial fix ! 1145: * for the "LAND" DoS attack. ! 1146: */ ! 1147: if (tp->t_state == TCPS_SYN_RECEIVED && SEQ_LT(ti->ti_seq, tp->irs)) ! 1148: goto dropwithreset; ! 1149: ! 1150: todrop = tp->rcv_nxt - ti->ti_seq; ! 1151: if (todrop > 0) { ! 1152: if (tiflags & TH_SYN) { ! 1153: tiflags &= ~TH_SYN; ! 1154: ti->ti_seq++; ! 1155: if (ti->ti_urp > 1) ! 1156: ti->ti_urp--; ! 1157: else ! 1158: tiflags &= ~TH_URG; ! 1159: todrop--; ! 1160: } ! 1161: /* ! 1162: * Following if statement from Stevens, vol. 2, p. 960. ! 1163: */ ! 1164: if (todrop > ti->ti_len ! 1165: || (todrop == ti->ti_len && (tiflags & TH_FIN) == 0)) { ! 1166: /* ! 1167: * Any valid FIN must be to the left of the window. ! 1168: * At this point the FIN must be a duplicate or out ! 1169: * of sequence; drop it. ! 1170: */ ! 1171: tiflags &= ~TH_FIN; ! 1172: ! 1173: /* ! 1174: * Send an ACK to resynchronize and drop any data. ! 1175: * But keep on processing for RST or ACK. ! 1176: */ ! 1177: tp->t_flags |= TF_ACKNOW; ! 1178: todrop = ti->ti_len; ! 1179: tcpstat.tcps_rcvduppack++; ! 1180: tcpstat.tcps_rcvdupbyte += todrop; ! 1181: } else { ! 1182: tcpstat.tcps_rcvpartduppack++; ! 1183: tcpstat.tcps_rcvpartdupbyte += todrop; ! 1184: } ! 1185: m_adj(m, todrop); ! 1186: ti->ti_seq += todrop; ! 1187: ti->ti_len -= todrop; ! 1188: if (ti->ti_urp > todrop) ! 1189: ti->ti_urp -= todrop; ! 1190: else { ! 1191: tiflags &= ~TH_URG; ! 1192: ti->ti_urp = 0; ! 1193: } ! 1194: } ! 1195: ! 1196: /* ! 1197: * If new data are received on a connection after the ! 1198: * user processes are gone, then RST the other end. ! 1199: */ ! 1200: if ((so->so_state & SS_NOFDREF) && ! 1201: tp->t_state > TCPS_CLOSE_WAIT && ti->ti_len) { ! 1202: tp = tcp_close(tp); ! 1203: tcpstat.tcps_rcvafterclose++; ! 1204: goto dropwithreset; ! 1205: } ! 1206: ! 1207: /* ! 1208: * If segment ends after window, drop trailing data ! 1209: * (and PUSH and FIN); if nothing left, just ACK. ! 1210: */ ! 1211: todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd); ! 1212: if (todrop > 0) { ! 1213: tcpstat.tcps_rcvpackafterwin++; ! 1214: if (todrop >= ti->ti_len) { ! 1215: tcpstat.tcps_rcvbyteafterwin += ti->ti_len; ! 1216: /* ! 1217: * If a new connection request is received ! 1218: * while in TIME_WAIT, drop the old connection ! 1219: * and start over if the sequence numbers ! 1220: * are above the previous ones. ! 1221: */ ! 1222: if (tiflags & TH_SYN && ! 1223: tp->t_state == TCPS_TIME_WAIT && ! 1224: SEQ_GT(ti->ti_seq, tp->rcv_nxt)) { ! 1225: iss = tp->rcv_nxt + TCP_ISSINCR; ! 1226: tp = tcp_close(tp); ! 1227: goto findpcb; ! 1228: } ! 1229: /* ! 1230: * If window is closed can only take segments at ! 1231: * window edge, and have to drop data and PUSH from ! 1232: * incoming segments. Continue processing, but ! 1233: * remember to ack. Otherwise, drop segment ! 1234: * and ack. ! 1235: */ ! 1236: if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) { ! 1237: tp->t_flags |= TF_ACKNOW; ! 1238: tcpstat.tcps_rcvwinprobe++; ! 1239: } else ! 1240: goto dropafterack; ! 1241: } else ! 1242: tcpstat.tcps_rcvbyteafterwin += todrop; ! 1243: m_adj(m, -todrop); ! 1244: ti->ti_len -= todrop; ! 1245: tiflags &= ~(TH_PUSH|TH_FIN); ! 1246: } ! 1247: ! 1248: /* ! 1249: * If last ACK falls within this segment's sequence numbers, ! 1250: * record its timestamp. ! 1251: * NOTE that the test is modified according to the latest ! 1252: * proposal of the [email protected] list (Braden 1993/04/26). ! 1253: */ ! 1254: if ((to.to_flag & TOF_TS) != 0 && ! 1255: SEQ_LEQ(ti->ti_seq, tp->last_ack_sent)) { ! 1256: tp->ts_recent_age = tcp_now; ! 1257: tp->ts_recent = to.to_tsval; ! 1258: } ! 1259: ! 1260: /* ! 1261: * If a SYN is in the window, then this is an ! 1262: * error and we send an RST and drop the connection. ! 1263: */ ! 1264: if (tiflags & TH_SYN) { ! 1265: tp = tcp_drop(tp, ECONNRESET); ! 1266: postevent(so, 0, EV_RESET); ! 1267: goto dropwithreset; ! 1268: } ! 1269: ! 1270: /* ! 1271: * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN ! 1272: * flag is on (half-synchronized state), then queue data for ! 1273: * later processing; else drop segment and return. ! 1274: */ ! 1275: if ((tiflags & TH_ACK) == 0) { ! 1276: if (tp->t_state == TCPS_SYN_RECEIVED || ! 1277: (tp->t_flags & TF_NEEDSYN)) ! 1278: goto step6; ! 1279: else ! 1280: goto drop; ! 1281: } ! 1282: ! 1283: /* ! 1284: * Ack processing. ! 1285: */ ! 1286: switch (tp->t_state) { ! 1287: ! 1288: /* ! 1289: * In SYN_RECEIVED state, the ack ACKs our SYN, so enter ! 1290: * ESTABLISHED state and continue processing. ! 1291: * The ACK was checked above. ! 1292: */ ! 1293: case TCPS_SYN_RECEIVED: ! 1294: ! 1295: tcpstat.tcps_connects++; ! 1296: soisconnected(so); ! 1297: /* Do window scaling? */ ! 1298: if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == ! 1299: (TF_RCVD_SCALE|TF_REQ_SCALE)) { ! 1300: tp->snd_scale = tp->requested_s_scale; ! 1301: tp->rcv_scale = tp->request_r_scale; ! 1302: } ! 1303: /* ! 1304: * Upon successful completion of 3-way handshake, ! 1305: * update cache.CC if it was undefined, pass any queued ! 1306: * data to the user, and advance state appropriately. ! 1307: */ ! 1308: if ((taop = tcp_gettaocache(inp)) != NULL && ! 1309: taop->tao_cc == 0) ! 1310: taop->tao_cc = tp->cc_recv; ! 1311: ! 1312: /* ! 1313: * Make transitions: ! 1314: * SYN-RECEIVED -> ESTABLISHED ! 1315: * SYN-RECEIVED* -> FIN-WAIT-1 ! 1316: */ ! 1317: if (tp->t_flags & TF_NEEDFIN) { ! 1318: tp->t_state = TCPS_FIN_WAIT_1; ! 1319: tp->t_flags &= ~TF_NEEDFIN; ! 1320: } else { ! 1321: tp->t_state = TCPS_ESTABLISHED; ! 1322: tp->t_timer[TCPT_KEEP] = tcp_keepidle; ! 1323: } ! 1324: /* ! 1325: * If segment contains data or ACK, will call tcp_reass() ! 1326: * later; if not, do so now to pass queued data to user. ! 1327: */ ! 1328: if (ti->ti_len == 0 && (tiflags & TH_FIN) == 0) ! 1329: (void) tcp_reass(tp, (struct tcpiphdr *)0, ! 1330: (struct mbuf *)0); ! 1331: tp->snd_wl1 = ti->ti_seq - 1; ! 1332: /* fall into ... */ ! 1333: ! 1334: /* ! 1335: * In ESTABLISHED state: drop duplicate ACKs; ACK out of range ! 1336: * ACKs. If the ack is in the range ! 1337: * tp->snd_una < ti->ti_ack <= tp->snd_max ! 1338: * then advance tp->snd_una to ti->ti_ack and drop ! 1339: * data from the retransmission queue. If this ACK reflects ! 1340: * more up to date window information we update our window information. ! 1341: */ ! 1342: case TCPS_ESTABLISHED: ! 1343: case TCPS_FIN_WAIT_1: ! 1344: case TCPS_FIN_WAIT_2: ! 1345: case TCPS_CLOSE_WAIT: ! 1346: case TCPS_CLOSING: ! 1347: case TCPS_LAST_ACK: ! 1348: case TCPS_TIME_WAIT: ! 1349: ! 1350: if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) { ! 1351: if (ti->ti_len == 0 && tiwin == tp->snd_wnd) { ! 1352: tcpstat.tcps_rcvdupack++; ! 1353: /* ! 1354: * If we have outstanding data (other than ! 1355: * a window probe), this is a completely ! 1356: * duplicate ack (ie, window info didn't ! 1357: * change), the ack is the biggest we've ! 1358: * seen and we've seen exactly our rexmt ! 1359: * threshhold of them, assume a packet ! 1360: * has been dropped and retransmit it. ! 1361: * Kludge snd_nxt & the congestion ! 1362: * window so we send only this one ! 1363: * packet. ! 1364: * ! 1365: * We know we're losing at the current ! 1366: * window size so do congestion avoidance ! 1367: * (set ssthresh to half the current window ! 1368: * and pull our congestion window back to ! 1369: * the new ssthresh). ! 1370: * ! 1371: * Dup acks mean that packets have left the ! 1372: * network (they're now cached at the receiver) ! 1373: * so bump cwnd by the amount in the receiver ! 1374: * to keep a constant cwnd packets in the ! 1375: * network. ! 1376: */ ! 1377: if (tp->t_timer[TCPT_REXMT] == 0 || ! 1378: ti->ti_ack != tp->snd_una) ! 1379: tp->t_dupacks = 0; ! 1380: else if (++tp->t_dupacks == tcprexmtthresh) { ! 1381: tcp_seq onxt = tp->snd_nxt; ! 1382: u_int win = ! 1383: min(tp->snd_wnd, tp->snd_cwnd) / 2 / ! 1384: tp->t_maxseg; ! 1385: ! 1386: if (win < 2) ! 1387: win = 2; ! 1388: tp->snd_ssthresh = win * tp->t_maxseg; ! 1389: tp->t_timer[TCPT_REXMT] = 0; ! 1390: tp->t_rtt = 0; ! 1391: tp->snd_nxt = ti->ti_ack; ! 1392: tp->snd_cwnd = tp->t_maxseg; ! 1393: (void) tcp_output(tp); ! 1394: tp->snd_cwnd = tp->snd_ssthresh + ! 1395: tp->t_maxseg * tp->t_dupacks; ! 1396: if (SEQ_GT(onxt, tp->snd_nxt)) ! 1397: tp->snd_nxt = onxt; ! 1398: goto drop; ! 1399: } else if (tp->t_dupacks > tcprexmtthresh) { ! 1400: tp->snd_cwnd += tp->t_maxseg; ! 1401: (void) tcp_output(tp); ! 1402: goto drop; ! 1403: } ! 1404: } else ! 1405: tp->t_dupacks = 0; ! 1406: break; ! 1407: } ! 1408: /* ! 1409: * If the congestion window was inflated to account ! 1410: * for the other side's cached packets, retract it. ! 1411: */ ! 1412: if (tp->t_dupacks >= tcprexmtthresh && ! 1413: tp->snd_cwnd > tp->snd_ssthresh) ! 1414: tp->snd_cwnd = tp->snd_ssthresh; ! 1415: tp->t_dupacks = 0; ! 1416: if (SEQ_GT(ti->ti_ack, tp->snd_max)) { ! 1417: tcpstat.tcps_rcvacktoomuch++; ! 1418: goto dropafterack; ! 1419: } ! 1420: /* ! 1421: * If we reach this point, ACK is not a duplicate, ! 1422: * i.e., it ACKs something we sent. ! 1423: */ ! 1424: if (tp->t_flags & TF_NEEDSYN) { ! 1425: /* ! 1426: * T/TCP: Connection was half-synchronized, and our ! 1427: * SYN has been ACK'd (so connection is now fully ! 1428: * synchronized). Go to non-starred state, ! 1429: * increment snd_una for ACK of SYN, and check if ! 1430: * we can do window scaling. ! 1431: */ ! 1432: tp->t_flags &= ~TF_NEEDSYN; ! 1433: tp->snd_una++; ! 1434: /* Do window scaling? */ ! 1435: if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == ! 1436: (TF_RCVD_SCALE|TF_REQ_SCALE)) { ! 1437: tp->snd_scale = tp->requested_s_scale; ! 1438: tp->rcv_scale = tp->request_r_scale; ! 1439: } ! 1440: } ! 1441: ! 1442: process_ACK: ! 1443: acked = ti->ti_ack - tp->snd_una; ! 1444: tcpstat.tcps_rcvackpack++; ! 1445: tcpstat.tcps_rcvackbyte += acked; ! 1446: ! 1447: /* ! 1448: * If we have a timestamp reply, update smoothed ! 1449: * round trip time. If no timestamp is present but ! 1450: * transmit timer is running and timed sequence ! 1451: * number was acked, update smoothed round trip time. ! 1452: * Since we now have an rtt measurement, cancel the ! 1453: * timer backoff (cf., Phil Karn's retransmit alg.). ! 1454: * Recompute the initial retransmit timer. ! 1455: */ ! 1456: if (to.to_flag & TOF_TS) ! 1457: tcp_xmit_timer(tp, tcp_now - to.to_tsecr + 1); ! 1458: else if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) ! 1459: tcp_xmit_timer(tp,tp->t_rtt); ! 1460: ! 1461: /* ! 1462: * If all outstanding data is acked, stop retransmit ! 1463: * timer and remember to restart (more output or persist). ! 1464: * If there is more data to be acked, restart retransmit ! 1465: * timer, using current (possibly backed-off) value. ! 1466: */ ! 1467: if (ti->ti_ack == tp->snd_max) { ! 1468: tp->t_timer[TCPT_REXMT] = 0; ! 1469: needoutput = 1; ! 1470: } else if (tp->t_timer[TCPT_PERSIST] == 0) ! 1471: tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; ! 1472: ! 1473: /* ! 1474: * If no data (only SYN) was ACK'd, ! 1475: * skip rest of ACK processing. ! 1476: */ ! 1477: if (acked == 0) ! 1478: goto step6; ! 1479: ! 1480: /* ! 1481: * When new data is acked, open the congestion window. ! 1482: * If the window gives us less than ssthresh packets ! 1483: * in flight, open exponentially (maxseg per packet). ! 1484: * Otherwise open linearly: maxseg per window ! 1485: * (maxseg^2 / cwnd per packet). ! 1486: */ ! 1487: { ! 1488: register u_int cw = tp->snd_cwnd; ! 1489: register u_int incr = tp->t_maxseg; ! 1490: ! 1491: if (cw > tp->snd_ssthresh) ! 1492: incr = incr * incr / cw; ! 1493: tp->snd_cwnd = min(cw + incr, TCP_MAXWIN<<tp->snd_scale); ! 1494: } ! 1495: if (acked > so->so_snd.sb_cc) { ! 1496: tp->snd_wnd -= so->so_snd.sb_cc; ! 1497: sbdrop(&so->so_snd, (int)so->so_snd.sb_cc); ! 1498: ourfinisacked = 1; ! 1499: } else { ! 1500: sbdrop(&so->so_snd, acked); ! 1501: tp->snd_wnd -= acked; ! 1502: ourfinisacked = 0; ! 1503: } ! 1504: sowwakeup(so); ! 1505: tp->snd_una = ti->ti_ack; ! 1506: if (SEQ_LT(tp->snd_nxt, tp->snd_una)) ! 1507: tp->snd_nxt = tp->snd_una; ! 1508: ! 1509: switch (tp->t_state) { ! 1510: ! 1511: /* ! 1512: * In FIN_WAIT_1 STATE in addition to the processing ! 1513: * for the ESTABLISHED state if our FIN is now acknowledged ! 1514: * then enter FIN_WAIT_2. ! 1515: */ ! 1516: case TCPS_FIN_WAIT_1: ! 1517: if (ourfinisacked) { ! 1518: /* ! 1519: * If we can't receive any more ! 1520: * data, then closing user can proceed. ! 1521: * Starting the timer is contrary to the ! 1522: * specification, but if we don't get a FIN ! 1523: * we'll hang forever. ! 1524: */ ! 1525: if (so->so_state & SS_CANTRCVMORE) { ! 1526: soisdisconnected(so); ! 1527: tp->t_timer[TCPT_2MSL] = tcp_maxidle; ! 1528: } ! 1529: tp->t_state = TCPS_FIN_WAIT_2; ! 1530: } ! 1531: break; ! 1532: ! 1533: /* ! 1534: * In CLOSING STATE in addition to the processing for ! 1535: * the ESTABLISHED state if the ACK acknowledges our FIN ! 1536: * then enter the TIME-WAIT state, otherwise ignore ! 1537: * the segment. ! 1538: */ ! 1539: case TCPS_CLOSING: ! 1540: if (ourfinisacked) { ! 1541: tp->t_state = TCPS_TIME_WAIT; ! 1542: tcp_canceltimers(tp); ! 1543: /* Shorten TIME_WAIT [RFC-1644, p.28] */ ! 1544: if (tp->cc_recv != 0 && ! 1545: tp->t_duration < TCPTV_MSL) ! 1546: tp->t_timer[TCPT_2MSL] = ! 1547: tp->t_rxtcur * TCPTV_TWTRUNC; ! 1548: else ! 1549: tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ! 1550: soisdisconnected(so); ! 1551: } ! 1552: break; ! 1553: ! 1554: /* ! 1555: * In LAST_ACK, we may still be waiting for data to drain ! 1556: * and/or to be acked, as well as for the ack of our FIN. ! 1557: * If our FIN is now acknowledged, delete the TCB, ! 1558: * enter the closed state and return. ! 1559: */ ! 1560: case TCPS_LAST_ACK: ! 1561: if (ourfinisacked) { ! 1562: tp = tcp_close(tp); ! 1563: goto drop; ! 1564: } ! 1565: break; ! 1566: ! 1567: /* ! 1568: * In TIME_WAIT state the only thing that should arrive ! 1569: * is a retransmission of the remote FIN. Acknowledge ! 1570: * it and restart the finack timer. ! 1571: */ ! 1572: case TCPS_TIME_WAIT: ! 1573: tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ! 1574: goto dropafterack; ! 1575: } ! 1576: } ! 1577: ! 1578: step6: ! 1579: /* ! 1580: * Update window information. ! 1581: * Don't look at window if no ACK: TAC's send garbage on first SYN. ! 1582: */ ! 1583: if ((tiflags & TH_ACK) && ! 1584: (SEQ_LT(tp->snd_wl1, ti->ti_seq) || ! 1585: (tp->snd_wl1 == ti->ti_seq && (SEQ_LT(tp->snd_wl2, ti->ti_ack) || ! 1586: (tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))))) { ! 1587: /* keep track of pure window updates */ ! 1588: if (ti->ti_len == 0 && ! 1589: tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd) ! 1590: tcpstat.tcps_rcvwinupd++; ! 1591: tp->snd_wnd = tiwin; ! 1592: tp->snd_wl1 = ti->ti_seq; ! 1593: tp->snd_wl2 = ti->ti_ack; ! 1594: if (tp->snd_wnd > tp->max_sndwnd) ! 1595: tp->max_sndwnd = tp->snd_wnd; ! 1596: needoutput = 1; ! 1597: } ! 1598: ! 1599: /* ! 1600: * Process segments with URG. ! 1601: */ ! 1602: if ((tiflags & TH_URG) && ti->ti_urp && ! 1603: TCPS_HAVERCVDFIN(tp->t_state) == 0) { ! 1604: /* ! 1605: * This is a kludge, but if we receive and accept ! 1606: * random urgent pointers, we'll crash in ! 1607: * soreceive. It's hard to imagine someone ! 1608: * actually wanting to send this much urgent data. ! 1609: */ ! 1610: if (ti->ti_urp + so->so_rcv.sb_cc > sb_max) { ! 1611: ti->ti_urp = 0; /* XXX */ ! 1612: tiflags &= ~TH_URG; /* XXX */ ! 1613: goto dodata; /* XXX */ ! 1614: } ! 1615: /* ! 1616: * If this segment advances the known urgent pointer, ! 1617: * then mark the data stream. This should not happen ! 1618: * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since ! 1619: * a FIN has been received from the remote side. ! 1620: * In these states we ignore the URG. ! 1621: * ! 1622: * According to RFC961 (Assigned Protocols), ! 1623: * the urgent pointer points to the last octet ! 1624: * of urgent data. We continue, however, ! 1625: * to consider it to indicate the first octet ! 1626: * of data past the urgent section as the original ! 1627: * spec states (in one of two places). ! 1628: */ ! 1629: if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) { ! 1630: tp->rcv_up = ti->ti_seq + ti->ti_urp; ! 1631: so->so_oobmark = so->so_rcv.sb_cc + ! 1632: (tp->rcv_up - tp->rcv_nxt) - 1; ! 1633: if (so->so_oobmark == 0) { ! 1634: so->so_state |= SS_RCVATMARK; ! 1635: postevent(so, 0, EV_OOB); ! 1636: } ! 1637: sohasoutofband(so); ! 1638: tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA); ! 1639: } ! 1640: /* ! 1641: * Remove out of band data so doesn't get presented to user. ! 1642: * This can happen independent of advancing the URG pointer, ! 1643: * but if two URG's are pending at once, some out-of-band ! 1644: * data may creep in... ick. ! 1645: */ ! 1646: if (ti->ti_urp <= (u_long)ti->ti_len ! 1647: #if SO_OOBINLINE ! 1648: && (so->so_options & SO_OOBINLINE) == 0 ! 1649: #endif ! 1650: ) ! 1651: tcp_pulloutofband(so, ti, m); ! 1652: } else ! 1653: /* ! 1654: * If no out of band data is expected, ! 1655: * pull receive urgent pointer along ! 1656: * with the receive window. ! 1657: */ ! 1658: if (SEQ_GT(tp->rcv_nxt, tp->rcv_up)) ! 1659: tp->rcv_up = tp->rcv_nxt; ! 1660: dodata: /* XXX */ ! 1661: ! 1662: /* ! 1663: * Process the segment text, merging it into the TCP sequencing queue, ! 1664: * and arranging for acknowledgment of receipt if necessary. ! 1665: * This process logically involves adjusting tp->rcv_wnd as data ! 1666: * is presented to the user (this happens in tcp_usrreq.c, ! 1667: * case PRU_RCVD). If a FIN has already been received on this ! 1668: * connection then we just ignore the text. ! 1669: */ ! 1670: if ((ti->ti_len || (tiflags&TH_FIN)) && ! 1671: TCPS_HAVERCVDFIN(tp->t_state) == 0) { ! 1672: TCP_REASS(tp, ti, m, so, tiflags); ! 1673: /* ! 1674: * Note the amount of data that peer has sent into ! 1675: * our window, in order to estimate the sender's ! 1676: * buffer size. ! 1677: */ ! 1678: len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt); ! 1679: } else { ! 1680: m_freem(m); ! 1681: tiflags &= ~TH_FIN; ! 1682: } ! 1683: ! 1684: /* ! 1685: * If FIN is received ACK the FIN and let the user know ! 1686: * that the connection is closing. ! 1687: */ ! 1688: if (tiflags & TH_FIN) { ! 1689: if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { ! 1690: socantrcvmore(so); ! 1691: postevent(so, 0, EV_FIN); ! 1692: /* ! 1693: * If connection is half-synchronized ! 1694: * (ie NEEDSYN flag on) then delay ACK, ! 1695: * so it may be piggybacked when SYN is sent. ! 1696: * Otherwise, since we received a FIN then no ! 1697: * more input can be expected, send ACK now. ! 1698: */ ! 1699: if (tcp_delack_enabled && (tp->t_flags & TF_NEEDSYN)) ! 1700: tp->t_flags |= TF_DELACK; ! 1701: else ! 1702: tp->t_flags |= TF_ACKNOW; ! 1703: tp->rcv_nxt++; ! 1704: } ! 1705: switch (tp->t_state) { ! 1706: ! 1707: /* ! 1708: * In SYN_RECEIVED and ESTABLISHED STATES ! 1709: * enter the CLOSE_WAIT state. ! 1710: */ ! 1711: case TCPS_SYN_RECEIVED: ! 1712: case TCPS_ESTABLISHED: ! 1713: tp->t_state = TCPS_CLOSE_WAIT; ! 1714: break; ! 1715: ! 1716: /* ! 1717: * If still in FIN_WAIT_1 STATE FIN has not been acked so ! 1718: * enter the CLOSING state. ! 1719: */ ! 1720: case TCPS_FIN_WAIT_1: ! 1721: tp->t_state = TCPS_CLOSING; ! 1722: break; ! 1723: ! 1724: /* ! 1725: * In FIN_WAIT_2 state enter the TIME_WAIT state, ! 1726: * starting the time-wait timer, turning off the other ! 1727: * standard timers. ! 1728: */ ! 1729: case TCPS_FIN_WAIT_2: ! 1730: tp->t_state = TCPS_TIME_WAIT; ! 1731: tcp_canceltimers(tp); ! 1732: /* Shorten TIME_WAIT [RFC-1644, p.28] */ ! 1733: if (tp->cc_recv != 0 && ! 1734: tp->t_duration < TCPTV_MSL) { ! 1735: tp->t_timer[TCPT_2MSL] = ! 1736: tp->t_rxtcur * TCPTV_TWTRUNC; ! 1737: /* For transaction client, force ACK now. */ ! 1738: tp->t_flags |= TF_ACKNOW; ! 1739: } ! 1740: else ! 1741: tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ! 1742: soisdisconnected(so); ! 1743: break; ! 1744: ! 1745: /* ! 1746: * In TIME_WAIT state restart the 2 MSL time_wait timer. ! 1747: */ ! 1748: case TCPS_TIME_WAIT: ! 1749: tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ! 1750: break; ! 1751: } ! 1752: } ! 1753: #if TCPDEBUG ! 1754: if (so->so_options & SO_DEBUG) ! 1755: tcp_trace(TA_INPUT, ostate, tp, &tcp_saveti, 0); ! 1756: #endif ! 1757: ! 1758: /* ! 1759: * Return any desired output. ! 1760: */ ! 1761: if (needoutput || (tp->t_flags & TF_ACKNOW)) ! 1762: (void) tcp_output(tp); ! 1763: return; ! 1764: ! 1765: dropafterack: ! 1766: /* ! 1767: * Generate an ACK dropping incoming segment if it occupies ! 1768: * sequence space, where the ACK reflects our state. ! 1769: * ! 1770: * We can now skip the test for the RST flag since all ! 1771: * paths to this code happen after packets containing ! 1772: * RST have been dropped. ! 1773: * ! 1774: * In the SYN-RECEIVED state, don't send an ACK unless the ! 1775: * segment we received passes the SYN-RECEIVED ACK test. ! 1776: * If it fails send a RST. This breaks the loop in the ! 1777: * "LAND" DoS attack, and also prevents an ACK storm ! 1778: * between two listening ports that have been sent forged ! 1779: * SYN segments, each with the source address of the other. ! 1780: */ ! 1781: if (tp->t_state == TCPS_SYN_RECEIVED && (tiflags & TH_ACK) && ! 1782: (SEQ_GT(tp->snd_una, ti->ti_ack) || ! 1783: SEQ_GT(ti->ti_ack, tp->snd_max)) ) ! 1784: goto dropwithreset; ! 1785: #if TCPDEBUG ! 1786: if (so->so_options & SO_DEBUG) ! 1787: tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0); ! 1788: #endif ! 1789: m_freem(m); ! 1790: tp->t_flags |= TF_ACKNOW; ! 1791: (void) tcp_output(tp); ! 1792: return; ! 1793: ! 1794: dropwithreset: ! 1795: /* ! 1796: * Generate a RST, dropping incoming segment. ! 1797: * Make ACK acceptable to originator of segment. ! 1798: * Don't bother to respond if destination was broadcast/multicast. ! 1799: */ ! 1800: if ((tiflags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST) || ! 1801: IN_MULTICAST(ntohl(ti->ti_dst.s_addr))) ! 1802: goto drop; ! 1803: #if TCPDEBUG ! 1804: if (tp == 0 || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) ! 1805: tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0); ! 1806: #endif ! 1807: if (tiflags & TH_ACK) ! 1808: tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST); ! 1809: else { ! 1810: if (tiflags & TH_SYN) ! 1811: ti->ti_len++; ! 1812: tcp_respond(tp, ti, m, ti->ti_seq+ti->ti_len, (tcp_seq)0, ! 1813: TH_RST|TH_ACK); ! 1814: } ! 1815: /* destroy temporarily created socket */ ! 1816: if (dropsocket) ! 1817: (void) soabort(so); ! 1818: return; ! 1819: ! 1820: drop: ! 1821: /* ! 1822: * Drop space held by incoming segment and return. ! 1823: */ ! 1824: #if TCPDEBUG ! 1825: if (tp == 0 || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) ! 1826: tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0); ! 1827: #endif ! 1828: m_freem(m); ! 1829: /* destroy temporarily created socket */ ! 1830: if (dropsocket) ! 1831: (void) soabort(so); ! 1832: return; ! 1833: } ! 1834: ! 1835: static void ! 1836: tcp_dooptions(tp, cp, cnt, ti, to) ! 1837: struct tcpcb *tp; ! 1838: u_char *cp; ! 1839: int cnt; ! 1840: struct tcpiphdr *ti; ! 1841: struct tcpopt *to; ! 1842: { ! 1843: u_short mss = 0; ! 1844: int opt, optlen; ! 1845: ! 1846: for (; cnt > 0; cnt -= optlen, cp += optlen) { ! 1847: opt = cp[0]; ! 1848: if (opt == TCPOPT_EOL) ! 1849: break; ! 1850: if (opt == TCPOPT_NOP) ! 1851: optlen = 1; ! 1852: else { ! 1853: optlen = cp[1]; ! 1854: if (optlen <= 0) ! 1855: break; ! 1856: } ! 1857: switch (opt) { ! 1858: ! 1859: default: ! 1860: continue; ! 1861: ! 1862: case TCPOPT_MAXSEG: ! 1863: if (optlen != TCPOLEN_MAXSEG) ! 1864: continue; ! 1865: if (!(ti->ti_flags & TH_SYN)) ! 1866: continue; ! 1867: bcopy((char *) cp + 2, (char *) &mss, sizeof(mss)); ! 1868: NTOHS(mss); ! 1869: break; ! 1870: ! 1871: case TCPOPT_WINDOW: ! 1872: if (optlen != TCPOLEN_WINDOW) ! 1873: continue; ! 1874: if (!(ti->ti_flags & TH_SYN)) ! 1875: continue; ! 1876: tp->t_flags |= TF_RCVD_SCALE; ! 1877: tp->requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT); ! 1878: break; ! 1879: ! 1880: case TCPOPT_TIMESTAMP: ! 1881: if (optlen != TCPOLEN_TIMESTAMP) ! 1882: continue; ! 1883: to->to_flag |= TOF_TS; ! 1884: bcopy((char *)cp + 2, ! 1885: (char *)&to->to_tsval, sizeof(to->to_tsval)); ! 1886: NTOHL(to->to_tsval); ! 1887: bcopy((char *)cp + 6, ! 1888: (char *)&to->to_tsecr, sizeof(to->to_tsecr)); ! 1889: NTOHL(to->to_tsecr); ! 1890: ! 1891: /* ! 1892: * A timestamp received in a SYN makes ! 1893: * it ok to send timestamp requests and replies. ! 1894: */ ! 1895: if (ti->ti_flags & TH_SYN) { ! 1896: tp->t_flags |= TF_RCVD_TSTMP; ! 1897: tp->ts_recent = to->to_tsval; ! 1898: tp->ts_recent_age = tcp_now; ! 1899: } ! 1900: break; ! 1901: case TCPOPT_CC: ! 1902: if (optlen != TCPOLEN_CC) ! 1903: continue; ! 1904: to->to_flag |= TOF_CC; ! 1905: bcopy((char *)cp + 2, ! 1906: (char *)&to->to_cc, sizeof(to->to_cc)); ! 1907: NTOHL(to->to_cc); ! 1908: /* ! 1909: * A CC or CC.new option received in a SYN makes ! 1910: * it ok to send CC in subsequent segments. ! 1911: */ ! 1912: if (ti->ti_flags & TH_SYN) ! 1913: tp->t_flags |= TF_RCVD_CC; ! 1914: break; ! 1915: case TCPOPT_CCNEW: ! 1916: if (optlen != TCPOLEN_CC) ! 1917: continue; ! 1918: if (!(ti->ti_flags & TH_SYN)) ! 1919: continue; ! 1920: to->to_flag |= TOF_CCNEW; ! 1921: bcopy((char *)cp + 2, ! 1922: (char *)&to->to_cc, sizeof(to->to_cc)); ! 1923: NTOHL(to->to_cc); ! 1924: /* ! 1925: * A CC or CC.new option received in a SYN makes ! 1926: * it ok to send CC in subsequent segments. ! 1927: */ ! 1928: tp->t_flags |= TF_RCVD_CC; ! 1929: break; ! 1930: case TCPOPT_CCECHO: ! 1931: if (optlen != TCPOLEN_CC) ! 1932: continue; ! 1933: if (!(ti->ti_flags & TH_SYN)) ! 1934: continue; ! 1935: to->to_flag |= TOF_CCECHO; ! 1936: bcopy((char *)cp + 2, ! 1937: (char *)&to->to_ccecho, sizeof(to->to_ccecho)); ! 1938: NTOHL(to->to_ccecho); ! 1939: break; ! 1940: } ! 1941: } ! 1942: if (ti->ti_flags & TH_SYN) ! 1943: tcp_mss(tp, mss); /* sets t_maxseg */ ! 1944: } ! 1945: ! 1946: /* ! 1947: * Pull out of band byte out of a segment so ! 1948: * it doesn't appear in the user's data queue. ! 1949: * It is still reflected in the segment length for ! 1950: * sequencing purposes. ! 1951: */ ! 1952: static void ! 1953: tcp_pulloutofband(so, ti, m) ! 1954: struct socket *so; ! 1955: struct tcpiphdr *ti; ! 1956: register struct mbuf *m; ! 1957: { ! 1958: int cnt = ti->ti_urp - 1; ! 1959: ! 1960: while (cnt >= 0) { ! 1961: if (m->m_len > cnt) { ! 1962: char *cp = mtod(m, caddr_t) + cnt; ! 1963: struct tcpcb *tp = sototcpcb(so); ! 1964: ! 1965: tp->t_iobc = *cp; ! 1966: tp->t_oobflags |= TCPOOB_HAVEDATA; ! 1967: bcopy(cp+1, cp, (unsigned)(m->m_len - cnt - 1)); ! 1968: m->m_len--; ! 1969: return; ! 1970: } ! 1971: cnt -= m->m_len; ! 1972: m = m->m_next; ! 1973: if (m == 0) ! 1974: break; ! 1975: } ! 1976: panic("tcp_pulloutofband"); ! 1977: } ! 1978: ! 1979: /* ! 1980: * Collect new round-trip time estimate ! 1981: * and update averages and current timeout. ! 1982: */ ! 1983: static void ! 1984: tcp_xmit_timer(tp, rtt) ! 1985: register struct tcpcb *tp; ! 1986: short rtt; ! 1987: { ! 1988: register int delta; ! 1989: ! 1990: tcpstat.tcps_rttupdated++; ! 1991: tp->t_rttupdated++; ! 1992: if (tp->t_srtt != 0) { ! 1993: /* ! 1994: * srtt is stored as fixed point with 5 bits after the ! 1995: * binary point (i.e., scaled by 8). The following magic ! 1996: * is equivalent to the smoothing algorithm in rfc793 with ! 1997: * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed ! 1998: * point). Adjust rtt to origin 0. ! 1999: */ ! 2000: delta = ((rtt - 1) << TCP_DELTA_SHIFT) ! 2001: - (tp->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT)); ! 2002: ! 2003: if ((tp->t_srtt += delta) <= 0) ! 2004: tp->t_srtt = 1; ! 2005: ! 2006: /* ! 2007: * We accumulate a smoothed rtt variance (actually, a ! 2008: * smoothed mean difference), then set the retransmit ! 2009: * timer to smoothed rtt + 4 times the smoothed variance. ! 2010: * rttvar is stored as fixed point with 4 bits after the ! 2011: * binary point (scaled by 16). The following is ! 2012: * equivalent to rfc793 smoothing with an alpha of .75 ! 2013: * (rttvar = rttvar*3/4 + |delta| / 4). This replaces ! 2014: * rfc793's wired-in beta. ! 2015: */ ! 2016: if (delta < 0) ! 2017: delta = -delta; ! 2018: delta -= tp->t_rttvar >> (TCP_RTTVAR_SHIFT - TCP_DELTA_SHIFT); ! 2019: if ((tp->t_rttvar += delta) <= 0) ! 2020: tp->t_rttvar = 1; ! 2021: } else { ! 2022: /* ! 2023: * No rtt measurement yet - use the unsmoothed rtt. ! 2024: * Set the variance to half the rtt (so our first ! 2025: * retransmit happens at 3*rtt). ! 2026: */ ! 2027: tp->t_srtt = rtt << TCP_RTT_SHIFT; ! 2028: tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1); ! 2029: } ! 2030: tp->t_rtt = 0; ! 2031: tp->t_rxtshift = 0; ! 2032: ! 2033: /* ! 2034: * the retransmit should happen at rtt + 4 * rttvar. ! 2035: * Because of the way we do the smoothing, srtt and rttvar ! 2036: * will each average +1/2 tick of bias. When we compute ! 2037: * the retransmit timer, we want 1/2 tick of rounding and ! 2038: * 1 extra tick because of +-1/2 tick uncertainty in the ! 2039: * firing of the timer. The bias will give us exactly the ! 2040: * 1.5 tick we need. But, because the bias is ! 2041: * statistical, we have to test that we don't drop below ! 2042: * the minimum feasible timer (which is 2 ticks). ! 2043: */ ! 2044: TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), ! 2045: max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX); ! 2046: ! 2047: /* ! 2048: * We received an ack for a packet that wasn't retransmitted; ! 2049: * it is probably safe to discard any error indications we've ! 2050: * received recently. This isn't quite right, but close enough ! 2051: * for now (a route might have failed after we sent a segment, ! 2052: * and the return path might not be symmetrical). ! 2053: */ ! 2054: tp->t_softerror = 0; ! 2055: } ! 2056: ! 2057: /* ! 2058: * Determine a reasonable value for maxseg size. ! 2059: * If the route is known, check route for mtu. ! 2060: * If none, use an mss that can be handled on the outgoing ! 2061: * interface without forcing IP to fragment; if bigger than ! 2062: * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES ! 2063: * to utilize large mbufs. If no route is found, route has no mtu, ! 2064: * or the destination isn't local, use a default, hopefully conservative ! 2065: * size (usually 512 or the default IP max size, but no more than the mtu ! 2066: * of the interface), as we can't discover anything about intervening ! 2067: * gateways or networks. We also initialize the congestion/slow start ! 2068: * window to be a single segment if the destination isn't local. ! 2069: * While looking at the routing entry, we also initialize other path-dependent ! 2070: * parameters from pre-set or cached values in the routing entry. ! 2071: * ! 2072: * Also take into account the space needed for options that we ! 2073: * send regularly. Make maxseg shorter by that amount to assure ! 2074: * that we can send maxseg amount of data even when the options ! 2075: * are present. Store the upper limit of the length of options plus ! 2076: * data in maxopd. ! 2077: * ! 2078: * NOTE that this routine is only called when we process an incoming ! 2079: * segment, for outgoing segments only tcp_mssopt is called. ! 2080: * ! 2081: * In case of T/TCP, we call this routine during implicit connection ! 2082: * setup as well (offer = -1), to initialize maxseg from the cached ! 2083: * MSS of our peer. ! 2084: */ ! 2085: void ! 2086: tcp_mss(tp, offer) ! 2087: struct tcpcb *tp; ! 2088: int offer; ! 2089: { ! 2090: register struct rtentry *rt; ! 2091: struct ifnet *ifp; ! 2092: register int rtt, mss; ! 2093: u_long bufsize; ! 2094: struct inpcb *inp; ! 2095: struct socket *so; ! 2096: struct rmxp_tao *taop; ! 2097: int origoffer = offer; ! 2098: ! 2099: inp = tp->t_inpcb; ! 2100: if ((rt = tcp_rtlookup(inp)) == NULL) { ! 2101: tp->t_maxopd = tp->t_maxseg = tcp_mssdflt; ! 2102: return; ! 2103: } ! 2104: ifp = rt->rt_ifp; ! 2105: so = inp->inp_socket; ! 2106: ! 2107: taop = rmx_taop(rt->rt_rmx); ! 2108: /* ! 2109: * Offer == -1 means that we didn't receive SYN yet, ! 2110: * use cached value in that case; ! 2111: */ ! 2112: if (offer == -1) ! 2113: offer = taop->tao_mssopt; ! 2114: /* ! 2115: * Offer == 0 means that there was no MSS on the SYN segment, ! 2116: * in this case we use tcp_mssdflt. ! 2117: */ ! 2118: if (offer == 0) ! 2119: offer = tcp_mssdflt; ! 2120: else ! 2121: /* ! 2122: * Sanity check: make sure that maxopd will be large ! 2123: * enough to allow some data on segments even is the ! 2124: * all the option space is used (40bytes). Otherwise ! 2125: * funny things may happen in tcp_output. ! 2126: */ ! 2127: offer = max(offer, 64); ! 2128: taop->tao_mssopt = offer; ! 2129: ! 2130: /* ! 2131: * While we're here, check if there's an initial rtt ! 2132: * or rttvar. Convert from the route-table units ! 2133: * to scaled multiples of the slow timeout timer. ! 2134: */ ! 2135: if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) { ! 2136: /* ! 2137: * XXX the lock bit for RTT indicates that the value ! 2138: * is also a minimum value; this is subject to time. ! 2139: */ ! 2140: if (rt->rt_rmx.rmx_locks & RTV_RTT) ! 2141: tp->t_rttmin = rtt / (RTM_RTTUNIT / PR_SLOWHZ); ! 2142: tp->t_srtt = rtt / (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE)); ! 2143: tcpstat.tcps_usedrtt++; ! 2144: if (rt->rt_rmx.rmx_rttvar) { ! 2145: tp->t_rttvar = rt->rt_rmx.rmx_rttvar / ! 2146: (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE)); ! 2147: tcpstat.tcps_usedrttvar++; ! 2148: } else { ! 2149: /* default variation is +- 1 rtt */ ! 2150: tp->t_rttvar = ! 2151: tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE; ! 2152: } ! 2153: TCPT_RANGESET(tp->t_rxtcur, ! 2154: ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1, ! 2155: tp->t_rttmin, TCPTV_REXMTMAX); ! 2156: } ! 2157: /* ! 2158: * if there's an mtu associated with the route, use it ! 2159: */ ! 2160: if (rt->rt_rmx.rmx_mtu) ! 2161: mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr); ! 2162: else ! 2163: { ! 2164: mss = ifp->if_mtu - sizeof(struct tcpiphdr); ! 2165: if (!in_localaddr(inp->inp_faddr)) ! 2166: mss = min(mss, tcp_mssdflt); ! 2167: } ! 2168: mss = min(mss, offer); ! 2169: /* ! 2170: * maxopd stores the maximum length of data AND options ! 2171: * in a segment; maxseg is the amount of data in a normal ! 2172: * segment. We need to store this value (maxopd) apart ! 2173: * from maxseg, because now every segment carries options ! 2174: * and thus we normally have somewhat less data in segments. ! 2175: */ ! 2176: tp->t_maxopd = mss; ! 2177: ! 2178: /* ! 2179: * In case of T/TCP, origoffer==-1 indicates, that no segments ! 2180: * were received yet. In this case we just guess, otherwise ! 2181: * we do the same as before T/TCP. ! 2182: */ ! 2183: if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP && ! 2184: (origoffer == -1 || ! 2185: (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)) ! 2186: mss -= TCPOLEN_TSTAMP_APPA; ! 2187: if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC && ! 2188: (origoffer == -1 || ! 2189: (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC)) ! 2190: mss -= TCPOLEN_CC_APPA; ! 2191: ! 2192: #if (MCLBYTES & (MCLBYTES - 1)) == 0 ! 2193: if (mss > MCLBYTES) ! 2194: mss &= ~(MCLBYTES-1); ! 2195: #else ! 2196: if (mss > MCLBYTES) ! 2197: mss = mss / MCLBYTES * MCLBYTES; ! 2198: #endif ! 2199: /* ! 2200: * If there's a pipesize, change the socket buffer ! 2201: * to that size. Make the socket buffers an integral ! 2202: * number of mss units; if the mss is larger than ! 2203: * the socket buffer, decrease the mss. ! 2204: */ ! 2205: #if RTV_SPIPE ! 2206: if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0) ! 2207: #endif ! 2208: bufsize = so->so_snd.sb_hiwat; ! 2209: if (bufsize < mss) ! 2210: mss = bufsize; ! 2211: else { ! 2212: bufsize = roundup(bufsize, mss); ! 2213: if (bufsize > sb_max) ! 2214: bufsize = sb_max; ! 2215: (void)sbreserve(&so->so_snd, bufsize); ! 2216: } ! 2217: tp->t_maxseg = mss; ! 2218: ! 2219: #if RTV_RPIPE ! 2220: if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0) ! 2221: #endif ! 2222: bufsize = so->so_rcv.sb_hiwat; ! 2223: if (bufsize > mss) { ! 2224: bufsize = roundup(bufsize, mss); ! 2225: if (bufsize > sb_max) ! 2226: bufsize = sb_max; ! 2227: (void)sbreserve(&so->so_rcv, bufsize); ! 2228: } ! 2229: /* ! 2230: * Don't force slow-start on local network. ! 2231: */ ! 2232: if (!in_localaddr(inp->inp_faddr)) ! 2233: tp->snd_cwnd = mss; ! 2234: ! 2235: if (rt->rt_rmx.rmx_ssthresh) { ! 2236: /* ! 2237: * There's some sort of gateway or interface ! 2238: * buffer limit on the path. Use this to set ! 2239: * the slow start threshhold, but set the ! 2240: * threshold to no less than 2*mss. ! 2241: */ ! 2242: tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh); ! 2243: tcpstat.tcps_usedssthresh++; ! 2244: } ! 2245: } ! 2246: ! 2247: /* ! 2248: * Determine the MSS option to send on an outgoing SYN. ! 2249: */ ! 2250: int ! 2251: tcp_mssopt(tp) ! 2252: struct tcpcb *tp; ! 2253: { ! 2254: struct rtentry *rt; ! 2255: ! 2256: rt = tcp_rtlookup(tp->t_inpcb); ! 2257: if (rt == NULL) ! 2258: return tcp_mssdflt; ! 2259: ! 2260: return rt->rt_ifp->if_mtu - sizeof(struct tcpiphdr); ! 2261: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.