File:  [Qemu by Fabrice Bellard] / qemu / slirp / tcp_output.c
Revision 1.1.1.4 (vendor branch): download - view: text, annotated - select for diffs
Tue Apr 24 17:25:43 2018 UTC (2 years, 10 months ago) by root
Branches: qemu, MAIN
CVS tags: qemu0125, qemu0124, qemu0123, qemu0122, qemu0121, qemu0120, qemu0111, qemu0110, HEAD
qemu 0.11.0

    1: /*
    2:  * Copyright (c) 1982, 1986, 1988, 1990, 1993
    3:  *	The Regents of the University of California.  All rights reserved.
    4:  *
    5:  * Redistribution and use in source and binary forms, with or without
    6:  * modification, are permitted provided that the following conditions
    7:  * are met:
    8:  * 1. Redistributions of source code must retain the above copyright
    9:  *    notice, this list of conditions and the following disclaimer.
   10:  * 2. Redistributions in binary form must reproduce the above copyright
   11:  *    notice, this list of conditions and the following disclaimer in the
   12:  *    documentation and/or other materials provided with the distribution.
   13:  * 3. Neither the name of the University nor the names of its contributors
   14:  *    may be used to endorse or promote products derived from this software
   15:  *    without specific prior written permission.
   16:  *
   17:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   18:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   21:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27:  * SUCH DAMAGE.
   28:  *
   29:  *	@(#)tcp_output.c	8.3 (Berkeley) 12/30/93
   30:  * tcp_output.c,v 1.3 1994/09/15 10:36:55 davidg Exp
   31:  */
   32: 
   33: /*
   34:  * Changes and additions relating to SLiRP
   35:  * Copyright (c) 1995 Danny Gasparovski.
   36:  *
   37:  * Please read the file COPYRIGHT for the
   38:  * terms and conditions of the copyright.
   39:  */
   40: 
   41: #include <slirp.h>
   42: 
       /*
        * TCP header flags to place in an outgoing segment, indexed by the
        * connection state: tcp_output() reads tcp_outflags[tp->t_state].
        * Eleven values are listed; any further elements up to TCP_NSTATES
        * would be zero-initialized.
        * NOTE(review): the entries are presumably in TCPS_* state order
        * (closed first, time-wait last) -- confirm against the TCPS_*
        * definitions, which are not visible in this file.
        */
   43: static const u_char  tcp_outflags[TCP_NSTATES] = {
   44: 	TH_RST|TH_ACK, 0,      TH_SYN,        TH_SYN|TH_ACK,
   45: 	TH_ACK,        TH_ACK, TH_FIN|TH_ACK, TH_FIN|TH_ACK,
   46: 	TH_FIN|TH_ACK, TH_ACK, TH_ACK,
   47: };
   48: 
   49: 
       /*
        * Size of the on-stack option buffer (opt[]) in tcp_output().  The only
        * option tcp_output() builds is the 4-byte maximum-segment-size option.
        */
   50: #define MAX_TCPOPTLEN	32	/* max # bytes that go in options */
   51: 
   52: /*
   53:  * Tcp output routine: figure out what should be sent and send it.
        *
        * tp is the control block of the connection to service.  Its socket
        * (tp->t_socket) supplies the send buffer (so_snd) that data is copied
        * from and the receive buffer (so_rcv) whose sbspace() value is
        * advertised as the window.
        *
        * The function first decides whether anything has to go out: data, a
        * window update, an owed ACK (TF_ACKNOW), a SYN or RST, urgent data,
        * or a FIN.  If not, it may start the persist timer and returns 0
        * without sending.  Otherwise it builds one segment in a fresh mbuf,
        * updates the send sequence variables and timers, and hands the mbuf
        * to ip_output().  When more than one segment's worth of data may be
        * sent (sendalot), it jumps back to "again" and repeats.
        *
        * Returns 0 if nothing had to be sent or every segment was passed to
        * ip_output() without error, 1 if m_get() could not supply an mbuf,
        * or the nonzero value returned by ip_output().
   54:  */
   55: int
   56: tcp_output(struct tcpcb *tp)
   57: {
   58: 	register struct socket *so = tp->t_socket;
   59: 	register long len, win;	/* len: data bytes in this segment; win: window (send side first, receive side later) */
   60: 	int off, flags, error;	/* off: offset of snd_nxt from snd_una, used as offset into so_snd */
   61: 	register struct mbuf *m;
   62: 	register struct tcpiphdr *ti;
   63: 	u_char opt[MAX_TCPOPTLEN];
   64: 	unsigned optlen, hdrlen;
   65: 	int idle, sendalot;	/* sendalot: set when more data remains, so loop back to "again" */
   66: 
   67: 	DEBUG_CALL("tcp_output");
       	/*
       	 * NOTE(review): the pointer is cast to long to match "%lx"; on a
       	 * platform where long is narrower than a pointer the printed value
       	 * is truncated.  Debug output only.
       	 */
   68: 	DEBUG_ARG("tp = %lx", (long )tp);
   69: 
   70: 	/*
   71: 	 * Determine length of data that should be transmitted,
   72: 	 * and flags that will be used.
   73: 	 * If there is some data or critical controls (SYN, RST)
   74: 	 * to send, then transmit; otherwise, investigate further.
   75: 	 */
   76: 	idle = (tp->snd_max == tp->snd_una);
   77: 	if (idle && tp->t_idle >= tp->t_rxtcur)
   78: 		/*
   79: 		 * We have been idle for "a while" and no acks are
   80: 		 * expected to clock out any data we send --
   81: 		 * slow start to get ack "clock" running again.
   82: 		 */
   83: 		tp->snd_cwnd = tp->t_maxseg;
       	/* Each pass from here builds and sends at most one segment. */
   84: again:
   85: 	sendalot = 0;
   86: 	off = tp->snd_nxt - tp->snd_una;
   87: 	win = min(tp->snd_wnd, tp->snd_cwnd);	/* send window: smaller of snd_wnd and snd_cwnd */
   88: 
   89: 	flags = tcp_outflags[tp->t_state];
   90: 
   91: 	DEBUG_MISC((dfd, " --- tcp_output flags = 0x%x\n",flags));
   92: 
   93: 	/*
   94: 	 * If in persist timeout with window of 0, send 1 byte.
   95: 	 * Otherwise, if window is small but nonzero
   96: 	 * and timer expired, we will send what we can
   97: 	 * and go to transmit state.
   98: 	 */
   99: 	if (tp->t_force) {
  100: 		if (win == 0) {
  101: 			/*
  102: 			 * If we still have some data to send, then
  103: 			 * clear the FIN bit.  Usually this would
  104: 			 * happen below when it realizes that we
  105: 			 * aren't sending all the data.  However,
  106: 			 * if we have exactly 1 byte of unsent data,
  107: 			 * then it won't clear the FIN bit below,
  108: 			 * and if we are in persist state, we wind
  109: 			 * up sending the packet without recording
  110: 			 * that we sent the FIN bit.
  111: 			 *
  112: 			 * We can't just blindly clear the FIN bit,
  113: 			 * because if we don't have any more data
  114: 			 * to send then the probe will be the FIN
  115: 			 * itself.
  116: 			 */
  117: 			if (off < so->so_snd.sb_cc)
  118: 				flags &= ~TH_FIN;
  119: 			win = 1;
  120: 		} else {
  121: 			tp->t_timer[TCPT_PERSIST] = 0;
  122: 			tp->t_rxtshift = 0;
  123: 		}
  124: 	}
  125: 
       	/* Bytes we may send now: what is buffered, limited by the window, minus what is already in flight. */
  126: 	len = min(so->so_snd.sb_cc, win) - off;
  127: 
  128: 	if (len < 0) {
  129: 		/*
  130: 		 * If FIN has been sent but not acked,
  131: 		 * but we haven't been called to retransmit,
  132: 		 * len will be -1.  Otherwise, window shrank
  133: 		 * after we sent into it.  If window shrank to 0,
  134: 		 * cancel pending retransmit and pull snd_nxt
  135: 		 * back to (closed) window.  We will enter persist
  136: 		 * state below.  If the window didn't close completely,
  137: 		 * just wait for an ACK.
  138: 		 */
  139: 		len = 0;
  140: 		if (win == 0) {
  141: 			tp->t_timer[TCPT_REXMT] = 0;
  142: 			tp->snd_nxt = tp->snd_una;
  143: 		}
  144: 	}
  145: 
       	/* Never put more than one maximum segment of data in a single packet. */
  146: 	if (len > tp->t_maxseg) {
  147: 		len = tp->t_maxseg;
  148: 		sendalot = 1;
  149: 	}
       	/*
       	 * This segment stops short of the end of the buffered data,
       	 * so the FIN has to wait for a later segment.
       	 */
  150: 	if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc))
  151: 		flags &= ~TH_FIN;
  152: 
       	/*
       	 * From here on "win" no longer holds the send window: it is
       	 * reused for the sbspace() value of our receive buffer, which
       	 * is what gets advertised to the peer in ti_win below.
       	 */
  153: 	win = sbspace(&so->so_rcv);
  154: 
  155: 	/*
  156: 	 * Sender silly window avoidance.  If connection is idle
  157: 	 * and can send all data, a maximum segment,
  158: 	 * at least a maximum default-size segment do it,
  159: 	 * or are forced, do it; otherwise don't bother.
  160: 	 * If peer's buffer is tiny, then send
  161: 	 * when window is at least half open.
  162: 	 * If retransmitting (possibly after persist timer forced us
  163: 	 * to send into a small window), then must resend.
       	 *
       	 * In terms of the tests below, data is sent when: it is a full
       	 * t_maxseg; or it empties the send buffer; or t_force is set;
       	 * or it is at least half of tp->max_sndwnd; or snd_nxt is
       	 * behind snd_max (a retransmission).
  164: 	 */
  165: 	if (len) {
  166: 		if (len == tp->t_maxseg)
  167: 			goto send;
       		/*
       		 * The leading "1 ||" makes the idle/TF_NODELAY test always
       		 * true, so a segment that empties the send buffer is sent
       		 * at once regardless of those two conditions.
       		 * NOTE(review): presumably a deliberate SLiRP change that
       		 * disables the delay for small segments -- confirm.
       		 */
  168: 		if ((1 || idle || tp->t_flags & TF_NODELAY) &&
  169: 		    len + off >= so->so_snd.sb_cc)
  170: 			goto send;
  171: 		if (tp->t_force)
  172: 			goto send;
  173: 		if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0)
  174: 			goto send;
  175: 		if (SEQ_LT(tp->snd_nxt, tp->snd_max))
  176: 			goto send;
  177: 	}
  178: 
  179: 	/*
  180: 	 * Compare available window to amount of window
  181: 	 * known to peer (as advertised window less
  182: 	 * next expected input).  If the difference is at least two
  183: 	 * max size segments, or at least 50% of the maximum possible
  184: 	 * window, then want to send a window update to peer.
  185: 	 */
  186: 	if (win > 0) {
  187: 		/*
  188: 		 * "adv" is the amount we can increase the window,
  189: 		 * taking into account that we are limited by
  190: 		 * TCP_MAXWIN << tp->rcv_scale.
  191: 		 */
  192: 		long adv = min(win, (long)TCP_MAXWIN << tp->rcv_scale) -
  193: 			(tp->rcv_adv - tp->rcv_nxt);
  194: 
  195: 		if (adv >= (long) (2 * tp->t_maxseg))
  196: 			goto send;
  197: 		if (2 * adv >= (long) so->so_rcv.sb_datalen)
  198: 			goto send;
  199: 	}
  200: 
  201: 	/*
  202: 	 * Send if we owe peer an ACK.
       	 * Also send for SYN/RST states and while urgent data
       	 * (snd_up beyond snd_una) is outstanding.
  203: 	 */
  204: 	if (tp->t_flags & TF_ACKNOW)
  205: 		goto send;
  206: 	if (flags & (TH_SYN|TH_RST))
  207: 		goto send;
  208: 	if (SEQ_GT(tp->snd_up, tp->snd_una))
  209: 		goto send;
  210: 	/*
  211: 	 * If our state indicates that FIN should be sent
  212: 	 * and we have not yet done so, or we're retransmitting the FIN,
  213: 	 * then we need to send.
  214: 	 */
  215: 	if (flags & TH_FIN &&
  216: 	    ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una))
  217: 		goto send;
  218: 
  219: 	/*
  220: 	 * TCP window updates are not reliable, rather a polling protocol
  221: 	 * using ``persist'' packets is used to ensure receipt of window
  222: 	 * updates.  The three ``states'' for the output side are:
  223: 	 *	idle			not doing retransmits or persists
  224: 	 *	persisting		to move a small or zero window
  225: 	 *	(re)transmitting	and thereby not persisting
  226: 	 *
  227: 	 * tp->t_timer[TCPT_PERSIST]
  228: 	 *	is set when we are in persist state.
  229: 	 * tp->t_force
  230: 	 *	is set when we are called to send a persist packet.
  231: 	 * tp->t_timer[TCPT_REXMT]
  232: 	 *	is set when we are retransmitting
  233: 	 * The output side is idle when both timers are zero.
  234: 	 *
  235: 	 * If send window is too small, there is data to transmit, and no
  236: 	 * retransmit or persist is pending, then go to persist state.
  237: 	 * If nothing happens soon, send when timer expires:
  238: 	 * if window is nonzero, transmit what we can,
  239: 	 * otherwise force out a byte.
  240: 	 */
  241: 	if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 &&
  242: 	    tp->t_timer[TCPT_PERSIST] == 0) {
  243: 		tp->t_rxtshift = 0;
  244: 		tcp_setpersist(tp);
  245: 	}
  246: 
  247: 	/*
  248: 	 * No reason to send a segment, just return.
  249: 	 */
  250: 	return (0);
  251: 
  252: send:
  253: 	/*
  254: 	 * Before ESTABLISHED, force sending of initial options
  255: 	 * unless TCP set not to do any options.
  256: 	 * NOTE: we assume that the IP/TCP header plus TCP options
  257: 	 * always fit in a single mbuf, leaving room for a maximum
  258: 	 * link header, i.e.
  259: 	 *	max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MHLEN
       	 *
       	 * The only option built here is the maximum segment size,
       	 * and only on segments carrying SYN: kind TCPOPT_MAXSEG,
       	 * length 4, then the 16-bit value from tcp_mss() in network
       	 * byte order.
  260: 	 */
  261: 	optlen = 0;
  262: 	hdrlen = sizeof (struct tcpiphdr);
  263: 	if (flags & TH_SYN) {
  264: 		tp->snd_nxt = tp->iss;
  265: 		if ((tp->t_flags & TF_NOOPT) == 0) {
  266: 			u_int16_t mss;
  267: 
  268: 			opt[0] = TCPOPT_MAXSEG;
  269: 			opt[1] = 4;
  270: 			mss = htons((u_int16_t) tcp_mss(tp, 0));
  271: 			memcpy((caddr_t)(opt + 2), (caddr_t)&mss, sizeof(mss));
  272: 			optlen = 4;
  273: 		}
  274:  	}
  275: 
  276:  	hdrlen += optlen;
  277: 
  278: 	/*
  279: 	 * Adjust data length if insertion of options will
  280: 	 * bump the packet length beyond the t_maxseg length.
  281: 	 */
  282: 	 if (len > tp->t_maxseg - optlen) {
  283: 		len = tp->t_maxseg - optlen;
  284: 		sendalot = 1;
  285: 	 }
  286: 
  287: 	/*
  288: 	 * Grab a header mbuf, attaching a copy of data to
  289: 	 * be transmitted, and initialize the header from
  290: 	 * the template for sends on this connection.
       	 *
       	 * Both branches set the mbuf up the same way (skip
       	 * IF_MAXLINKHDR bytes at the front, reserve hdrlen bytes for
       	 * the headers); only the first one copies payload in.
       	 * If m_get() fails, error is set to 1 and control jumps to
       	 * "out", which returns it.
  291: 	 */
  292: 	if (len) {
  293: 		m = m_get(so->slirp);
  294: 		if (m == NULL) {
  295: 			error = 1;
  296: 			goto out;
  297: 		}
  298: 		m->m_data += IF_MAXLINKHDR;
  299: 		m->m_len = hdrlen;
  300: 
       		/* Copy len bytes starting at offset off of the send buffer to just past the headers. */
  301: 		sbcopy(&so->so_snd, off, (int) len, mtod(m, caddr_t) + hdrlen);
  302: 		m->m_len += len;
  303: 
  304: 		/*
  305: 		 * If we're sending everything we've got, set PUSH.
  306: 		 * (This will keep happy those implementations which only
  307: 		 * give data to the user when a buffer fills or
  308: 		 * a PUSH comes in.)
  309: 		 */
  310: 		if (off + len == so->so_snd.sb_cc)
  311: 			flags |= TH_PUSH;
  312: 	} else {
  313: 		m = m_get(so->slirp);
  314: 		if (m == NULL) {
  315: 			error = 1;
  316: 			goto out;
  317: 		}
  318: 		m->m_data += IF_MAXLINKHDR;
  319: 		m->m_len = hdrlen;
  320: 	}
  321: 
  322: 	ti = mtod(m, struct tcpiphdr *);
  323: 
       	/* Start from the per-connection header template, then overwrite the fields that vary. */
  324: 	memcpy((caddr_t)ti, &tp->t_template, sizeof (struct tcpiphdr));
  325: 
  326: 	/*
  327: 	 * Fill in fields, remembering maximum advertised
  328: 	 * window for use in delaying messages about window sizes.
  329: 	 * If resending a FIN, be sure not to use a new sequence number.
  330: 	 */
  331: 	if (flags & TH_FIN && tp->t_flags & TF_SENTFIN &&
  332: 	    tp->snd_nxt == tp->snd_max)
  333: 		tp->snd_nxt--;
  334: 	/*
  335: 	 * If we are doing retransmissions, then snd_nxt will
  336: 	 * not reflect the first unsent octet.  For ACK only
  337: 	 * packets, we do not want the sequence number of the
  338: 	 * retransmitted packet, we want the sequence number
  339: 	 * of the next unsent octet.  So, if there is no data
  340: 	 * (and no SYN or FIN), use snd_max instead of snd_nxt
  341: 	 * when filling in ti_seq.  But if we are in persist
  342: 	 * state, snd_max might reflect one byte beyond the
  343: 	 * right edge of the window, so use snd_nxt in that
  344: 	 * case, since we know we aren't doing a retransmission.
  345: 	 * (retransmit and persist are mutually exclusive...)
  346: 	 */
  347: 	if (len || (flags & (TH_SYN|TH_FIN)) || tp->t_timer[TCPT_PERSIST])
  348: 		ti->ti_seq = htonl(tp->snd_nxt);
  349: 	else
  350: 		ti->ti_seq = htonl(tp->snd_max);
  351: 	ti->ti_ack = htonl(tp->rcv_nxt);
       	/* With options present, place them right after the fixed header and enlarge the data offset (in 32-bit words). */
  352: 	if (optlen) {
  353: 		memcpy((caddr_t)(ti + 1), (caddr_t)opt, optlen);
  354: 		ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2;
  355: 	}
  356: 	ti->ti_flags = flags;
  357: 	/*
  358: 	 * Calculate receive window.  Don't shrink window,
  359: 	 * but avoid silly window syndrome.
       	 *
       	 * In order: advertise 0 if the space is under a quarter of
       	 * the buffer and under one segment; cap at the largest
       	 * value the 16-bit field can carry after scaling; never go
       	 * below what was already advertised (rcv_adv - rcv_nxt).
  360: 	 */
  361: 	if (win < (long)(so->so_rcv.sb_datalen / 4) && win < (long)tp->t_maxseg)
  362: 		win = 0;
  363: 	if (win > (long)TCP_MAXWIN << tp->rcv_scale)
  364: 		win = (long)TCP_MAXWIN << tp->rcv_scale;
  365: 	if (win < (long)(tp->rcv_adv - tp->rcv_nxt))
  366: 		win = (long)(tp->rcv_adv - tp->rcv_nxt);
  367: 	ti->ti_win = htons((u_int16_t) (win>>tp->rcv_scale));
  368: 
       	/* Urgent pointer is the offset of snd_up from this segment's sequence number. */
  369: 	if (SEQ_GT(tp->snd_up, tp->snd_una)) {
  370: 		ti->ti_urp = htons((u_int16_t)(tp->snd_up - ntohl(ti->ti_seq)));
  371: 		ti->ti_flags |= TH_URG;
  372: 	} else
  373: 		/*
  374: 		 * If no urgent pointer to send, then we pull
  375: 		 * the urgent pointer to the left edge of the send window
  376: 		 * so that it doesn't drift into the send window on sequence
  377: 		 * number wraparound.
  378: 		 */
  379: 		tp->snd_up = tp->snd_una;		/* drag it along */
  380: 
  381: 	/*
  382: 	 * Put TCP length in extended header, and then
  383: 	 * checksum extended header and data.
       	 * With neither data nor options, ti_len is left as copied
       	 * from the template above.
  384: 	 */
  385: 	if (len + optlen)
  386: 		ti->ti_len = htons((u_int16_t)(sizeof (struct tcphdr) +
  387: 		    optlen + len));
  388: 	ti->ti_sum = cksum(m, (int)(hdrlen + len));
  389: 
  390: 	/*
  391: 	 * In transmit state, time the transmission and arrange for
  392: 	 * the retransmit.  In persist state, just set snd_max.
       	 * (The persist case is t_force set while the persist timer
       	 * is nonzero; everything else takes the first branch.)
  393: 	 */
  394: 	if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) {
  395: 		tcp_seq startseq = tp->snd_nxt;
  396: 
  397: 		/*
  398: 		 * Advance snd_nxt over sequence space of this segment.
       		 * SYN and FIN each take one sequence number in addition
       		 * to the data bytes.
  399: 		 */
  400: 		if (flags & (TH_SYN|TH_FIN)) {
  401: 			if (flags & TH_SYN)
  402: 				tp->snd_nxt++;
  403: 			if (flags & TH_FIN) {
  404: 				tp->snd_nxt++;
  405: 				tp->t_flags |= TF_SENTFIN;
  406: 			}
  407: 		}
  408: 		tp->snd_nxt += len;
  409: 		if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
  410: 			tp->snd_max = tp->snd_nxt;
  411: 			/*
  412: 			 * Time this transmission if not a retransmission and
  413: 			 * not currently timing anything.
  414: 			 */
  415: 			if (tp->t_rtt == 0) {
  416: 				tp->t_rtt = 1;
  417: 				tp->t_rtseq = startseq;
  418: 			}
  419: 		}
  420: 
  421: 		/*
  422: 		 * Set retransmit timer if not currently set,
  423: 		 * and not doing an ack or a keep-alive probe.
  424: 		 * Initial value for retransmit timer is smoothed
  425: 		 * round-trip time + 2 * round-trip time variance.
  426: 		 * Initialize shift counter which is used for backoff
  427: 		 * of retransmit time.
  428: 		 */
  429: 		if (tp->t_timer[TCPT_REXMT] == 0 &&
  430: 		    tp->snd_nxt != tp->snd_una) {
  431: 			tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
  432: 			if (tp->t_timer[TCPT_PERSIST]) {
  433: 				tp->t_timer[TCPT_PERSIST] = 0;
  434: 				tp->t_rxtshift = 0;
  435: 			}
  436: 		}
  437: 	} else
  438: 		if (SEQ_GT(tp->snd_nxt + len, tp->snd_max))
  439: 			tp->snd_max = tp->snd_nxt + len;
  440: 
  441: 	/*
  442: 	 * Fill in IP length and desired time to live and
  443: 	 * send to IP level.  There should be a better way
  444: 	 * to handle ttl and tos; we could keep them in
  445: 	 * the template, but need a way to checksum without them.
  446: 	 */
  447: 	m->m_len = hdrlen + len; /* XXX Needed? m_len should be correct */
  448: 
  449:     {
  450: 
       	/*
       	 * ip_len is stored as m->m_len with no htons() here, unlike the
       	 * TCP fields above.
       	 * NOTE(review): presumably ip_output() expects host byte order
       	 * and converts it -- confirm.
       	 */
  451: 	((struct ip *)ti)->ip_len = m->m_len;
  452: 
  453: 	((struct ip *)ti)->ip_ttl = IPDEFTTL;
  454: 	((struct ip *)ti)->ip_tos = so->so_iptos;
  455: 
  456: 	error = ip_output(so, m);
  457:     }
       	/*
       	 * The "out" label sits inside this if so that the m_get()
       	 * failure paths (error already set to 1) share the same return.
       	 * When ip_output() itself fails, snd_nxt, snd_max and the timers
       	 * have already been updated above.
       	 */
  458: 	if (error) {
  459: out:
  460: 		return (error);
  461: 	}
  462: 
  463: 	/*
  464: 	 * Data sent (as far as we can tell).
  465: 	 * If this advertises a larger window than any other segment,
  466: 	 * then remember the size of the advertised window.
  467: 	 * Any pending ACK has now been sent.
  468: 	 */
  469: 	if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv))
  470: 		tp->rcv_adv = tp->rcv_nxt + win;
  471: 	tp->last_ack_sent = tp->rcv_nxt;
  472: 	tp->t_flags &= ~(TF_ACKNOW|TF_DELACK);
       	/* More data was left over after clamping len: go build the next segment. */
  473: 	if (sendalot)
  474: 		goto again;
  475: 
  476: 	return (0);
  477: }
  478: 
  479: /*
  480:  * Start or restart the persist timer of connection tp.
  481:  *
  482:  * The interval is ((t_srtt >> 2) + t_rttvar) >> 1, multiplied by the
  483:  * tcp_backoff[] entry selected by the current t_rxtshift, and stored
  484:  * in t_timer[TCPT_PERSIST] through TCPT_RANGESET with the bounds
  485:  * TCPTV_PERSMIN and TCPTV_PERSMAX (presumably clamping to that range;
  486:  * the macro is defined elsewhere).  Afterwards t_rxtshift is advanced
  487:  * by one unless it has already reached TCP_MAXRXTSHIFT.
  488:  */
  489: void
  490: tcp_setpersist(struct tcpcb *tp)
  491: {
  492: 	/* Interval before the backoff multiplier is applied. */
  493: 	int base_interval = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
  494: 
  495: 	TCPT_RANGESET(tp->t_timer[TCPT_PERSIST],
  496: 	    base_interval * tcp_backoff[tp->t_rxtshift],
  497: 	    TCPTV_PERSMIN, TCPTV_PERSMAX);
  498: 
  499: 	/* Backoff index saturates at TCP_MAXRXTSHIFT. */
  500: 	if (tp->t_rxtshift >= TCP_MAXRXTSHIFT)
  501: 		return;
  502: 	tp->t_rxtshift++;
  503: }

unix.superglobalmegacorp.com