File:  [Qemu by Fabrice Bellard] / qemu / slirp / tcp_output.c
Revision 1.1.1.6 (vendor branch): download - view: text, annotated - select for diffs
Tue Apr 24 19:51:36 2018 UTC (3 years, 4 months ago) by root
Branches: qemu, MAIN
CVS tags: qemu1101, HEAD
qemu 1.1.1

    1: /*
    2:  * Copyright (c) 1982, 1986, 1988, 1990, 1993
    3:  *	The Regents of the University of California.  All rights reserved.
    4:  *
    5:  * Redistribution and use in source and binary forms, with or without
    6:  * modification, are permitted provided that the following conditions
    7:  * are met:
    8:  * 1. Redistributions of source code must retain the above copyright
    9:  *    notice, this list of conditions and the following disclaimer.
   10:  * 2. Redistributions in binary form must reproduce the above copyright
   11:  *    notice, this list of conditions and the following disclaimer in the
   12:  *    documentation and/or other materials provided with the distribution.
   13:  * 3. Neither the name of the University nor the names of its contributors
   14:  *    may be used to endorse or promote products derived from this software
   15:  *    without specific prior written permission.
   16:  *
   17:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   18:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   21:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27:  * SUCH DAMAGE.
   28:  *
   29:  *	@(#)tcp_output.c	8.3 (Berkeley) 12/30/93
   30:  * tcp_output.c,v 1.3 1994/09/15 10:36:55 davidg Exp
   31:  */
   32: 
   33: /*
   34:  * Changes and additions relating to SLiRP
   35:  * Copyright (c) 1995 Danny Gasparovski.
   36:  *
   37:  * Please read the file COPYRIGHT for the
   38:  * terms and conditions of the copyright.
   39:  */
   40: 
   41: #include <slirp.h>
   42: /*
        * TCP header flags to send for each connection state; tcp_output()
        * looks this table up with tp->t_state.  The array is declared with
        * TCP_NSTATES slots and lists eleven initializers.
        * NOTE(review): the entry order presumably follows the TCPS_* state
        * numbering defined elsewhere (CLOSED, LISTEN, SYN_SENT, ...) - confirm
        * against the state definitions before reordering anything here.
        */
   43: static const u_char  tcp_outflags[TCP_NSTATES] = {
   44: 	TH_RST|TH_ACK, 0,      TH_SYN,        TH_SYN|TH_ACK,
   45: 	TH_ACK,        TH_ACK, TH_FIN|TH_ACK, TH_FIN|TH_ACK,
   46: 	TH_FIN|TH_ACK, TH_ACK, TH_ACK,
   47: };
   48: 
   49: /*
        * Drop any earlier definition of MAX_TCPOPTLEN (presumably one supplied
        * through slirp.h - confirm) and redefine it for this file.  The value
        * sizes the opt[] scratch buffer in tcp_output(), which currently fills
        * in only a 4-byte maximum-segment-size option.
        */
   50: #undef MAX_TCPOPTLEN
   51: #define MAX_TCPOPTLEN	32	/* max # bytes that go in options */
   52: 
   53: /*
   54:  * Tcp output routine: figure out what should be sent and send it.
        *
        * Works on the connection described by tp and sends from the send
        * buffer of tp->t_socket.  Each pass builds at most one segment in a
        * freshly obtained mbuf and hands it to ip_output().  When the amount
        * to send had to be clipped to a single segment (sendalot), the
        * function jumps back to "again" after a successful send and makes
        * another pass.
        *
        * Returns 0 when there was nothing to send or when every segment was
        * handed to ip_output() without error, 1 when m_get() could not supply
        * an mbuf, and otherwise the nonzero value returned by ip_output().
   55:  */
   56: int
   57: tcp_output(struct tcpcb *tp)
   58: {
   59: 	register struct socket *so = tp->t_socket;
   60: 	register long len, win;
   61: 	int off, flags, error;	/* off: distance of snd_nxt past snd_una */
   62: 	register struct mbuf *m;
   63: 	register struct tcpiphdr *ti;
   64: 	u_char opt[MAX_TCPOPTLEN];
   65: 	unsigned optlen, hdrlen;
   66: 	int idle, sendalot;	/* sendalot: make another pass after this send */
   67: 
   68: 	DEBUG_CALL("tcp_output");
   69: 	DEBUG_ARG("tp = %lx", (long )tp);
   70: 
   71: 	/*
   72: 	 * Determine length of data that should be transmitted,
   73: 	 * and flags that will be used.
   74: 	 * If there is some data or critical controls (SYN, RST)
   75: 	 * to send, then transmit; otherwise, investigate further.
   76: 	 */
   77: 	idle = (tp->snd_max == tp->snd_una);
   78: 	if (idle && tp->t_idle >= tp->t_rxtcur)
   79: 		/*
   80: 		 * We have been idle for "a while" and no acks are
   81: 		 * expected to clock out any data we send --
   82: 		 * slow start to get ack "clock" running again.
   83: 		 */
   84: 		tp->snd_cwnd = tp->t_maxseg;
   85: again:
   86: 	sendalot = 0;
   87: 	off = tp->snd_nxt - tp->snd_una;
   88: 	win = min(tp->snd_wnd, tp->snd_cwnd);
   89: 
   90: 	flags = tcp_outflags[tp->t_state];
   91: 
   92: 	DEBUG_MISC((dfd, " --- tcp_output flags = 0x%x\n",flags));
   93: 
   94: 	/*
   95: 	 * If in persist timeout with window of 0, send 1 byte.
   96: 	 * Otherwise, if window is small but nonzero
   97: 	 * and timer expired, we will send what we can
   98: 	 * and go to transmit state.
   99: 	 */
  100: 	if (tp->t_force) {
  101: 		if (win == 0) {
  102: 			/*
  103: 			 * If we still have some data to send, then
  104: 			 * clear the FIN bit.  Usually this would
  105: 			 * happen below when it realizes that we
  106: 			 * aren't sending all the data.  However,
  107: 			 * if we have exactly 1 byte of unsent data,
  108: 			 * then it won't clear the FIN bit below,
  109: 			 * and if we are in persist state, we wind
  110: 			 * up sending the packet without recording
  111: 			 * that we sent the FIN bit.
  112: 			 *
  113: 			 * We can't just blindly clear the FIN bit,
  114: 			 * because if we don't have any more data
  115: 			 * to send then the probe will be the FIN
  116: 			 * itself.
  117: 			 */
  118: 			if (off < so->so_snd.sb_cc)
  119: 				flags &= ~TH_FIN;
  120: 			win = 1;
  121: 		} else {
  122: 			tp->t_timer[TCPT_PERSIST] = 0;
  123: 			tp->t_rxtshift = 0;
  124: 		}
  125: 	}
  126: 
  127: 	len = min(so->so_snd.sb_cc, win) - off;
  128: 
  129: 	if (len < 0) {
  130: 		/*
  131: 		 * If FIN has been sent but not acked,
  132: 		 * but we haven't been called to retransmit,
  133: 		 * len will be -1.  Otherwise, window shrank
  134: 		 * after we sent into it.  If window shrank to 0,
  135: 		 * cancel pending retransmit and pull snd_nxt
  136: 		 * back to (closed) window.  We will enter persist
  137: 		 * state below.  If the window didn't close completely,
  138: 		 * just wait for an ACK.
  139: 		 */
  140: 		len = 0;
  141: 		if (win == 0) {
  142: 			tp->t_timer[TCPT_REXMT] = 0;
  143: 			tp->snd_nxt = tp->snd_una;
  144: 		}
  145: 	}
  146: 
  147: 	if (len > tp->t_maxseg) {
  148: 		len = tp->t_maxseg;
  149: 		sendalot = 1;
  150: 	}
  151: 	if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc))
  152: 		flags &= ~TH_FIN;
  153: 
  154: 	win = sbspace(&so->so_rcv);
  155: 
  156: 	/*
  157: 	 * Sender silly window avoidance.  If connection is idle
  158: 	 * and can send all data, a maximum segment,
  159: 	 * at least a maximum default-size segment do it,
  160: 	 * or are forced, do it; otherwise don't bother.
  161: 	 * If peer's buffer is tiny, then send
  162: 	 * when window is at least half open.
  163: 	 * If retransmitting (possibly after persist timer forced us
  164: 	 * to send into a small window), then must resend.
       	 *
       	 * Note: the leading "1 ||" in the second test below makes the
       	 * idle/TF_NODELAY condition always true, so a short segment is
       	 * sent whenever it carries everything left in the send buffer.
  165: 	 */
  166: 	if (len) {
  167: 		if (len == tp->t_maxseg)
  168: 			goto send;
  169: 		if ((1 || idle || tp->t_flags & TF_NODELAY) &&
  170: 		    len + off >= so->so_snd.sb_cc)
  171: 			goto send;
  172: 		if (tp->t_force)
  173: 			goto send;
  174: 		if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0)
  175: 			goto send;
  176: 		if (SEQ_LT(tp->snd_nxt, tp->snd_max))
  177: 			goto send;
  178: 	}
  179: 
  180: 	/*
  181: 	 * Compare available window to amount of window
  182: 	 * known to peer (as advertised window less
  183: 	 * next expected input).  If the difference is at least two
  184: 	 * max size segments, or at least 50% of the maximum possible
  185: 	 * window, then want to send a window update to peer.
  186: 	 */
  187: 	if (win > 0) {
  188: 		/*
  189: 		 * "adv" is the amount we can increase the window,
  190: 		 * taking into account that we are limited by
  191: 		 * TCP_MAXWIN << tp->rcv_scale.
  192: 		 */
  193: 		long adv = min(win, (long)TCP_MAXWIN << tp->rcv_scale) -
  194: 			(tp->rcv_adv - tp->rcv_nxt);
  195: 
  196: 		if (adv >= (long) (2 * tp->t_maxseg))
  197: 			goto send;
  198: 		if (2 * adv >= (long) so->so_rcv.sb_datalen)
  199: 			goto send;
  200: 	}
  201: 
  202: 	/*
  203: 	 * Send if we owe peer an ACK.
  204: 	 */
  205: 	if (tp->t_flags & TF_ACKNOW)
  206: 		goto send;
  207: 	if (flags & (TH_SYN|TH_RST))
  208: 		goto send;
  209: 	if (SEQ_GT(tp->snd_up, tp->snd_una))
  210: 		goto send;
  211: 	/*
  212: 	 * If our state indicates that FIN should be sent
  213: 	 * and we have not yet done so, or we're retransmitting the FIN,
  214: 	 * then we need to send.
  215: 	 */
  216: 	if (flags & TH_FIN &&
  217: 	    ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una))
  218: 		goto send;
  219: 
  220: 	/*
  221: 	 * TCP window updates are not reliable, rather a polling protocol
  222: 	 * using ``persist'' packets is used to ensure receipt of window
  223: 	 * updates.  The three ``states'' for the output side are:
  224: 	 *	idle			not doing retransmits or persists
  225: 	 *	persisting		to move a small or zero window
  226: 	 *	(re)transmitting	and thereby not persisting
  227: 	 *
  228: 	 * tp->t_timer[TCPT_PERSIST]
  229: 	 *	is set when we are in persist state.
  230: 	 * tp->t_force
  231: 	 *	is set when we are called to send a persist packet.
  232: 	 * tp->t_timer[TCPT_REXMT]
  233: 	 *	is set when we are retransmitting
  234: 	 * The output side is idle when both timers are zero.
  235: 	 *
  236: 	 * If send window is too small, there is data to transmit, and no
  237: 	 * retransmit or persist is pending, then go to persist state.
  238: 	 * If nothing happens soon, send when timer expires:
  239: 	 * if window is nonzero, transmit what we can,
  240: 	 * otherwise force out a byte.
  241: 	 */
  242: 	if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 &&
  243: 	    tp->t_timer[TCPT_PERSIST] == 0) {
  244: 		tp->t_rxtshift = 0;
  245: 		tcp_setpersist(tp);
  246: 	}
  247: 
  248: 	/*
  249: 	 * No reason to send a segment, just return.
  250: 	 */
  251: 	return (0);
  252: 
  253: send:
  254: 	/*
  255: 	 * Before ESTABLISHED, force sending of initial options
  256: 	 * unless TCP set not to do any options.
  257: 	 * NOTE: we assume that the IP/TCP header plus TCP options
  258: 	 * always fit in a single mbuf, leaving room for a maximum
  259: 	 * link header, i.e.
  260: 	 *	max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MHLEN
       	 *
       	 * The only option built here is a 4-byte maximum segment size
       	 * option, and only on segments carrying SYN.
  261: 	 */
  262: 	optlen = 0;
  263: 	hdrlen = sizeof (struct tcpiphdr);
  264: 	if (flags & TH_SYN) {
  265: 		tp->snd_nxt = tp->iss;
  266: 		if ((tp->t_flags & TF_NOOPT) == 0) {
  267: 			uint16_t mss;
  268: 
  269: 			opt[0] = TCPOPT_MAXSEG;
  270: 			opt[1] = 4;
  271: 			mss = htons((uint16_t) tcp_mss(tp, 0));
  272: 			memcpy((caddr_t)(opt + 2), (caddr_t)&mss, sizeof(mss));
  273: 			optlen = 4;
  274: 		}
  275:  	}
  276: 
  277:  	hdrlen += optlen;
  278: 
  279: 	/*
  280: 	 * Adjust data length if insertion of options will
  281: 	 * bump the packet length beyond the t_maxseg length.
  282: 	 */
  283: 	 if (len > tp->t_maxseg - optlen) {
  284: 		len = tp->t_maxseg - optlen;
  285: 		sendalot = 1;
  286: 	 }
  287: 
  288: 	/*
  289: 	 * Grab a header mbuf, attaching a copy of data to
  290: 	 * be transmitted, and initialize the header from
  291: 	 * the template for sends on this connection.
       	 *
       	 * Both branches advance m_data by IF_MAXLINKHDR before the
       	 * headers are laid down.  If m_get() returns NULL, error is
       	 * set to 1 and control leaves through the "out" label.
  292: 	 */
  293: 	if (len) {
  294: 		m = m_get(so->slirp);
  295: 		if (m == NULL) {
  296: 			error = 1;
  297: 			goto out;
  298: 		}
  299: 		m->m_data += IF_MAXLINKHDR;
  300: 		m->m_len = hdrlen;
  301: 
  302: 		sbcopy(&so->so_snd, off, (int) len, mtod(m, caddr_t) + hdrlen);
  303: 		m->m_len += len;
  304: 
  305: 		/*
  306: 		 * If we're sending everything we've got, set PUSH.
  307: 		 * (This will keep happy those implementations which only
  308: 		 * give data to the user when a buffer fills or
  309: 		 * a PUSH comes in.)
  310: 		 */
  311: 		if (off + len == so->so_snd.sb_cc)
  312: 			flags |= TH_PUSH;
  313: 	} else {
  314: 		m = m_get(so->slirp);
  315: 		if (m == NULL) {
  316: 			error = 1;
  317: 			goto out;
  318: 		}
  319: 		m->m_data += IF_MAXLINKHDR;
  320: 		m->m_len = hdrlen;
  321: 	}
  322: 
  323: 	ti = mtod(m, struct tcpiphdr *);
  324: 
  325: 	memcpy((caddr_t)ti, &tp->t_template, sizeof (struct tcpiphdr));
  326: 
  327: 	/*
  328: 	 * Fill in fields, remembering maximum advertised
  329: 	 * window for use in delaying messages about window sizes.
  330: 	 * If resending a FIN, be sure not to use a new sequence number.
  331: 	 */
  332: 	if (flags & TH_FIN && tp->t_flags & TF_SENTFIN &&
  333: 	    tp->snd_nxt == tp->snd_max)
  334: 		tp->snd_nxt--;
  335: 	/*
  336: 	 * If we are doing retransmissions, then snd_nxt will
  337: 	 * not reflect the first unsent octet.  For ACK only
  338: 	 * packets, we do not want the sequence number of the
  339: 	 * retransmitted packet, we want the sequence number
  340: 	 * of the next unsent octet.  So, if there is no data
  341: 	 * (and no SYN or FIN), use snd_max instead of snd_nxt
  342: 	 * when filling in ti_seq.  But if we are in persist
  343: 	 * state, snd_max might reflect one byte beyond the
  344: 	 * right edge of the window, so use snd_nxt in that
  345: 	 * case, since we know we aren't doing a retransmission.
  346: 	 * (retransmit and persist are mutually exclusive...)
  347: 	 */
  348: 	if (len || (flags & (TH_SYN|TH_FIN)) || tp->t_timer[TCPT_PERSIST])
  349: 		ti->ti_seq = htonl(tp->snd_nxt);
  350: 	else
  351: 		ti->ti_seq = htonl(tp->snd_max);
  352: 	ti->ti_ack = htonl(tp->rcv_nxt);
  353: 	if (optlen) {
  354: 		memcpy((caddr_t)(ti + 1), (caddr_t)opt, optlen);
  355: 		ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2;
  356: 	}
  357: 	ti->ti_flags = flags;
  358: 	/*
  359: 	 * Calculate receive window.  Don't shrink window,
  360: 	 * but avoid silly window syndrome.
  361: 	 */
  362: 	if (win < (long)(so->so_rcv.sb_datalen / 4) && win < (long)tp->t_maxseg)
  363: 		win = 0;
  364: 	if (win > (long)TCP_MAXWIN << tp->rcv_scale)
  365: 		win = (long)TCP_MAXWIN << tp->rcv_scale;
  366: 	if (win < (long)(tp->rcv_adv - tp->rcv_nxt))
  367: 		win = (long)(tp->rcv_adv - tp->rcv_nxt);
  368: 	ti->ti_win = htons((uint16_t) (win>>tp->rcv_scale));
  369: 
  370: 	if (SEQ_GT(tp->snd_up, tp->snd_una)) {
  371: 		ti->ti_urp = htons((uint16_t)(tp->snd_up - ntohl(ti->ti_seq)));
  372: 		ti->ti_flags |= TH_URG;
  373: 	} else
  374: 		/*
  375: 		 * If no urgent pointer to send, then we pull
  376: 		 * the urgent pointer to the left edge of the send window
  377: 		 * so that it doesn't drift into the send window on sequence
  378: 		 * number wraparound.
  379: 		 */
  380: 		tp->snd_up = tp->snd_una;		/* drag it along */
  381: 
  382: 	/*
  383: 	 * Put TCP length in extended header, and then
  384: 	 * checksum extended header and data.
       	 * ti_len is rewritten only when the segment carries data or
       	 * options; otherwise the value copied in from tp->t_template
       	 * above is left as it is.
  385: 	 */
  386: 	if (len + optlen)
  387: 		ti->ti_len = htons((uint16_t)(sizeof (struct tcphdr) +
  388: 		    optlen + len));
  389: 	ti->ti_sum = cksum(m, (int)(hdrlen + len));
  390: 
  391: 	/*
  392: 	 * In transmit state, time the transmission and arrange for
  393: 	 * the retransmit.  In persist state, just set snd_max.
  394: 	 */
  395: 	if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) {
  396: 		tcp_seq startseq = tp->snd_nxt;
  397: 
  398: 		/*
  399: 		 * Advance snd_nxt over sequence space of this segment.
  400: 		 */
  401: 		if (flags & (TH_SYN|TH_FIN)) {
  402: 			if (flags & TH_SYN)
  403: 				tp->snd_nxt++;
  404: 			if (flags & TH_FIN) {
  405: 				tp->snd_nxt++;
  406: 				tp->t_flags |= TF_SENTFIN;
  407: 			}
  408: 		}
  409: 		tp->snd_nxt += len;
  410: 		if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
  411: 			tp->snd_max = tp->snd_nxt;
  412: 			/*
  413: 			 * Time this transmission if not a retransmission and
  414: 			 * not currently timing anything.
  415: 			 */
  416: 			if (tp->t_rtt == 0) {
  417: 				tp->t_rtt = 1;
  418: 				tp->t_rtseq = startseq;
  419: 			}
  420: 		}
  421: 
  422: 		/*
  423: 		 * Set retransmit timer if not currently set,
  424: 		 * and not doing an ack or a keep-alive probe.
  425: 		 * Initial value for retransmit timer is smoothed
  426: 		 * round-trip time + 2 * round-trip time variance.
  427: 		 * Initialize shift counter which is used for backoff
  428: 		 * of retransmit time.
  429: 		 */
  430: 		if (tp->t_timer[TCPT_REXMT] == 0 &&
  431: 		    tp->snd_nxt != tp->snd_una) {
  432: 			tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
  433: 			if (tp->t_timer[TCPT_PERSIST]) {
  434: 				tp->t_timer[TCPT_PERSIST] = 0;
  435: 				tp->t_rxtshift = 0;
  436: 			}
  437: 		}
  438: 	} else
  439: 		if (SEQ_GT(tp->snd_nxt + len, tp->snd_max))
  440: 			tp->snd_max = tp->snd_nxt + len;
  441: 
  442: 	/*
  443: 	 * Fill in IP length and desired time to live and
  444: 	 * send to IP level.  There should be a better way
  445: 	 * to handle ttl and tos; we could keep them in
  446: 	 * the template, but need a way to checksum without them.
       	 *
       	 * ip_len is stored as m->m_len with no byte-order conversion
       	 * here.  NOTE(review): presumably ip_output() takes care of
       	 * that - confirm before changing this.
  447: 	 */
  448: 	m->m_len = hdrlen + len; /* XXX Needed? m_len should be correct */
  449: 
  450:     {
  451: 
  452: 	((struct ip *)ti)->ip_len = m->m_len;
  453: 
  454: 	((struct ip *)ti)->ip_ttl = IPDEFTTL;
  455: 	((struct ip *)ti)->ip_tos = so->so_iptos;
  456: 
  457: 	error = ip_output(so, m);
  458:     }
  459: 	if (error) {
  460: out:	/* also the target of the m_get() failure gotos above */
  461: 		return (error);
  462: 	}
  463: 
  464: 	/*
  465: 	 * Data sent (as far as we can tell).
  466: 	 * If this advertises a larger window than any other segment,
  467: 	 * then remember the size of the advertised window.
  468: 	 * Any pending ACK has now been sent.
  469: 	 */
  470: 	if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv))
  471: 		tp->rcv_adv = tp->rcv_nxt + win;
  472: 	tp->last_ack_sent = tp->rcv_nxt;
  473: 	tp->t_flags &= ~(TF_ACKNOW|TF_DELACK);
  474: 	if (sendalot)
  475: 		goto again;
  476: 
  477: 	return (0);
  478: }
  479: /*
        * Arm (or re-arm) the persist timer of the connection tp and step its
        * backoff index.  The interval handed to TCPT_RANGESET is a base value
        * derived from t_srtt and t_rttvar, multiplied by the tcp_backoff[]
        * entry for the current t_rxtshift, with TCPTV_PERSMIN and
        * TCPTV_PERSMAX passed as the range arguments.
        */
  480: void
  481: tcp_setpersist(struct tcpcb *tp)
  482: {
  483: 	int base = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
  484: 
  485: 	/* Load the persist timer using the backoff factor in effect now. */
  486: 	TCPT_RANGESET(tp->t_timer[TCPT_PERSIST],
  487: 	    base * tcp_backoff[tp->t_rxtshift],
  488: 	    TCPTV_PERSMIN, TCPTV_PERSMAX);
  489: 
  490: 	/* The backoff index stops growing once it reaches TCP_MAXRXTSHIFT. */
  491: 	if (tp->t_rxtshift >= TCP_MAXRXTSHIFT)
  492: 		return;
  493: 
  494: 	tp->t_rxtshift++;
  495: }

unix.superglobalmegacorp.com