|
|
1.1 root 1: /*
2: * Copyright (c) 1982, 1986, 1988, 1990 Regents of the University of California.
3: * All rights reserved.
4: *
5: * Redistribution is only permitted until one year after the first shipment
6: * of 4.4BSD by the Regents. Otherwise, redistribution and use in source and
7: * binary forms are permitted provided that: (1) source distributions retain
8: * this entire copyright notice and comment, and (2) distributions including
9: * binaries display the following acknowledgement: ``This product includes
10: * software developed by the University of California, Berkeley and its
11: * contributors'' in the documentation or other materials provided with the
12: * distribution and in all advertising materials mentioning features or use
13: * of this software. Neither the name of the University nor the names of
14: * its contributors may be used to endorse or promote products derived from
15: * this software without specific prior written permission.
16: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
17: * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
18: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
19: *
20: * @(#)tcp_output.c 7.21 (Berkeley) 6/28/90
21: */
22:
23: #include "param.h"
24: #include "systm.h"
25: #include "malloc.h"
26: #include "mbuf.h"
27: #include "protosw.h"
28: #include "socket.h"
29: #include "socketvar.h"
30: #include "errno.h"
31:
32: #include "../net/route.h"
33:
34: #include "in.h"
35: #include "in_systm.h"
36: #include "ip.h"
37: #include "in_pcb.h"
38: #include "ip_var.h"
39: #include "tcp.h"
40: #define TCPOUTFLAGS
41: #include "tcp_fsm.h"
42: #include "tcp_seq.h"
43: #include "tcp_timer.h"
44: #include "tcp_var.h"
45: #include "tcpip.h"
46: #include "tcp_debug.h"
47:
48: #ifdef notyet
49: extern struct mbuf *m_copypack();
50: #endif
51:
52: /*
53: * Initial options.
54: */
55: u_char tcp_initopt[4] = { TCPOPT_MAXSEG, 4, 0x0, 0x0, };
56:
57: /*
58: * Tcp output routine: figure out what should be sent and send it.
59: */
60: tcp_output(tp)
61: register struct tcpcb *tp;
62: {
63: register struct socket *so = tp->t_inpcb->inp_socket;
64: register long len, win;
65: int off, flags, error;
66: register struct mbuf *m;
67: register struct tcpiphdr *ti;
68: u_char *opt;
69: unsigned optlen, hdrlen;
70: int idle, sendalot;
71:
72: /*
73: * Determine length of data that should be transmitted,
74: * and flags that will be used.
75: * If there is some data or critical controls (SYN, RST)
76: * to send, then transmit; otherwise, investigate further.
77: */
78: idle = (tp->snd_max == tp->snd_una);
79: if (idle && tp->t_idle >= tp->t_rxtcur)
80: /*
81: * We have been idle for "a while" and no acks are
82: * expected to clock out any data we send --
83: * slow start to get ack "clock" running again.
84: */
85: tp->snd_cwnd = tp->t_maxseg;
86: again:
87: sendalot = 0;
88: off = tp->snd_nxt - tp->snd_una;
89: win = min(tp->snd_wnd, tp->snd_cwnd);
90:
91: /*
92: * If in persist timeout with window of 0, send 1 byte.
93: * Otherwise, if window is small but nonzero
94: * and timer expired, we will send what we can
95: * and go to transmit state.
96: */
97: if (tp->t_force) {
98: if (win == 0)
99: win = 1;
100: else {
101: tp->t_timer[TCPT_PERSIST] = 0;
102: tp->t_rxtshift = 0;
103: }
104: }
105:
106: flags = tcp_outflags[tp->t_state];
107: len = min(so->so_snd.sb_cc, win) - off;
108:
109: if (len < 0) {
110: /*
111: * If FIN has been sent but not acked,
112: * but we haven't been called to retransmit,
113: * len will be -1. Otherwise, window shrank
114: * after we sent into it. If window shrank to 0,
115: * cancel pending retransmit and pull snd_nxt
116: * back to (closed) window. We will enter persist
117: * state below. If the window didn't close completely,
118: * just wait for an ACK.
119: */
120: len = 0;
121: if (win == 0) {
122: tp->t_timer[TCPT_REXMT] = 0;
123: tp->snd_nxt = tp->snd_una;
124: }
125: }
126: if (len > tp->t_maxseg) {
127: len = tp->t_maxseg;
128: sendalot = 1;
129: }
130: if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc))
131: flags &= ~TH_FIN;
132:
133: win = sbspace(&so->so_rcv);
134:
135: /*
136: * Sender silly window avoidance. If connection is idle
137: * and can send all data, a maximum segment,
138: * at least a maximum default-size segment do it,
139: * or are forced, do it; otherwise don't bother.
140: * If peer's buffer is tiny, then send
141: * when window is at least half open.
142: * If retransmitting (possibly after persist timer forced us
143: * to send into a small window), then must resend.
144: */
145: if (len) {
146: if (len == tp->t_maxseg)
147: goto send;
148: if ((idle || tp->t_flags & TF_NODELAY) &&
149: len + off >= so->so_snd.sb_cc)
150: goto send;
151: if (tp->t_force)
152: goto send;
153: if (len >= tp->max_sndwnd / 2)
154: goto send;
155: if (SEQ_LT(tp->snd_nxt, tp->snd_max))
156: goto send;
157: }
158:
159: /*
160: * Compare available window to amount of window
161: * known to peer (as advertised window less
162: * next expected input). If the difference is at least two
163: * max size segments, or at least 50% of the maximum possible
164: * window, then want to send a window update to peer.
165: */
166: if (win > 0) {
167: int adv = win - (tp->rcv_adv - tp->rcv_nxt);
168:
169: /* this was: XXX
170: * if (so->so_rcv.sb_cc == 0 && adv >= 2 * tp->t_maxseg)
171: */
172: if (adv >= 2 * tp->t_maxseg)
173: goto send;
174: if (2 * adv >= so->so_rcv.sb_hiwat)
175: goto send;
176: }
177:
178: /*
179: * Send if we owe peer an ACK.
180: */
181: if (tp->t_flags & TF_ACKNOW)
182: goto send;
183: if (flags & (TH_SYN|TH_RST))
184: goto send;
185: if (SEQ_GT(tp->snd_up, tp->snd_una))
186: goto send;
187: /*
188: * If our state indicates that FIN should be sent
189: * and we have not yet done so, or we're retransmitting the FIN,
190: * then we need to send.
191: */
192: if (flags & TH_FIN &&
193: ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una))
194: goto send;
195:
196: /*
197: * TCP window updates are not reliable, rather a polling protocol
198: * using ``persist'' packets is used to insure receipt of window
199: * updates. The three ``states'' for the output side are:
200: * idle not doing retransmits or persists
201: * persisting to move a small or zero window
202: * (re)transmitting and thereby not persisting
203: *
204: * tp->t_timer[TCPT_PERSIST]
205: * is set when we are in persist state.
206: * tp->t_force
207: * is set when we are called to send a persist packet.
208: * tp->t_timer[TCPT_REXMT]
209: * is set when we are retransmitting
210: * The output side is idle when both timers are zero.
211: *
212: * If send window is too small, there is data to transmit, and no
213: * retransmit or persist is pending, then go to persist state.
214: * If nothing happens soon, send when timer expires:
215: * if window is nonzero, transmit what we can,
216: * otherwise force out a byte.
217: */
218: if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 &&
219: tp->t_timer[TCPT_PERSIST] == 0) {
220: tp->t_rxtshift = 0;
221: tcp_setpersist(tp);
222: }
223:
224: /*
225: * No reason to send a segment, just return.
226: */
227: return (0);
228:
229: send:
230: /*
231: * Before ESTABLISHED, force sending of initial options
232: * unless TCP set not to do any options.
233: * NOTE: we assume that the IP/TCP header plus TCP options
234: * always fit in a single mbuf, leaving room for a maximum
235: * link header, i.e.
236: * max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MHLEN
237: */
238: optlen = 0;
239: hdrlen = sizeof (struct tcpiphdr);
240: if (flags & TH_SYN && (tp->t_flags & TF_NOOPT) == 0) {
241: opt = tcp_initopt;
242: optlen = sizeof (tcp_initopt);
243: hdrlen += sizeof (tcp_initopt);
244: *(u_short *)(opt + 2) = htons((u_short) tcp_mss(tp, 0));
245: #ifdef DIAGNOSTIC
246: if (max_linkhdr + hdrlen > MHLEN)
247: panic("tcphdr too big");
248: #endif
249: }
250:
251: /*
252: * Grab a header mbuf, attaching a copy of data to
253: * be transmitted, and initialize the header from
254: * the template for sends on this connection.
255: */
256: if (len) {
257: if (tp->t_force && len == 1)
258: tcpstat.tcps_sndprobe++;
259: else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) {
260: tcpstat.tcps_sndrexmitpack++;
261: tcpstat.tcps_sndrexmitbyte += len;
262: } else {
263: tcpstat.tcps_sndpack++;
264: tcpstat.tcps_sndbyte += len;
265: }
266: #ifdef notyet
267: if ((m = m_copypack(so->so_snd.sb_mb, off,
268: (int)len, max_linkhdr + hdrlen)) == 0) {
269: error = ENOBUFS;
270: goto out;
271: }
272: /*
273: * m_copypack left space for our hdr; use it.
274: */
275: m->m_len += hdrlen;
276: m->m_data -= hdrlen;
277: #else
278: MGETHDR(m, M_DONTWAIT, MT_HEADER);
279: if (m == NULL) {
280: error = ENOBUFS;
281: goto out;
282: }
283: m->m_data += max_linkhdr;
284: m->m_len = hdrlen;
285: if (len <= MHLEN - hdrlen - max_linkhdr) {
286: m_copydata(so->so_snd.sb_mb, off, (int) len,
287: mtod(m, caddr_t) + hdrlen);
288: m->m_len += len;
289: } else {
290: m->m_next = m_copy(so->so_snd.sb_mb, off, (int) len);
291: if (m->m_next == 0)
292: len = 0;
293: }
294: #endif
295: /*
296: * If we're sending everything we've got, set PUSH.
297: * (This will keep happy those implementations which only
298: * give data to the user when a buffer fills or
299: * a PUSH comes in.)
300: */
301: if (off + len == so->so_snd.sb_cc)
302: flags |= TH_PUSH;
303: } else {
304: if (tp->t_flags & TF_ACKNOW)
305: tcpstat.tcps_sndacks++;
306: else if (flags & (TH_SYN|TH_FIN|TH_RST))
307: tcpstat.tcps_sndctrl++;
308: else if (SEQ_GT(tp->snd_up, tp->snd_una))
309: tcpstat.tcps_sndurg++;
310: else
311: tcpstat.tcps_sndwinup++;
312:
313: MGETHDR(m, M_DONTWAIT, MT_HEADER);
314: if (m == NULL) {
315: error = ENOBUFS;
316: goto out;
317: }
318: m->m_data += max_linkhdr;
319: m->m_len = hdrlen;
320: }
321: m->m_pkthdr.rcvif = (struct ifnet *)0;
322: ti = mtod(m, struct tcpiphdr *);
323: if (tp->t_template == 0)
324: panic("tcp_output");
325: bcopy((caddr_t)tp->t_template, (caddr_t)ti, sizeof (struct tcpiphdr));
326:
327: /*
328: * Fill in fields, remembering maximum advertised
329: * window for use in delaying messages about window sizes.
330: * If resending a FIN, be sure not to use a new sequence number.
331: */
332: if (flags & TH_FIN && tp->t_flags & TF_SENTFIN &&
333: tp->snd_nxt == tp->snd_max)
334: tp->snd_nxt--;
335: ti->ti_seq = htonl(tp->snd_nxt);
336: ti->ti_ack = htonl(tp->rcv_nxt);
337: if (optlen) {
338: bcopy((caddr_t)opt, (caddr_t)(ti + 1), optlen);
339: ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2;
340: }
341: ti->ti_flags = flags;
342: /*
343: * Calculate receive window. Don't shrink window,
344: * but avoid silly window syndrome.
345: */
346: if (win < (long)(so->so_rcv.sb_hiwat / 4) && win < (long)tp->t_maxseg)
347: win = 0;
348: if (win > TCP_MAXWIN)
349: win = TCP_MAXWIN;
350: if (win < (long)(tp->rcv_adv - tp->rcv_nxt))
351: win = (long)(tp->rcv_adv - tp->rcv_nxt);
352: ti->ti_win = htons((u_short)win);
353: if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
354: ti->ti_urp = htons((u_short)(tp->snd_up - tp->snd_nxt));
355: ti->ti_flags |= TH_URG;
356: } else
357: /*
358: * If no urgent pointer to send, then we pull
359: * the urgent pointer to the left edge of the send window
360: * so that it doesn't drift into the send window on sequence
361: * number wraparound.
362: */
363: tp->snd_up = tp->snd_una; /* drag it along */
364:
365: /*
366: * Put TCP length in extended header, and then
367: * checksum extended header and data.
368: */
369: if (len + optlen)
370: ti->ti_len = htons((u_short)(sizeof (struct tcphdr) +
371: optlen + len));
372: ti->ti_sum = in_cksum(m, (int)(hdrlen + len));
373:
374: /*
375: * In transmit state, time the transmission and arrange for
376: * the retransmit. In persist state, just set snd_max.
377: */
378: if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) {
379: tcp_seq startseq = tp->snd_nxt;
380:
381: /*
382: * Advance snd_nxt over sequence space of this segment.
383: */
384: if (flags & (TH_SYN|TH_FIN)) {
385: if (flags & TH_SYN)
386: tp->snd_nxt++;
387: if (flags & TH_FIN) {
388: tp->snd_nxt++;
389: tp->t_flags |= TF_SENTFIN;
390: }
391: }
392: tp->snd_nxt += len;
393: if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
394: tp->snd_max = tp->snd_nxt;
395: /*
396: * Time this transmission if not a retransmission and
397: * not currently timing anything.
398: */
399: if (tp->t_rtt == 0) {
400: tp->t_rtt = 1;
401: tp->t_rtseq = startseq;
402: tcpstat.tcps_segstimed++;
403: }
404: }
405:
406: /*
407: * Set retransmit timer if not currently set,
408: * and not doing an ack or a keep-alive probe.
409: * Initial value for retransmit timer is smoothed
410: * round-trip time + 2 * round-trip time variance.
411: * Initialize shift counter which is used for backoff
412: * of retransmit time.
413: */
414: if (tp->t_timer[TCPT_REXMT] == 0 &&
415: tp->snd_nxt != tp->snd_una) {
416: tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
417: if (tp->t_timer[TCPT_PERSIST]) {
418: tp->t_timer[TCPT_PERSIST] = 0;
419: tp->t_rxtshift = 0;
420: }
421: }
422: } else
423: if (SEQ_GT(tp->snd_nxt + len, tp->snd_max))
424: tp->snd_max = tp->snd_nxt + len;
425:
426: /*
427: * Trace.
428: */
429: if (so->so_options & SO_DEBUG)
430: tcp_trace(TA_OUTPUT, tp->t_state, tp, ti, 0);
431:
432: /*
433: * Fill in IP length and desired time to live and
434: * send to IP level. There should be a better way
435: * to handle ttl and tos; we could keep them in
436: * the template, but need a way to checksum without them.
437: */
438: m->m_pkthdr.len = hdrlen + len;
439: ((struct ip *)ti)->ip_len = m->m_pkthdr.len;
440: ((struct ip *)ti)->ip_ttl = tp->t_inpcb->inp_ip.ip_ttl; /* XXX */
441: ((struct ip *)ti)->ip_tos = tp->t_inpcb->inp_ip.ip_tos; /* XXX */
442: #if BSD >= 43
443: error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route,
444: so->so_options & SO_DONTROUTE);
445: #else
446: error = ip_output(m, (struct mbuf *)0, &tp->t_inpcb->inp_route,
447: so->so_options & SO_DONTROUTE);
448: #endif
449: if (error) {
450: out:
451: if (error == ENOBUFS) {
452: tcp_quench(tp->t_inpcb);
453: return (0);
454: }
455: if ((error == EHOSTUNREACH || error == ENETDOWN)
456: && TCPS_HAVERCVDSYN(tp->t_state)) {
457: tp->t_softerror = error;
458: return (0);
459: }
460: return (error);
461: }
462: tcpstat.tcps_sndtotal++;
463:
464: /*
465: * Data sent (as far as we can tell).
466: * If this advertises a larger window than any other segment,
467: * then remember the size of the advertised window.
468: * Any pending ACK has now been sent.
469: */
470: if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv))
471: tp->rcv_adv = tp->rcv_nxt + win;
472: tp->t_flags &= ~(TF_ACKNOW|TF_DELACK);
473: if (sendalot)
474: goto again;
475: return (0);
476: }
477:
478: tcp_setpersist(tp)
479: register struct tcpcb *tp;
480: {
481: register t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
482:
483: if (tp->t_timer[TCPT_REXMT])
484: panic("tcp_output REXMT");
485: /*
486: * Start/restart persistance timer.
487: */
488: TCPT_RANGESET(tp->t_timer[TCPT_PERSIST],
489: t * tcp_backoff[tp->t_rxtshift],
490: TCPTV_PERSMIN, TCPTV_PERSMAX);
491: if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
492: tp->t_rxtshift++;
493: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.