|
|
1.1 root 1: /*
2: * Copyright (c) 1982, 1986 Regents of the University of California.
3: * All rights reserved.
4: *
5: * Redistribution and use in source and binary forms are permitted
6: * provided that the above copyright notice and this paragraph are
7: * duplicated in all such forms and that any documentation,
8: * advertising materials, and other materials related to such
9: * distribution and use acknowledge that the software was developed
10: * by the University of California, Berkeley. The name of the
11: * University may not be used to endorse or promote products derived
12: * from this software without specific prior written permission.
13: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
14: * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
15: * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
16: *
17: * @(#)tcp_output.c 7.17 (Berkeley) 6/29/88
18: */
19:
20: #include "param.h"
21: #include "systm.h"
22: #include "mbuf.h"
23: #include "protosw.h"
24: #include "socket.h"
25: #include "socketvar.h"
26: #include "errno.h"
27:
28: #include "../net/route.h"
29:
30: #include "in.h"
31: #include "in_pcb.h"
32: #include "in_systm.h"
33: #include "ip.h"
34: #include "ip_var.h"
35: #include "tcp.h"
36: #define TCPOUTFLAGS
37: #include "tcp_fsm.h"
38: #include "tcp_seq.h"
39: #include "tcp_timer.h"
40: #include "tcp_var.h"
41: #include "tcpip.h"
42: #include "tcp_debug.h"
43:
44: /*
45: * Initial options.
46: */
47: u_char tcp_initopt[4] = { TCPOPT_MAXSEG, 4, 0x0, 0x0, };
48:
49: /*
50: * Tcp output routine: figure out what should be sent and send it.
51: */
52: tcp_output(tp)
53: register struct tcpcb *tp;
54: {
55: register struct socket *so = tp->t_inpcb->inp_socket;
56: register long len, win;
57: struct mbuf *m0;
58: int off, flags, error;
59: register struct mbuf *m;
60: register struct tcpiphdr *ti;
61: u_char *opt;
62: unsigned optlen = 0;
63: int idle, sendalot;
64:
65: /*
66: * Determine length of data that should be transmitted,
67: * and flags that will be used.
68: * If there is some data or critical controls (SYN, RST)
69: * to send, then transmit; otherwise, investigate further.
70: */
71: idle = (tp->snd_max == tp->snd_una);
72: again:
73: sendalot = 0;
74: off = tp->snd_nxt - tp->snd_una;
75: win = MIN(tp->snd_wnd, tp->snd_cwnd);
76:
77: /*
78: * If in persist timeout with window of 0, send 1 byte.
79: * Otherwise, if window is small but nonzero
80: * and timer expired, we will send what we can
81: * and go to transmit state.
82: */
83: if (tp->t_force) {
84: if (win == 0)
85: win = 1;
86: else {
87: tp->t_timer[TCPT_PERSIST] = 0;
88: tp->t_rxtshift = 0;
89: }
90: }
91:
92: len = MIN(so->so_snd.sb_cc, win) - off;
93: flags = tcp_outflags[tp->t_state];
94:
95: if (len < 0) {
96: /*
97: * If FIN has been sent but not acked,
98: * but we haven't been called to retransmit,
99: * len will be -1. Otherwise, window shrank
100: * after we sent into it. If window shrank to 0,
101: * cancel pending retransmit and pull snd_nxt
102: * back to (closed) window. We will enter persist
103: * state below. If the window didn't close completely,
104: * just wait for an ACK.
105: */
106: len = 0;
107: if (win == 0) {
108: tp->t_timer[TCPT_REXMT] = 0;
109: tp->snd_nxt = tp->snd_una;
110: }
111: }
112: if (len > tp->t_maxseg) {
113: len = tp->t_maxseg;
114: sendalot = 1;
115: }
116: if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc))
117: flags &= ~TH_FIN;
118: win = sbspace(&so->so_rcv);
119:
120:
121: /*
122: * If our state indicates that FIN should be sent
123: * and we have not yet done so, or we're retransmitting the FIN,
124: * then we need to send.
125: */
126: if (flags & TH_FIN &&
127: ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una))
128: goto send;
129: /*
130: * Send if we owe peer an ACK.
131: */
132: if (tp->t_flags & TF_ACKNOW)
133: goto send;
134: if (flags & (TH_SYN|TH_RST))
135: goto send;
136: if (SEQ_GT(tp->snd_up, tp->snd_una))
137: goto send;
138:
139: /*
140: * Sender silly window avoidance. If connection is idle
141: * and can send all data, a maximum segment,
142: * at least a maximum default-size segment do it,
143: * or are forced, do it; otherwise don't bother.
144: * If peer's buffer is tiny, then send
145: * when window is at least half open.
146: * If retransmitting (possibly after persist timer forced us
147: * to send into a small window), then must resend.
148: */
149: if (len) {
150: if (len == tp->t_maxseg)
151: goto send;
152: if ((idle || tp->t_flags & TF_NODELAY) &&
153: len + off >= so->so_snd.sb_cc)
154: goto send;
155: if (tp->t_force)
156: goto send;
157: if (len >= tp->max_sndwnd / 2)
158: goto send;
159: if (SEQ_LT(tp->snd_nxt, tp->snd_max))
160: goto send;
161: }
162:
163: /*
164: * Compare available window to amount of window
165: * known to peer (as advertised window less
166: * next expected input). If the difference is at least two
167: * max size segments or at least 35% of the maximum possible
168: * window, then want to send a window update to peer.
169: */
170: if (win > 0) {
171: int adv = win - (tp->rcv_adv - tp->rcv_nxt);
172:
173: if (so->so_rcv.sb_cc == 0 && adv >= 2 * tp->t_maxseg)
174: goto send;
175: if (100 * adv / so->so_rcv.sb_hiwat >= 35)
176: goto send;
177: }
178:
179: /*
180: * TCP window updates are not reliable, rather a polling protocol
181: * using ``persist'' packets is used to insure receipt of window
182: * updates. The three ``states'' for the output side are:
183: * idle not doing retransmits or persists
184: * persisting to move a small or zero window
185: * (re)transmitting and thereby not persisting
186: *
187: * tp->t_timer[TCPT_PERSIST]
188: * is set when we are in persist state.
189: * tp->t_force
190: * is set when we are called to send a persist packet.
191: * tp->t_timer[TCPT_REXMT]
192: * is set when we are retransmitting
193: * The output side is idle when both timers are zero.
194: *
195: * If send window is too small, there is data to transmit, and no
196: * retransmit or persist is pending, then go to persist state.
197: * If nothing happens soon, send when timer expires:
198: * if window is nonzero, transmit what we can,
199: * otherwise force out a byte.
200: */
201: if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 &&
202: tp->t_timer[TCPT_PERSIST] == 0) {
203: tp->t_rxtshift = 0;
204: tcp_setpersist(tp);
205: }
206:
207: /*
208: * No reason to send a segment, just return.
209: */
210: return (0);
211:
212: send:
213: /*
214: * Grab a header mbuf, attaching a copy of data to
215: * be transmitted, and initialize the header from
216: * the template for sends on this connection.
217: */
218: MGET(m, M_DONTWAIT, MT_HEADER);
219: if (m == NULL)
220: return (ENOBUFS);
221: #define MAXLINKHDR 32 /* belongs elsewhere */
222: #define DATASPACE (MMAXOFF - (MMINOFF + MAXLINKHDR + sizeof (struct tcpiphdr)))
223: m->m_off = MMINOFF + MAXLINKHDR;
224: m->m_len = sizeof (struct tcpiphdr);
225: ti = mtod(m, struct tcpiphdr *);
226: if (len) {
227: if (tp->t_force && len == 1)
228: tcpstat.tcps_sndprobe++;
229: else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) {
230: tcpstat.tcps_sndrexmitpack++;
231: tcpstat.tcps_sndrexmitbyte += len;
232: } else {
233: tcpstat.tcps_sndpack++;
234: tcpstat.tcps_sndbyte += len;
235: }
236: if (len <= DATASPACE) {
237: m_copydata(so->so_snd.sb_mb, off, (int) len,
238: mtod(m, caddr_t) + sizeof(struct tcpiphdr));
239: m->m_len += len;
240: } else {
241: m->m_next = m_copy(so->so_snd.sb_mb, off, (int) len);
242: if (m->m_next == 0)
243: len = 0;
244: }
245: } else if (tp->t_flags & TF_ACKNOW)
246: tcpstat.tcps_sndacks++;
247: else if (flags & (TH_SYN|TH_FIN|TH_RST))
248: tcpstat.tcps_sndctrl++;
249: else if (SEQ_GT(tp->snd_up, tp->snd_una))
250: tcpstat.tcps_sndurg++;
251: else
252: tcpstat.tcps_sndwinup++;
253:
254: if (tp->t_template == 0)
255: panic("tcp_output");
256: bcopy((caddr_t)tp->t_template, (caddr_t)ti, sizeof (struct tcpiphdr));
257:
258: /*
259: * Fill in fields, remembering maximum advertised
260: * window for use in delaying messages about window sizes.
261: * If resending a FIN, be sure not to use a new sequence number.
262: */
263: if (flags & TH_FIN && tp->t_flags & TF_SENTFIN &&
264: tp->snd_nxt == tp->snd_max)
265: tp->snd_nxt--;
266: ti->ti_seq = htonl(tp->snd_nxt);
267: ti->ti_ack = htonl(tp->rcv_nxt);
268: /*
269: * Before ESTABLISHED, force sending of initial options
270: * unless TCP set to not do any options.
271: */
272: opt = NULL;
273: if (flags & TH_SYN && (tp->t_flags & TF_NOOPT) == 0) {
274: u_short mss;
275:
276: mss = MIN(so->so_rcv.sb_hiwat / 2, tcp_mss(tp));
277: if (mss > IP_MSS - sizeof(struct tcpiphdr)) {
278: opt = tcp_initopt;
279: optlen = sizeof (tcp_initopt);
280: *(u_short *)(opt + 2) = htons(mss);
281: }
282: }
283: if (opt) {
284: m0 = m->m_next;
285: m->m_next = m_get(M_DONTWAIT, MT_DATA);
286: if (m->m_next == 0) {
287: (void) m_free(m);
288: m_freem(m0);
289: return (ENOBUFS);
290: }
291: m->m_next->m_next = m0;
292: m0 = m->m_next;
293: m0->m_len = optlen;
294: bcopy((caddr_t)opt, mtod(m0, caddr_t), optlen);
295: opt = (u_char *)(mtod(m0, caddr_t) + optlen);
296: while (m0->m_len & 0x3) {
297: *opt++ = TCPOPT_EOL;
298: m0->m_len++;
299: }
300: optlen = m0->m_len;
301: ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2;
302: }
303: ti->ti_flags = flags;
304: /*
305: * Calculate receive window. Don't shrink window,
306: * but avoid silly window syndrome.
307: */
308: if (win < (long)(so->so_rcv.sb_hiwat / 4) && win < (long)tp->t_maxseg)
309: win = 0;
310: if (win > IP_MAXPACKET)
311: win = IP_MAXPACKET;
312: if (win < (long)(tp->rcv_adv - tp->rcv_nxt))
313: win = (long)(tp->rcv_adv - tp->rcv_nxt);
314: ti->ti_win = htons((u_short)win);
315: if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
316: ti->ti_urp = htons((u_short)(tp->snd_up - tp->snd_nxt));
317: ti->ti_flags |= TH_URG;
318: } else
319: /*
320: * If no urgent pointer to send, then we pull
321: * the urgent pointer to the left edge of the send window
322: * so that it doesn't drift into the send window on sequence
323: * number wraparound.
324: */
325: tp->snd_up = tp->snd_una; /* drag it along */
326: /*
327: * If anything to send and we can send it all, set PUSH.
328: * (This will keep happy those implementations which only
329: * give data to the user when a buffer fills or a PUSH comes in.)
330: */
331: if (len && off+len == so->so_snd.sb_cc)
332: ti->ti_flags |= TH_PUSH;
333:
334: /*
335: * Put TCP length in extended header, and then
336: * checksum extended header and data.
337: */
338: if (len + optlen)
339: ti->ti_len = htons((u_short)(sizeof(struct tcphdr) +
340: optlen + len));
341: ti->ti_sum = in_cksum(m,
342: (int)(sizeof (struct tcpiphdr) + (int)optlen + len));
343:
344: /*
345: * In transmit state, time the transmission and arrange for
346: * the retransmit. In persist state, just set snd_max.
347: */
348: if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) {
349: tcp_seq startseq = tp->snd_nxt;
350:
351: /*
352: * Advance snd_nxt over sequence space of this segment.
353: */
354: if (flags & TH_SYN)
355: tp->snd_nxt++;
356: if (flags & TH_FIN) {
357: tp->snd_nxt++;
358: tp->t_flags |= TF_SENTFIN;
359: }
360: tp->snd_nxt += len;
361: if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
362: tp->snd_max = tp->snd_nxt;
363: /*
364: * Time this transmission if not a retransmission and
365: * not currently timing anything.
366: */
367: if (tp->t_rtt == 0) {
368: tp->t_rtt = 1;
369: tp->t_rtseq = startseq;
370: tcpstat.tcps_segstimed++;
371: }
372: }
373:
374: /*
375: * Set retransmit timer if not currently set,
376: * and not doing an ack or a keep-alive probe.
377: * Initial value for retransmit timer is smoothed
378: * round-trip time + 2 * round-trip time variance.
379: * Initialize shift counter which is used for backoff
380: * of retransmit time.
381: */
382: if (tp->t_timer[TCPT_REXMT] == 0 &&
383: tp->snd_nxt != tp->snd_una) {
384: tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
385: if (tp->t_timer[TCPT_PERSIST]) {
386: tp->t_timer[TCPT_PERSIST] = 0;
387: tp->t_rxtshift = 0;
388: }
389: }
390: } else
391: if (SEQ_GT(tp->snd_nxt + len, tp->snd_max))
392: tp->snd_max = tp->snd_nxt + len;
393:
394: /*
395: * Trace.
396: */
397: if (so->so_options & SO_DEBUG)
398: tcp_trace(TA_OUTPUT, tp->t_state, tp, ti, 0);
399:
400: /*
401: * Fill in IP length and desired time to live and
402: * send to IP level.
403: */
404: ((struct ip *)ti)->ip_len = sizeof (struct tcpiphdr) + optlen + len;
405: ((struct ip *)ti)->ip_ttl = TCP_TTL;
406: error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route,
407: so->so_options & SO_DONTROUTE);
408: if (error) {
409: if (error == ENOBUFS) {
410: tcp_quench(tp->t_inpcb);
411: return (0);
412: }
413: return (error);
414: }
415: tcpstat.tcps_sndtotal++;
416:
417: /*
418: * Data sent (as far as we can tell).
419: * If this advertises a larger window than any other segment,
420: * then remember the size of the advertised window.
421: * Any pending ACK has now been sent.
422: */
423: if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv))
424: tp->rcv_adv = tp->rcv_nxt + win;
425: tp->t_flags &= ~(TF_ACKNOW|TF_DELACK);
426: if (sendalot)
427: goto again;
428: return (0);
429: }
430:
431: tcp_setpersist(tp)
432: register struct tcpcb *tp;
433: {
434: register t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
435:
436: if (tp->t_timer[TCPT_REXMT])
437: panic("tcp_output REXMT");
438: /*
439: * Start/restart persistance timer.
440: */
441: TCPT_RANGESET(tp->t_timer[TCPT_PERSIST],
442: t * tcp_backoff[tp->t_rxtshift],
443: TCPTV_PERSMIN, TCPTV_PERSMAX);
444: if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
445: tp->t_rxtshift++;
446: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.