|
|
1.1 root 1: /*
2: * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3: *
4: * @APPLE_LICENSE_HEADER_START@
5: *
6: * The contents of this file constitute Original Code as defined in and
7: * are subject to the Apple Public Source License Version 1.1 (the
8: * "License"). You may not use this file except in compliance with the
9: * License. Please obtain a copy of the License at
10: * http://www.apple.com/publicsource and read it before using this file.
11: *
12: * This Original Code and all software distributed under the License are
13: * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14: * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15: * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16: * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17: * License for the specific language governing rights and limitations
18: * under the License.
19: *
20: * @APPLE_LICENSE_HEADER_END@
21: */
22: /*
23: * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
24: * The Regents of the University of California. All rights reserved.
25: *
26: * Redistribution and use in source and binary forms, with or without
27: * modification, are permitted provided that the following conditions
28: * are met:
29: * 1. Redistributions of source code must retain the above copyright
30: * notice, this list of conditions and the following disclaimer.
31: * 2. Redistributions in binary form must reproduce the above copyright
32: * notice, this list of conditions and the following disclaimer in the
33: * documentation and/or other materials provided with the distribution.
34: * 3. All advertising materials mentioning features or use of this software
35: * must display the following acknowledgement:
36: * This product includes software developed by the University of
37: * California, Berkeley and its contributors.
38: * 4. Neither the name of the University nor the names of its contributors
39: * may be used to endorse or promote products derived from this software
40: * without specific prior written permission.
41: *
42: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
43: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
46: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52: * SUCH DAMAGE.
53: *
54: * @(#)tcp_output.c 8.4 (Berkeley) 5/24/95
55: */
56:
57: #if ISFB31
58: #include "opt_tcpdebug.h"
59: #endif
60:
61: #include <stddef.h>
62:
63: #include <sys/param.h>
64: #include <sys/systm.h>
65: #include <sys/mbuf.h>
66: #include <sys/protosw.h>
67: #include <sys/socket.h>
68: #include <sys/socketvar.h>
69:
70: #include <net/route.h>
71:
72: #include <netinet/in.h>
73: #include <netinet/in_systm.h>
74: #include <netinet/ip.h>
75: #include <netinet/in_pcb.h>
76: #include <netinet/ip_var.h>
77: #include <netinet/tcp.h>
78: #define TCPOUTFLAGS
79: #include <netinet/tcp_fsm.h>
80: #include <netinet/tcp_seq.h>
81: #include <netinet/tcp_timer.h>
82: #include <netinet/tcp_var.h>
83: #include <netinet/tcpip.h>
84: #if TCPDEBUG
85: #include <netinet/tcp_debug.h>
86: #endif
87:
88: #ifdef notyet
89: extern struct mbuf *m_copypack();
90: #endif
91:
92:
93: /*
94: * Tcp output routine: figure out what should be sent and send it.
 *
 * Works out from the state of tp how much data and which control
 * flags may go out now.  If nothing needs to be sent, the persist
 * timer may be started and 0 is returned.  Otherwise one segment is
 * built from tp->t_template, the TCP options and a copy of the data
 * in so->so_snd, and handed to ip_output(); the routine loops back to
 * "again" while sendalot is set.
 *
 * Returns 0 when no segment was needed, when the segment was
 * suppressed, or when it was sent.  ENOBUFS and EMSGSIZE are handled
 * here (tcp_quench() / tcp_mtudisc()) and reported as 0; EHOSTUNREACH
 * and ENETDOWN are stored in tp->t_softerror and reported as 0 once
 * TCPS_HAVERCVDSYN(tp->t_state) holds; any other error from
 * ip_output() is returned to the caller.
 * Panics if the headers do not fit in MHLEN or tp->t_template is null.
95: */
96: int
97: tcp_output(tp)
98: register struct tcpcb *tp;
99: {
100: register struct socket *so = tp->t_inpcb->inp_socket;
101: register long len, win; /* len: data bytes to send; win: send window, later receive window */
102: int off, flags, error; /* off: snd_nxt - snd_una */
103: register struct mbuf *m;
104: register struct tcpiphdr *ti;
105: u_char opt[TCP_MAXOLEN]; /* TCP options assembled here, optlen bytes used */
106: unsigned ipoptlen, optlen, hdrlen;
107: int idle, sendalot; /* idle: snd_max == snd_una; sendalot: loop to "again" after sending */
108: struct rmxp_tao *taop;
109: struct rmxp_tao tao_noncached; /* zeroed stand-in when tcp_gettaocache() returns NULL */
110:
111: /*
112: * Determine length of data that should be transmitted,
113: * and flags that will be used.
114: * If there is some data or critical controls (SYN, RST)
115: * to send, then transmit; otherwise, investigate further.
116: */
117: idle = (tp->snd_max == tp->snd_una);
118: if (idle && tp->t_idle >= tp->t_rxtcur)
119: /*
120: * We have been idle for "a while" and no acks are
121: * expected to clock out any data we send --
122: * slow start to get ack "clock" running again.
123: */
124: tp->snd_cwnd = tp->t_maxseg;
125: again:
126: sendalot = 0;
127: off = tp->snd_nxt - tp->snd_una;
128: win = min(tp->snd_wnd, tp->snd_cwnd);
129:
130: flags = tcp_outflags[tp->t_state];
131: /*
132: * Get standard flags, and add SYN or FIN if requested by 'hidden'
133: * state flags.
134: */
135: if (tp->t_flags & TF_NEEDFIN)
136: flags |= TH_FIN;
137: if (tp->t_flags & TF_NEEDSYN)
138: flags |= TH_SYN;
139:
140: /*
141: * If in persist timeout with window of 0, send 1 byte.
142: * Otherwise, if window is small but nonzero
143: * and timer expired, we will send what we can
144: * and go to transmit state.
145: */
146: if (tp->t_force) {
147: if (win == 0) {
148: /*
149: * If we still have some data to send, then
150: * clear the FIN bit. Usually this would
151: * happen below when it realizes that we
152: * aren't sending all the data. However,
153: * if we have exactly 1 byte of unsent data,
154: * then it won't clear the FIN bit below,
155: * and if we are in persist state, we wind
156: * up sending the packet without recording
157: * that we sent the FIN bit.
158: *
159: * We can't just blindly clear the FIN bit,
160: * because if we don't have any more data
161: * to send then the probe will be the FIN
162: * itself.
163: */
164: if (off < so->so_snd.sb_cc)
165: flags &= ~TH_FIN;
166: win = 1;
167: } else {
168: tp->t_timer[TCPT_PERSIST] = 0;
169: tp->t_rxtshift = 0;
170: }
171: }
172:
/*
 * Data we may send now: what is buffered, limited by the window,
 * less the "off" bytes already sent past snd_una.  May be negative;
 * that case is handled at "len < 0" below.
 */
173: len = (long)ulmin(so->so_snd.sb_cc, win) - off;
174:
175: if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) {
176: taop = &tao_noncached;
177: bzero(taop, sizeof(*taop));
178: }
179:
180: /*
181: * Lop off SYN bit if it has already been sent. However, if this
182: * is SYN-SENT state and if segment contains data and if we don't
183: * know that foreign host supports TAO, suppress sending segment.
184: */
185: if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) {
186: flags &= ~TH_SYN;
187: off--, len++;
188: if (len > 0 && tp->t_state == TCPS_SYN_SENT &&
189: taop->tao_ccsent == 0)
190: return 0;
191: }
192:
193: /*
194: * Be careful not to send data and/or FIN on SYN segments
195: * in cases when no CC option will be sent.
196: * This measure is needed to prevent interoperability problems
197: * with not fully conformant TCP implementations.
198: */
199: if ((flags & TH_SYN) &&
200: ((tp->t_flags & TF_NOOPT) || !(tp->t_flags & TF_REQ_CC) ||
201: ((flags & TH_ACK) && !(tp->t_flags & TF_RCVD_CC)))) {
202: len = 0;
203: flags &= ~TH_FIN;
204: }
205:
206: if (len < 0) {
207: /*
208: * If FIN has been sent but not acked,
209: * but we haven't been called to retransmit,
210: * len will be -1. Otherwise, window shrank
211: * after we sent into it. If window shrank to 0,
212: * cancel pending retransmit, pull snd_nxt back
213: * to (closed) window, and set the persist timer
214: * if it isn't already going. If the window didn't
215: * close completely, just wait for an ACK.
216: */
217: len = 0;
218: if (win == 0) {
219: tp->t_timer[TCPT_REXMT] = 0;
220: tp->t_rxtshift = 0;
221: tp->snd_nxt = tp->snd_una;
222: if (tp->t_timer[TCPT_PERSIST] == 0)
223: tcp_setpersist(tp);
224: }
225: }
226: if (len > tp->t_maxseg) {
227: len = tp->t_maxseg;
228: sendalot = 1;
229: }
230: if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc))
231: flags &= ~TH_FIN;
232:
/*
 * From here on "win" is the free space in our receive buffer,
 * used for the window we advertise, no longer the send window.
 */
233: win = sbspace(&so->so_rcv);
234:
235: /*
236: * Sender silly window avoidance. Send now if the segment is
237: * a full t_maxseg; or if it empties the send buffer while the
238: * connection is idle or TF_NODELAY is set (and neither
239: * TF_MORETOCOME nor TF_NOPUSH is set); or if we are forced;
240: * or if it is at least half of max_sndwnd (covers a peer
241: * whose buffer is tiny); or if we are retransmitting
242: * (possibly after the persist timer forced us to send into
243: * a small window). Otherwise don't bother.
244: */
245: if (len) {
246: if (len == tp->t_maxseg)
247: goto send;
248: if (!(tp->t_flags & TF_MORETOCOME) &&
249: (idle || tp->t_flags & TF_NODELAY) &&
250: (tp->t_flags & TF_NOPUSH) == 0 &&
251: len + off >= so->so_snd.sb_cc)
252: goto send;
253: if (tp->t_force)
254: goto send;
255: if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0)
256: goto send;
257: if (SEQ_LT(tp->snd_nxt, tp->snd_max))
258: goto send;
259: }
260:
261: /*
262: * Compare available window to amount of window
263: * known to peer (as advertised window less
264: * next expected input). If the difference is at least two
265: * max size segments, or at least 50% of the maximum possible
266: * window, then want to send a window update to peer.
267: */
268: if (win > 0) {
269: /*
270: * "adv" is the amount we can increase the window,
271: * taking into account that we are limited by
272: * TCP_MAXWIN << tp->rcv_scale.
273: */
274: long adv = min(win, (long)TCP_MAXWIN << tp->rcv_scale) -
275: (tp->rcv_adv - tp->rcv_nxt);
276:
277: if (adv >= (long) (2 * tp->t_maxseg))
278: goto send;
279: if (2 * adv >= (long) so->so_rcv.sb_hiwat)
280: goto send;
281: }
282:
283: /*
284: * Send if we owe peer an ACK.
285: */
286: if (tp->t_flags & TF_ACKNOW)
287: goto send;
288: if ((flags & TH_RST) ||
289: ((flags & TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0))
290: goto send;
291: if (SEQ_GT(tp->snd_up, tp->snd_una))
292: goto send;
293: /*
294: * If our state indicates that FIN should be sent
295: * and we have not yet done so, or we're retransmitting the FIN,
296: * then we need to send.
297: */
298: if (flags & TH_FIN &&
299: ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una))
300: goto send;
301:
302: /*
303: * TCP window updates are not reliable, rather a polling protocol
304: * using ``persist'' packets is used to ensure receipt of window
305: * updates. The three ``states'' for the output side are:
306: * idle not doing retransmits or persists
307: * persisting to move a small or zero window
308: * (re)transmitting and thereby not persisting
309: *
310: * tp->t_timer[TCPT_PERSIST]
311: * is set when we are in persist state.
312: * tp->t_force
313: * is set when we are called to send a persist packet.
314: * tp->t_timer[TCPT_REXMT]
315: * is set when we are retransmitting
316: * The output side is idle when both timers are zero.
317: *
318: * If send window is too small, there is data to transmit, and no
319: * retransmit or persist is pending, then go to persist state.
320: * If nothing happens soon, send when timer expires:
321: * if window is nonzero, transmit what we can,
322: * otherwise force out a byte.
323: */
324: if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 &&
325: tp->t_timer[TCPT_PERSIST] == 0) {
326: tp->t_rxtshift = 0;
327: tcp_setpersist(tp);
328: }
329:
330: /*
331: * No reason to send a segment, just return.
332: */
333: return (0);
334:
335: send:
336: /*
337: * Before ESTABLISHED, force sending of initial options
338: * unless TCP set not to do any options.
339: * NOTE: we assume that the IP/TCP header plus TCP options
340: * always fit in a single mbuf, leaving room for a maximum
341: * link header, i.e.
342: * max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MHLEN
343: */
344: optlen = 0;
345: hdrlen = sizeof (struct tcpiphdr);
346: if (flags & TH_SYN) {
347: tp->snd_nxt = tp->iss;
348: if ((tp->t_flags & TF_NOOPT) == 0) {
349: u_short mss;
350:
351: opt[0] = TCPOPT_MAXSEG;
352: opt[1] = TCPOLEN_MAXSEG;
353: mss = htons((u_short) tcp_mssopt(tp));
354: (void)memcpy(opt + 2, &mss, sizeof(mss));
355: optlen = TCPOLEN_MAXSEG;
356:
357: if ((tp->t_flags & TF_REQ_SCALE) &&
358: ((flags & TH_ACK) == 0 ||
359: (tp->t_flags & TF_RCVD_SCALE))) {
360: *((u_int32_t *)(opt + optlen)) = htonl(
361: TCPOPT_NOP << 24 |
362: TCPOPT_WINDOW << 16 |
363: TCPOLEN_WINDOW << 8 |
364: tp->request_r_scale);
365: optlen += 4;
366: }
367: }
368: }
369:
370: /*
371: * Send a timestamp and echo-reply if this is a SYN and our side
372: * wants to use timestamps (TF_REQ_TSTMP is set) or both our side
373: * and our peer have sent timestamps in our SYN's.
374: */
375: if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
376: (flags & TH_RST) == 0 &&
377: ((flags & TH_ACK) == 0 ||
378: (tp->t_flags & TF_RCVD_TSTMP))) {
379: u_int32_t *lp = (u_int32_t *)(opt + optlen);
380:
381: /* Form timestamp option as shown in appendix A of RFC 1323. */
382: *lp++ = htonl(TCPOPT_TSTAMP_HDR);
383: *lp++ = htonl(tcp_now);
384: *lp = htonl(tp->ts_recent);
385: optlen += TCPOLEN_TSTAMP_APPA;
386: }
387:
388: /*
389: * Send `CC-family' options if our side wants to use them (TF_REQ_CC),
390: * options are allowed (!TF_NOOPT) and it's not a RST.
391: */
392: if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC &&
393: (flags & TH_RST) == 0) {
394: switch (flags & (TH_SYN|TH_ACK)) {
395: /*
396: * This is a normal ACK, send CC if we received CC before
397: * from our peer.
398: */
399: case TH_ACK:
400: if (!(tp->t_flags & TF_RCVD_CC))
401: break;
402: /*FALLTHROUGH*/
403:
404: /*
405: * We can only get here in T/TCP's SYN_SENT* state, when
406: * we're sending a non-SYN segment without waiting for
407: * the ACK of our SYN. A check above assures that we only
408: * do this if our peer understands T/TCP.
409: */
410: case 0:
411: opt[optlen++] = TCPOPT_NOP;
412: opt[optlen++] = TCPOPT_NOP;
413: opt[optlen++] = TCPOPT_CC;
414: opt[optlen++] = TCPOLEN_CC;
415: *(u_int32_t *)&opt[optlen] = htonl(tp->cc_send);
416:
417: optlen += 4;
418: break;
419:
420: /*
421: * This is our initial SYN, check whether we have to use
422: * CC or CC.new.
423: */
424: case TH_SYN:
425: opt[optlen++] = TCPOPT_NOP;
426: opt[optlen++] = TCPOPT_NOP;
427: opt[optlen++] = tp->t_flags & TF_SENDCCNEW ?
428: TCPOPT_CCNEW : TCPOPT_CC;
429: opt[optlen++] = TCPOLEN_CC;
430: *(u_int32_t *)&opt[optlen] = htonl(tp->cc_send);
431: optlen += 4;
432: break;
433:
434: /*
435: * This is a SYN,ACK; send CC and CC.echo if we received
436: * CC from our peer.
437: */
438: case (TH_SYN|TH_ACK):
439: if (tp->t_flags & TF_RCVD_CC) {
440: opt[optlen++] = TCPOPT_NOP;
441: opt[optlen++] = TCPOPT_NOP;
442: opt[optlen++] = TCPOPT_CC;
443: opt[optlen++] = TCPOLEN_CC;
444: *(u_int32_t *)&opt[optlen] =
445: htonl(tp->cc_send);
446: optlen += 4;
447: opt[optlen++] = TCPOPT_NOP;
448: opt[optlen++] = TCPOPT_NOP;
449: opt[optlen++] = TCPOPT_CCECHO;
450: opt[optlen++] = TCPOLEN_CC;
451: *(u_int32_t *)&opt[optlen] =
452: htonl(tp->cc_recv);
453: optlen += 4;
454: }
455: break;
456: }
457: }
458:
459: hdrlen += optlen;
460:
461: if (tp->t_inpcb->inp_options) {
462: ipoptlen = tp->t_inpcb->inp_options->m_len -
463: offsetof(struct ipoption, ipopt_list);
464: } else {
465: ipoptlen = 0;
466: }
467:
468: /*
469: * Adjust data length if insertion of options will
470: * bump the packet length beyond the t_maxopd length.
471: * Clear the FIN bit because we cut off the tail of
472: * the segment.
473: */
474: if (len + optlen + ipoptlen > tp->t_maxopd) {
475: /*
476: * If there is still more to send, don't close the connection.
477: */
478: flags &= ~TH_FIN;
479: len = tp->t_maxopd - optlen - ipoptlen;
480: sendalot = 1;
481: }
482:
483: /*#ifdef DIAGNOSTIC*/
484: if (max_linkhdr + hdrlen > MHLEN)
485: panic("tcphdr too big");
486: /*#endif*/
487:
488: /*
489: * Grab a header mbuf, attaching a copy of data to
490: * be transmitted, and initialize the header from
491: * the template for sends on this connection.
492: */
493: if (len) {
494: if (tp->t_force && len == 1)
495: tcpstat.tcps_sndprobe++;
496: else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) {
497: tcpstat.tcps_sndrexmitpack++;
498: tcpstat.tcps_sndrexmitbyte += len;
499: } else {
500: tcpstat.tcps_sndpack++;
501: tcpstat.tcps_sndbyte += len;
502: }
503: #ifdef notyet
504: if ((m = m_copypack(so->so_snd.sb_mb, off,
505: (int)len, max_linkhdr + hdrlen)) == 0) {
506: error = ENOBUFS;
507: goto out;
508: }
509: /*
510: * m_copypack left space for our hdr; use it.
511: */
512: m->m_len += hdrlen;
513: m->m_data -= hdrlen;
514: #else
515: MGETHDR(m, M_DONTWAIT, MT_HEADER);
516: if (m == NULL) {
517: error = ENOBUFS;
518: goto out;
519: }
520: m->m_data += max_linkhdr;
521: m->m_len = hdrlen;
522: if (len <= MHLEN - hdrlen - max_linkhdr) {
523: m_copydata(so->so_snd.sb_mb, off, (int) len,
524: mtod(m, caddr_t) + hdrlen);
525: m->m_len += len;
526: } else {
527: m->m_next = m_copy(so->so_snd.sb_mb, off, (int) len);
528: if (m->m_next == 0) {
529: (void) m_free(m);
530: error = ENOBUFS;
531: goto out;
532: }
533: }
534: #endif
535: /*
536: * If we're sending everything we've got, set PUSH.
537: * (This will keep happy those implementations which only
538: * give data to the user when a buffer fills or
539: * a PUSH comes in.)
540: */
541: if (off + len == so->so_snd.sb_cc)
542: flags |= TH_PUSH;
543: } else {
544: if (tp->t_flags & TF_ACKNOW)
545: tcpstat.tcps_sndacks++;
546: else if (flags & (TH_SYN|TH_FIN|TH_RST))
547: tcpstat.tcps_sndctrl++;
548: else if (SEQ_GT(tp->snd_up, tp->snd_una))
549: tcpstat.tcps_sndurg++;
550: else
551: tcpstat.tcps_sndwinup++;
552:
553: MGETHDR(m, M_DONTWAIT, MT_HEADER);
554: if (m == NULL) {
555: error = ENOBUFS;
556: goto out;
557: }
558: m->m_data += max_linkhdr;
559: m->m_len = hdrlen;
560: }
561: m->m_pkthdr.rcvif = (struct ifnet *)0;
562: ti = mtod(m, struct tcpiphdr *);
563: if (tp->t_template == 0)
564: panic("tcp_output");
565: (void)memcpy(ti, tp->t_template, sizeof (struct tcpiphdr));
566:
567: /*
568: * Fill in fields, remembering maximum advertised
569: * window for use in delaying messages about window sizes.
570: * If resending a FIN, be sure not to use a new sequence number.
571: */
572: if (flags & TH_FIN && tp->t_flags & TF_SENTFIN &&
573: tp->snd_nxt == tp->snd_max)
574: tp->snd_nxt--;
575: /*
576: * If we are doing retransmissions, then snd_nxt will
577: * not reflect the first unsent octet. For ACK only
578: * packets, we do not want the sequence number of the
579: * retransmitted packet, we want the sequence number
580: * of the next unsent octet. So, if there is no data
581: * (and no SYN or FIN), use snd_max instead of snd_nxt
582: * when filling in ti_seq. But if we are in persist
583: * state, snd_max might reflect one byte beyond the
584: * right edge of the window, so use snd_nxt in that
585: * case, since we know we aren't doing a retransmission.
586: * (retransmit and persist are mutually exclusive...)
587: */
588: if (len || (flags & (TH_SYN|TH_FIN)) || tp->t_timer[TCPT_PERSIST])
589: ti->ti_seq = htonl(tp->snd_nxt);
590: else
591: ti->ti_seq = htonl(tp->snd_max);
592: ti->ti_ack = htonl(tp->rcv_nxt);
593: if (optlen) {
594: bcopy(opt, ti + 1, optlen);
595: ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2;
596: }
597: ti->ti_flags = flags;
598: /*
599: * Calculate receive window. Don't shrink window,
600: * but avoid silly window syndrome.
601: */
602: if (win < (long)(so->so_rcv.sb_hiwat / 4) && win < (long)tp->t_maxseg)
603: win = 0;
604: if (win < (long)(tp->rcv_adv - tp->rcv_nxt))
605: win = (long)(tp->rcv_adv - tp->rcv_nxt);
606: if (win > (long)TCP_MAXWIN << tp->rcv_scale)
607: win = (long)TCP_MAXWIN << tp->rcv_scale;
608: ti->ti_win = htons((u_short) (win>>tp->rcv_scale));
609: if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
610: ti->ti_urp = htons((u_short)(tp->snd_up - tp->snd_nxt));
611: ti->ti_flags |= TH_URG;
612: } else
613: /*
614: * If no urgent pointer to send, then we pull
615: * the urgent pointer to the left edge of the send window
616: * so that it doesn't drift into the send window on sequence
617: * number wraparound.
618: */
619: tp->snd_up = tp->snd_una; /* drag it along */
620:
621: /*
622: * Put TCP length in extended header, and then
623: * checksum extended header and data.
624: */
625: if (len + optlen)
626: ti->ti_len = htons((u_short)(sizeof (struct tcphdr) +
627: optlen + len));
628: ti->ti_sum = in_cksum(m, (int)(hdrlen + len));
629:
630: /*
631: * In transmit state, time the transmission and arrange for
632: * the retransmit. In persist state, just set snd_max.
633: */
634: if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) {
635: tcp_seq startseq = tp->snd_nxt;
636:
637: /*
638: * Advance snd_nxt over sequence space of this segment.
639: */
640: if (flags & (TH_SYN|TH_FIN)) {
641: if (flags & TH_SYN)
642: tp->snd_nxt++;
643: if (flags & TH_FIN) {
644: tp->snd_nxt++;
645: tp->t_flags |= TF_SENTFIN;
646: }
647: }
648: tp->snd_nxt += len;
649: if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
650: tp->snd_max = tp->snd_nxt;
651: /*
652: * Time this transmission if not a retransmission and
653: * not currently timing anything.
654: */
655: if (tp->t_rtt == 0) {
656: tp->t_rtt = 1;
657: tp->t_rtseq = startseq;
658: tcpstat.tcps_segstimed++;
659: }
660: }
661:
662: /*
663: * Set retransmit timer if not currently set,
664: * and not doing an ack or a keep-alive probe.
665: * Initial value for retransmit timer is smoothed
666: * round-trip time + 2 * round-trip time variance.
667: * Initialize shift counter which is used for backoff
668: * of retransmit time.
669: */
670: if (tp->t_timer[TCPT_REXMT] == 0 &&
671: tp->snd_nxt != tp->snd_una) {
672: tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
673: if (tp->t_timer[TCPT_PERSIST]) {
674: tp->t_timer[TCPT_PERSIST] = 0;
675: tp->t_rxtshift = 0;
676: }
677: }
678: } else
679: if (SEQ_GT(tp->snd_nxt + len, tp->snd_max))
680: tp->snd_max = tp->snd_nxt + len;
681:
682: #if TCPDEBUG
683: /*
684: * Trace.
685: */
686: if (so->so_options & SO_DEBUG)
687: tcp_trace(TA_OUTPUT, tp->t_state, tp, ti, 0);
688: #endif
689:
690: /*
691: * Fill in IP length and desired time to live and
692: * send to IP level. There should be a better way
693: * to handle ttl and tos; we could keep them in
694: * the template, but need a way to checksum without them.
695: */
696: m->m_pkthdr.len = hdrlen + len;
697: {
698: #if 1
699: struct rtentry *rt;
700: #endif
701: ((struct ip *)ti)->ip_len = m->m_pkthdr.len;
702: ((struct ip *)ti)->ip_ttl = tp->t_inpcb->inp_ip_ttl; /* XXX */
703: ((struct ip *)ti)->ip_tos = tp->t_inpcb->inp_ip_tos; /* XXX */
704: #if 1
705: /*
706: * See if we should do MTU discovery. We do it only if the following
707: * are true:
708: * 1) we have a valid route to the destination
709: * 2) the MTU is not locked (if it is, then discovery has been
710: * disabled)
711: */
712: if ((rt = tp->t_inpcb->inp_route.ro_rt)
713: && rt->rt_flags & RTF_UP
714: && !(rt->rt_rmx.rmx_locks & RTV_MTU)) {
715: ((struct ip *)ti)->ip_off |= IP_DF;
716: }
717: #endif
718: error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route,
719: so->so_options & SO_DONTROUTE, 0);
720: }
721: if (error) {
722: out:
/* Also reached by "goto out" when mbuf allocation fails (error == ENOBUFS). */
723: if (error == ENOBUFS) {
724: tcp_quench(tp->t_inpcb, 0);
725: return (0);
726: }
727: #if 1
728: if (error == EMSGSIZE) {
729: /*
730: * ip_output() will have already fixed the route
731: * for us. tcp_mtudisc() will, as its last action,
732: * initiate retransmission, so it is important to
733: * not do so here.
734: */
735: tcp_mtudisc(tp->t_inpcb, 0);
736: return 0;
737: }
738: #endif
739: if ((error == EHOSTUNREACH || error == ENETDOWN)
740: && TCPS_HAVERCVDSYN(tp->t_state)) {
741: tp->t_softerror = error;
742: return (0);
743: }
744: return (error);
745: }
746: tcpstat.tcps_sndtotal++;
747:
748: /*
749: * Data sent (as far as we can tell).
750: * If this advertises a larger window than any other segment,
751: * then remember the size of the advertised window.
752: * Any pending ACK has now been sent.
753: */
754: if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv))
755: tp->rcv_adv = tp->rcv_nxt + win;
756: tp->last_ack_sent = tp->rcv_nxt;
757: tp->t_flags &= ~(TF_ACKNOW|TF_DELACK);
758: if (sendalot)
759: goto again;
760: return (0);
761: }
762:
763: /*
764:  * Arm (or re-arm) the persist timer of connection tp.
765:  *
766:  * The interval is half of (t_srtt / 4 + t_rttvar), multiplied by the
767:  * tcp_backoff[] entry for the current t_rxtshift and clamped to
768:  * [TCPTV_PERSMIN, TCPTV_PERSMAX] by TCPT_RANGESET.  t_rxtshift is
769:  * then advanced by one unless it has reached TCP_MAXRXTSHIFT.
770:  * Panics if the retransmit timer is set.
771:  */
772: void
773: tcp_setpersist(tp)
774: 	register struct tcpcb *tp;
775: {
776: 	register int base_ticks;
777:
778: 	/* Persist and retransmit timers must never run together. */
779: 	if (tp->t_timer[TCPT_REXMT] != 0)
780: 		panic("tcp_output REXMT");
781:
782: 	base_ticks = (tp->t_rttvar + (tp->t_srtt >> 2)) >> 1;
783:
784: 	TCPT_RANGESET(tp->t_timer[TCPT_PERSIST],
785: 	    base_ticks * tcp_backoff[tp->t_rxtshift],
786: 	    TCPTV_PERSMIN, TCPTV_PERSMAX);
787:
788: 	/* Step the backoff index for the next call, up to its cap. */
789: 	if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
790: 		tp->t_rxtshift += 1;
791: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.