|
|
1.1 root 1: /*
2: * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994
3: * The Regents of the University of California. All rights reserved.
4: *
5: * Redistribution and use in source and binary forms, with or without
6: * modification, are permitted provided that the following conditions
7: * are met:
8: * 1. Redistributions of source code must retain the above copyright
9: * notice, this list of conditions and the following disclaimer.
10: * 2. Redistributions in binary form must reproduce the above copyright
11: * notice, this list of conditions and the following disclaimer in the
12: * documentation and/or other materials provided with the distribution.
13: * 3. All advertising materials mentioning features or use of this software
14: * must display the following acknowledgement:
15: * This product includes software developed by the University of
16: * California, Berkeley and its contributors.
17: * 4. Neither the name of the University nor the names of its contributors
18: * may be used to endorse or promote products derived from this software
19: * without specific prior written permission.
20: *
21: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31: * SUCH DAMAGE.
32: *
33: * @(#)tcp_input.c 8.5 (Berkeley) 4/10/94
34: * tcp_input.c,v 1.10 1994/10/13 18:36:32 wollman Exp
35: */
36:
37: /*
38: * Changes and additions relating to SLiRP
39: * Copyright (c) 1995 Danny Gasparovski.
1.1.1.3 ! root 40: *
! 41: * Please read the file COPYRIGHT for the
1.1 root 42: * terms and conditions of the copyright.
43: */
44:
45: #include <slirp.h>
46: #include "ip_icmp.h"
47:
48: struct socket tcb;
49:
1.1.1.3 ! root 50: #define TCPREXMTTHRESH 3
1.1 root 51: struct socket *tcp_last_so = &tcb;
52:
53: tcp_seq tcp_iss; /* tcp initial send seq # */
54:
55: #define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * PR_SLOWHZ)
56:
57: /* for modulo comparisons of timestamps */
58: #define TSTMP_LT(a,b) ((int)((a)-(b)) < 0)
59: #define TSTMP_GEQ(a,b) ((int)((a)-(b)) >= 0)
60:
/*
 * TCP_REASS(tp, ti, m, so, flags)
 *
 * Deliver segment ti (data in mbuf m) to the connection with control
 * block tp and socket so, and store the FIN indication in flags.
 *
 * Fast path -- the segment is the next one expected (ti_seq == rcv_nxt),
 * the reassembly queue is empty and the connection is ESTABLISHED:
 * rcv_nxt is advanced, flags receives the segment's TH_FIN bit and the
 * data is appended to the socket (via tcp_emu() first when so_emu is
 * set).  This avoids linkage into and removal from the queue.  The ACK
 * is delayed (TF_DELACK); when TCP_ACK_HACK is defined, a segment that
 * carries TH_PUSH is ACKed immediately instead.
 *
 * Slow path -- anything else: tcp_reass() queues the segment, its
 * return value is stored in flags, and the segment is ACKed immediately
 * (TF_ACKNOW) so fast retransmit can work.
 *
 * Every parameter use is parenthesized, so arbitrary expressions may be
 * passed.  Arguments are still evaluated more than once: pass only
 * side-effect-free expressions.  flags must be an lvalue.  The macro
 * expands to a brace-enclosed block (a statement, not an expression).
 */
#ifdef TCP_ACK_HACK
/* ACK flag for an in-order segment: at once if pushed, else delayed. */
#define TCP_REASS_ACKFLAG(ti) \
	(((ti)->ti_flags & TH_PUSH) ? TF_ACKNOW : TF_DELACK)
#else
/* ACK flag for an in-order segment: always delayed. */
#define TCP_REASS_ACKFLAG(ti) (TF_DELACK)
#endif

#define TCP_REASS(tp, ti, m, so, flags) { \
	if ((ti)->ti_seq == (tp)->rcv_nxt && \
	    (tp)->seg_next == (tcpiphdrp_32)(tp) && \
	    (tp)->t_state == TCPS_ESTABLISHED) { \
		(tp)->t_flags |= TCP_REASS_ACKFLAG(ti); \
		(tp)->rcv_nxt += (ti)->ti_len; \
		(flags) = (ti)->ti_flags & TH_FIN; \
		STAT(tcpstat.tcps_rcvpack++); \
		STAT(tcpstat.tcps_rcvbyte += (ti)->ti_len); \
		if ((so)->so_emu) { \
			if (tcp_emu((so), (m))) \
				sbappend((so), (m)); \
		} else \
			sbappend((so), (m)); \
		/* sorwakeup(so); */ \
	} else { \
		(flags) = tcp_reass((tp), (ti), (m)); \
		(tp)->t_flags |= TF_ACKNOW; \
	} \
}
1.1.1.3 ! root 115: static void tcp_dooptions(struct tcpcb *tp, u_char *cp, int cnt,
! 116: struct tcpiphdr *ti);
! 117: static void tcp_xmit_timer(register struct tcpcb *tp, int rtt);
! 118:
! 119: static int
! 120: tcp_reass(register struct tcpcb *tp, register struct tcpiphdr *ti,
! 121: struct mbuf *m)
1.1 root 122: {
123: register struct tcpiphdr *q;
124: struct socket *so = tp->t_socket;
125: int flags;
1.1.1.3 ! root 126:
1.1 root 127: /*
128: * Call with ti==0 after become established to
129: * force pre-ESTABLISHED data up to user socket.
130: */
131: if (ti == 0)
132: goto present;
133:
134: /*
135: * Find a segment which begins after this one does.
136: */
137: for (q = (struct tcpiphdr *)tp->seg_next; q != (struct tcpiphdr *)tp;
138: q = (struct tcpiphdr *)q->ti_next)
139: if (SEQ_GT(q->ti_seq, ti->ti_seq))
140: break;
141:
142: /*
143: * If there is a preceding segment, it may provide some of
144: * our data already. If so, drop the data from the incoming
145: * segment. If it provides all of our data, drop us.
146: */
147: if ((struct tcpiphdr *)q->ti_prev != (struct tcpiphdr *)tp) {
148: register int i;
149: q = (struct tcpiphdr *)q->ti_prev;
150: /* conversion to int (in i) handles seq wraparound */
151: i = q->ti_seq + q->ti_len - ti->ti_seq;
152: if (i > 0) {
153: if (i >= ti->ti_len) {
1.1.1.3 ! root 154: STAT(tcpstat.tcps_rcvduppack++);
! 155: STAT(tcpstat.tcps_rcvdupbyte += ti->ti_len);
1.1 root 156: m_freem(m);
157: /*
158: * Try to present any queued data
159: * at the left window edge to the user.
160: * This is needed after the 3-WHS
161: * completes.
162: */
163: goto present; /* ??? */
164: }
165: m_adj(m, i);
166: ti->ti_len -= i;
167: ti->ti_seq += i;
168: }
169: q = (struct tcpiphdr *)(q->ti_next);
170: }
1.1.1.3 ! root 171: STAT(tcpstat.tcps_rcvoopack++);
! 172: STAT(tcpstat.tcps_rcvoobyte += ti->ti_len);
1.1 root 173: REASS_MBUF(ti) = (mbufp_32) m; /* XXX */
174:
175: /*
176: * While we overlap succeeding segments trim them or,
177: * if they are completely covered, dequeue them.
178: */
179: while (q != (struct tcpiphdr *)tp) {
180: register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq;
181: if (i <= 0)
182: break;
183: if (i < q->ti_len) {
184: q->ti_seq += i;
185: q->ti_len -= i;
186: m_adj((struct mbuf *) REASS_MBUF(q), i);
187: break;
188: }
189: q = (struct tcpiphdr *)q->ti_next;
190: m = (struct mbuf *) REASS_MBUF((struct tcpiphdr *)q->ti_prev);
191: remque_32((void *)(q->ti_prev));
192: m_freem(m);
193: }
194:
195: /*
196: * Stick new segment in its place.
197: */
198: insque_32(ti, (void *)(q->ti_prev));
199:
200: present:
201: /*
202: * Present data to user, advancing rcv_nxt through
203: * completed sequence space.
204: */
205: if (!TCPS_HAVEESTABLISHED(tp->t_state))
206: return (0);
207: ti = (struct tcpiphdr *) tp->seg_next;
208: if (ti == (struct tcpiphdr *)tp || ti->ti_seq != tp->rcv_nxt)
209: return (0);
210: if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len)
211: return (0);
212: do {
213: tp->rcv_nxt += ti->ti_len;
214: flags = ti->ti_flags & TH_FIN;
215: remque_32(ti);
216: m = (struct mbuf *) REASS_MBUF(ti); /* XXX */
217: ti = (struct tcpiphdr *)ti->ti_next;
218: /* if (so->so_state & SS_FCANTRCVMORE) */
219: if (so->so_state & SS_FCANTSENDMORE)
220: m_freem(m);
221: else {
222: if (so->so_emu) {
223: if (tcp_emu(so,m)) sbappend(so, m);
224: } else
225: sbappend(so, m);
226: }
227: } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt);
228: /* sorwakeup(so); */
229: return (flags);
230: }
231:
232: /*
233: * TCP input routine, follows pages 65-76 of the
234: * protocol specification dated September, 1981 very closely.
235: */
236: void
237: tcp_input(m, iphlen, inso)
238: register struct mbuf *m;
239: int iphlen;
240: struct socket *inso;
241: {
242: struct ip save_ip, *ip;
243: register struct tcpiphdr *ti;
244: caddr_t optp = NULL;
245: int optlen = 0;
246: int len, tlen, off;
247: register struct tcpcb *tp = 0;
248: register int tiflags;
249: struct socket *so = 0;
250: int todrop, acked, ourfinisacked, needoutput = 0;
251: /* int dropsocket = 0; */
252: int iss = 0;
253: u_long tiwin;
254: int ret;
255: /* int ts_present = 0; */
256:
257: DEBUG_CALL("tcp_input");
1.1.1.3 ! root 258: DEBUG_ARGS((dfd," m = %8lx iphlen = %2d inso = %lx\n",
1.1 root 259: (long )m, iphlen, (long )inso ));
1.1.1.3 ! root 260:
1.1 root 261: /*
262: * If called with m == 0, then we're continuing the connect
263: */
264: if (m == NULL) {
265: so = inso;
1.1.1.3 ! root 266:
1.1 root 267: /* Re-set a few variables */
268: tp = sototcpcb(so);
269: m = so->so_m;
270: so->so_m = 0;
271: ti = so->so_ti;
272: tiwin = ti->ti_win;
273: tiflags = ti->ti_flags;
1.1.1.3 ! root 274:
1.1 root 275: goto cont_conn;
276: }
1.1.1.3 ! root 277:
! 278:
! 279: STAT(tcpstat.tcps_rcvtotal++);
1.1 root 280: /*
281: * Get IP and TCP header together in first mbuf.
282: * Note: IP leaves IP header in first mbuf.
283: */
284: ti = mtod(m, struct tcpiphdr *);
285: if (iphlen > sizeof(struct ip )) {
286: ip_stripoptions(m, (struct mbuf *)0);
287: iphlen=sizeof(struct ip );
288: }
289: /* XXX Check if too short */
1.1.1.3 ! root 290:
1.1 root 291:
292: /*
293: * Save a copy of the IP header in case we want to restore it
294: * for sending an ICMP error message in response.
295: */
296: ip=mtod(m, struct ip *);
1.1.1.3 ! root 297: save_ip = *ip;
1.1 root 298: save_ip.ip_len+= iphlen;
299:
300: /*
301: * Checksum extended TCP header and data.
302: */
303: tlen = ((struct ip *)ti)->ip_len;
304: ti->ti_next = ti->ti_prev = 0;
305: ti->ti_x1 = 0;
306: ti->ti_len = htons((u_int16_t)tlen);
307: len = sizeof(struct ip ) + tlen;
308: /* keep checksum for ICMP reply
1.1.1.3 ! root 309: * ti->ti_sum = cksum(m, len);
1.1 root 310: * if (ti->ti_sum) { */
311: if(cksum(m, len)) {
1.1.1.3 ! root 312: STAT(tcpstat.tcps_rcvbadsum++);
1.1 root 313: goto drop;
314: }
315:
316: /*
317: * Check that TCP offset makes sense,
318: * pull out TCP options and adjust length. XXX
319: */
320: off = ti->ti_off << 2;
321: if (off < sizeof (struct tcphdr) || off > tlen) {
1.1.1.3 ! root 322: STAT(tcpstat.tcps_rcvbadoff++);
1.1 root 323: goto drop;
324: }
325: tlen -= off;
326: ti->ti_len = tlen;
327: if (off > sizeof (struct tcphdr)) {
328: optlen = off - sizeof (struct tcphdr);
329: optp = mtod(m, caddr_t) + sizeof (struct tcpiphdr);
330:
1.1.1.3 ! root 331: /*
1.1 root 332: * Do quick retrieval of timestamp options ("options
333: * prediction?"). If timestamp is the only option and it's
334: * formatted as recommended in RFC 1323 appendix A, we
335: * quickly get the values now and not bother calling
336: * tcp_dooptions(), etc.
337: */
338: /* if ((optlen == TCPOLEN_TSTAMP_APPA ||
339: * (optlen > TCPOLEN_TSTAMP_APPA &&
340: * optp[TCPOLEN_TSTAMP_APPA] == TCPOPT_EOL)) &&
341: * *(u_int32_t *)optp == htonl(TCPOPT_TSTAMP_HDR) &&
342: * (ti->ti_flags & TH_SYN) == 0) {
343: * ts_present = 1;
344: * ts_val = ntohl(*(u_int32_t *)(optp + 4));
345: * ts_ecr = ntohl(*(u_int32_t *)(optp + 8));
346: * optp = NULL; / * we've parsed the options * /
347: * }
348: */
349: }
350: tiflags = ti->ti_flags;
1.1.1.3 ! root 351:
1.1 root 352: /*
353: * Convert TCP protocol specific fields to host format.
354: */
355: NTOHL(ti->ti_seq);
356: NTOHL(ti->ti_ack);
357: NTOHS(ti->ti_win);
358: NTOHS(ti->ti_urp);
359:
360: /*
361: * Drop TCP, IP headers and TCP options.
362: */
363: m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
364: m->m_len -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
1.1.1.3 ! root 365:
1.1 root 366: /*
367: * Locate pcb for segment.
368: */
369: findso:
370: so = tcp_last_so;
371: if (so->so_fport != ti->ti_dport ||
372: so->so_lport != ti->ti_sport ||
373: so->so_laddr.s_addr != ti->ti_src.s_addr ||
374: so->so_faddr.s_addr != ti->ti_dst.s_addr) {
375: so = solookup(&tcb, ti->ti_src, ti->ti_sport,
376: ti->ti_dst, ti->ti_dport);
377: if (so)
378: tcp_last_so = so;
1.1.1.3 ! root 379: STAT(tcpstat.tcps_socachemiss++);
1.1 root 380: }
381:
382: /*
383: * If the state is CLOSED (i.e., TCB does not exist) then
384: * all data in the incoming segment is discarded.
385: * If the TCB exists but is in CLOSED state, it is embryonic,
386: * but should either do a listen or a connect soon.
387: *
388: * state == CLOSED means we've done socreate() but haven't
1.1.1.3 ! root 389: * attached it to a protocol yet...
! 390: *
1.1 root 391: * XXX If a TCB does not exist, and the TH_SYN flag is
392: * the only flag set, then create a session, mark it
393: * as if it was LISTENING, and continue...
394: */
395: if (so == 0) {
396: if ((tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) != TH_SYN)
397: goto dropwithreset;
1.1.1.3 ! root 398:
1.1 root 399: if ((so = socreate()) == NULL)
400: goto dropwithreset;
401: if (tcp_attach(so) < 0) {
402: free(so); /* Not sofree (if it failed, it's not insqued) */
403: goto dropwithreset;
404: }
1.1.1.3 ! root 405:
! 406: sbreserve(&so->so_snd, TCP_SNDSPACE);
! 407: sbreserve(&so->so_rcv, TCP_RCVSPACE);
! 408:
1.1 root 409: /* tcp_last_so = so; */ /* XXX ? */
410: /* tp = sototcpcb(so); */
1.1.1.3 ! root 411:
1.1 root 412: so->so_laddr = ti->ti_src;
413: so->so_lport = ti->ti_sport;
414: so->so_faddr = ti->ti_dst;
415: so->so_fport = ti->ti_dport;
1.1.1.3 ! root 416:
1.1 root 417: if ((so->so_iptos = tcp_tos(so)) == 0)
418: so->so_iptos = ((struct ip *)ti)->ip_tos;
1.1.1.3 ! root 419:
1.1 root 420: tp = sototcpcb(so);
421: tp->t_state = TCPS_LISTEN;
422: }
1.1.1.3 ! root 423:
1.1 root 424: /*
425: * If this is a still-connecting socket, this is probably
426: * a retransmit of the SYN. Whether it's a retransmit SYN
427: * or something else, we nuke it.
428: */
429: if (so->so_state & SS_ISFCONNECTING)
430: goto drop;
431:
432: tp = sototcpcb(so);
1.1.1.3 ! root 433:
1.1 root 434: /* XXX Should never fail */
435: if (tp == 0)
436: goto dropwithreset;
437: if (tp->t_state == TCPS_CLOSED)
438: goto drop;
1.1.1.3 ! root 439:
1.1 root 440: /* Unscale the window into a 32-bit value. */
441: /* if ((tiflags & TH_SYN) == 0)
442: * tiwin = ti->ti_win << tp->snd_scale;
443: * else
444: */
445: tiwin = ti->ti_win;
446:
447: /*
448: * Segment received on connection.
449: * Reset idle time and keep-alive timer.
450: */
451: tp->t_idle = 0;
1.1.1.3 ! root 452: if (SO_OPTIONS)
! 453: tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL;
1.1 root 454: else
1.1.1.3 ! root 455: tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE;
1.1 root 456:
457: /*
458: * Process options if not in LISTEN state,
459: * else do it below (after getting remote address).
460: */
461: if (optp && tp->t_state != TCPS_LISTEN)
1.1.1.3 ! root 462: tcp_dooptions(tp, (u_char *)optp, optlen, ti);
1.1 root 463: /* , */
464: /* &ts_present, &ts_val, &ts_ecr); */
465:
1.1.1.3 ! root 466: /*
1.1 root 467: * Header prediction: check for the two common cases
468: * of a uni-directional data xfer. If the packet has
469: * no control flags, is in-sequence, the window didn't
470: * change and we're not retransmitting, it's a
471: * candidate. If the length is zero and the ack moved
472: * forward, we're the sender side of the xfer. Just
473: * free the data acked & wake any higher level process
474: * that was blocked waiting for space. If the length
475: * is non-zero and the ack didn't move, we're the
476: * receiver side. If we're getting packets in-order
477: * (the reassembly queue is empty), add the data to
478: * the socket buffer and note that we need a delayed ack.
479: *
480: * XXX Some of these tests are not needed
481: * eg: the tiwin == tp->snd_wnd prevents many more
482: * predictions.. with no *real* advantage..
483: */
484: if (tp->t_state == TCPS_ESTABLISHED &&
485: (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
486: /* (!ts_present || TSTMP_GEQ(ts_val, tp->ts_recent)) && */
487: ti->ti_seq == tp->rcv_nxt &&
488: tiwin && tiwin == tp->snd_wnd &&
489: tp->snd_nxt == tp->snd_max) {
1.1.1.3 ! root 490: /*
1.1 root 491: * If last ACK falls within this segment's sequence numbers,
492: * record the timestamp.
493: */
494: /* if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) &&
495: * SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len)) {
496: * tp->ts_recent_age = tcp_now;
497: * tp->ts_recent = ts_val;
498: * }
499: */
500: if (ti->ti_len == 0) {
501: if (SEQ_GT(ti->ti_ack, tp->snd_una) &&
502: SEQ_LEQ(ti->ti_ack, tp->snd_max) &&
503: tp->snd_cwnd >= tp->snd_wnd) {
504: /*
505: * this is a pure ack for outstanding data.
506: */
1.1.1.3 ! root 507: STAT(tcpstat.tcps_predack++);
1.1 root 508: /* if (ts_present)
509: * tcp_xmit_timer(tp, tcp_now-ts_ecr+1);
1.1.1.3 ! root 510: * else
1.1 root 511: */ if (tp->t_rtt &&
512: SEQ_GT(ti->ti_ack, tp->t_rtseq))
513: tcp_xmit_timer(tp, tp->t_rtt);
514: acked = ti->ti_ack - tp->snd_una;
1.1.1.3 ! root 515: STAT(tcpstat.tcps_rcvackpack++);
! 516: STAT(tcpstat.tcps_rcvackbyte += acked);
1.1 root 517: sbdrop(&so->so_snd, acked);
518: tp->snd_una = ti->ti_ack;
519: m_freem(m);
520:
521: /*
522: * If all outstanding data are acked, stop
523: * retransmit timer, otherwise restart timer
524: * using current (possibly backed-off) value.
525: * If process is waiting for space,
526: * wakeup/selwakeup/signal. If data
527: * are ready to send, let tcp_output
528: * decide between more output or persist.
529: */
530: if (tp->snd_una == tp->snd_max)
531: tp->t_timer[TCPT_REXMT] = 0;
532: else if (tp->t_timer[TCPT_PERSIST] == 0)
533: tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
534:
1.1.1.3 ! root 535: /*
1.1 root 536: * There's room in so_snd, sowwakeup will read()
537: * from the socket if we can
538: */
539: /* if (so->so_snd.sb_flags & SB_NOTIFY)
540: * sowwakeup(so);
541: */
1.1.1.3 ! root 542: /*
1.1 root 543: * This is called because sowwakeup might have
544: * put data into so_snd. Since we don't do sowwakeup,
545: * we don't need this.. XXX???
546: */
547: if (so->so_snd.sb_cc)
548: (void) tcp_output(tp);
549:
550: return;
551: }
552: } else if (ti->ti_ack == tp->snd_una &&
553: tp->seg_next == (tcpiphdrp_32)tp &&
554: ti->ti_len <= sbspace(&so->so_rcv)) {
555: /*
556: * this is a pure, in-sequence data packet
557: * with nothing on the reassembly queue and
558: * we have enough buffer space to take it.
559: */
1.1.1.3 ! root 560: STAT(tcpstat.tcps_preddat++);
1.1 root 561: tp->rcv_nxt += ti->ti_len;
1.1.1.3 ! root 562: STAT(tcpstat.tcps_rcvpack++);
! 563: STAT(tcpstat.tcps_rcvbyte += ti->ti_len);
1.1 root 564: /*
565: * Add data to socket buffer.
566: */
567: if (so->so_emu) {
568: if (tcp_emu(so,m)) sbappend(so, m);
569: } else
570: sbappend(so, m);
1.1.1.3 ! root 571:
! 572: /*
1.1 root 573: * XXX This is called when data arrives. Later, check
574: * if we can actually write() to the socket
575: * XXX Need to check? It's be NON_BLOCKING
576: */
577: /* sorwakeup(so); */
1.1.1.3 ! root 578:
1.1 root 579: /*
580: * If this is a short packet, then ACK now - with Nagle
581: * congestion avoidance sender won't send more until
582: * he gets an ACK.
1.1.1.3 ! root 583: *
1.1.1.2 root 584: * It is better to not delay acks at all to maximize
585: * TCP throughput. See RFC 2581.
1.1.1.3 ! root 586: */
1.1.1.2 root 587: tp->t_flags |= TF_ACKNOW;
588: tcp_output(tp);
1.1 root 589: return;
590: }
591: } /* header prediction */
592: /*
593: * Calculate amount of space in receive window,
594: * and then do TCP input processing.
595: * Receive window is amount of space in rcv queue,
596: * but not less than advertised window.
597: */
598: { int win;
599: win = sbspace(&so->so_rcv);
600: if (win < 0)
601: win = 0;
602: tp->rcv_wnd = max(win, (int)(tp->rcv_adv - tp->rcv_nxt));
603: }
604:
605: switch (tp->t_state) {
606:
607: /*
608: * If the state is LISTEN then ignore segment if it contains an RST.
609: * If the segment contains an ACK then it is bad and send a RST.
610: * If it does not contain a SYN then it is not interesting; drop it.
611: * Don't bother responding if the destination was a broadcast.
612: * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial
613: * tp->iss, and send a segment:
614: * <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
615: * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.
616: * Fill in remote peer address fields if not previously specified.
617: * Enter SYN_RECEIVED state, and process any other fields of this
618: * segment in this state.
619: */
620: case TCPS_LISTEN: {
621:
622: if (tiflags & TH_RST)
623: goto drop;
624: if (tiflags & TH_ACK)
625: goto dropwithreset;
626: if ((tiflags & TH_SYN) == 0)
627: goto drop;
1.1.1.3 ! root 628:
1.1 root 629: /*
630: * This has way too many gotos...
631: * But a bit of spaghetti code never hurt anybody :)
632: */
1.1.1.3 ! root 633:
1.1 root 634: /*
635: * If this is destined for the control address, then flag to
636: * tcp_ctl once connected, otherwise connect
637: */
638: if ((so->so_faddr.s_addr&htonl(0xffffff00)) == special_addr.s_addr) {
639: int lastbyte=ntohl(so->so_faddr.s_addr) & 0xff;
640: if (lastbyte!=CTL_ALIAS && lastbyte!=CTL_DNS) {
641: #if 0
642: if(lastbyte==CTL_CMD || lastbyte==CTL_EXEC) {
643: /* Command or exec address */
644: so->so_state |= SS_CTL;
1.1.1.3 ! root 645: } else
1.1 root 646: #endif
647: {
648: /* May be an add exec */
649: struct ex_list *ex_ptr;
650: for(ex_ptr = exec_list; ex_ptr; ex_ptr = ex_ptr->ex_next) {
1.1.1.3 ! root 651: if(ex_ptr->ex_fport == so->so_fport &&
1.1 root 652: lastbyte == ex_ptr->ex_addr) {
653: so->so_state |= SS_CTL;
654: break;
655: }
656: }
657: }
658: if(so->so_state & SS_CTL) goto cont_input;
659: }
660: /* CTL_ALIAS: Do nothing, tcp_fconnect will be called on it */
661: }
1.1.1.3 ! root 662:
1.1 root 663: if (so->so_emu & EMU_NOCONNECT) {
664: so->so_emu &= ~EMU_NOCONNECT;
665: goto cont_input;
666: }
1.1.1.3 ! root 667:
1.1 root 668: if((tcp_fconnect(so) == -1) && (errno != EINPROGRESS) && (errno != EWOULDBLOCK)) {
669: u_char code=ICMP_UNREACH_NET;
670: DEBUG_MISC((dfd," tcp fconnect errno = %d-%s\n",
671: errno,strerror(errno)));
672: if(errno == ECONNREFUSED) {
673: /* ACK the SYN, send RST to refuse the connection */
674: tcp_respond(tp, ti, m, ti->ti_seq+1, (tcp_seq)0,
1.1.1.3 ! root 675: TH_RST|TH_ACK);
1.1 root 676: } else {
677: if(errno == EHOSTUNREACH) code=ICMP_UNREACH_HOST;
678: HTONL(ti->ti_seq); /* restore tcp header */
679: HTONL(ti->ti_ack);
680: HTONS(ti->ti_win);
681: HTONS(ti->ti_urp);
682: m->m_data -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
683: m->m_len += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
684: *ip=save_ip;
685: icmp_error(m, ICMP_UNREACH,code, 0,strerror(errno));
686: }
687: tp = tcp_close(tp);
688: m_free(m);
689: } else {
690: /*
691: * Haven't connected yet, save the current mbuf
692: * and ti, and return
693: * XXX Some OS's don't tell us whether the connect()
694: * succeeded or not. So we must time it out.
695: */
696: so->so_m = m;
697: so->so_ti = ti;
698: tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
699: tp->t_state = TCPS_SYN_RECEIVED;
700: }
701: return;
702:
1.1.1.3 ! root 703: cont_conn:
! 704: /* m==NULL
1.1 root 705: * Check if the connect succeeded
706: */
707: if (so->so_state & SS_NOFDREF) {
708: tp = tcp_close(tp);
709: goto dropwithreset;
710: }
1.1.1.3 ! root 711: cont_input:
1.1 root 712: tcp_template(tp);
1.1.1.3 ! root 713:
1.1 root 714: if (optp)
715: tcp_dooptions(tp, (u_char *)optp, optlen, ti);
716: /* , */
717: /* &ts_present, &ts_val, &ts_ecr); */
1.1.1.3 ! root 718:
1.1 root 719: if (iss)
720: tp->iss = iss;
1.1.1.3 ! root 721: else
1.1 root 722: tp->iss = tcp_iss;
723: tcp_iss += TCP_ISSINCR/2;
724: tp->irs = ti->ti_seq;
725: tcp_sendseqinit(tp);
726: tcp_rcvseqinit(tp);
727: tp->t_flags |= TF_ACKNOW;
728: tp->t_state = TCPS_SYN_RECEIVED;
729: tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
1.1.1.3 ! root 730: STAT(tcpstat.tcps_accepts++);
1.1 root 731: goto trimthenstep6;
732: } /* case TCPS_LISTEN */
1.1.1.3 ! root 733:
1.1 root 734: /*
735: * If the state is SYN_SENT:
736: * if seg contains an ACK, but not for our SYN, drop the input.
737: * if seg contains a RST, then drop the connection.
738: * if seg does not contain SYN, then drop it.
739: * Otherwise this is an acceptable SYN segment
740: * initialize tp->rcv_nxt and tp->irs
741: * if seg contains ack then advance tp->snd_una
742: * if SYN has been acked change to ESTABLISHED else SYN_RCVD state
743: * arrange for segment to be acked (eventually)
744: * continue processing rest of data/controls, beginning with URG
745: */
746: case TCPS_SYN_SENT:
747: if ((tiflags & TH_ACK) &&
748: (SEQ_LEQ(ti->ti_ack, tp->iss) ||
749: SEQ_GT(ti->ti_ack, tp->snd_max)))
750: goto dropwithreset;
751:
752: if (tiflags & TH_RST) {
753: if (tiflags & TH_ACK)
754: tp = tcp_drop(tp,0); /* XXX Check t_softerror! */
755: goto drop;
756: }
757:
758: if ((tiflags & TH_SYN) == 0)
759: goto drop;
760: if (tiflags & TH_ACK) {
761: tp->snd_una = ti->ti_ack;
762: if (SEQ_LT(tp->snd_nxt, tp->snd_una))
763: tp->snd_nxt = tp->snd_una;
764: }
765:
766: tp->t_timer[TCPT_REXMT] = 0;
767: tp->irs = ti->ti_seq;
768: tcp_rcvseqinit(tp);
769: tp->t_flags |= TF_ACKNOW;
770: if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) {
1.1.1.3 ! root 771: STAT(tcpstat.tcps_connects++);
1.1 root 772: soisfconnected(so);
773: tp->t_state = TCPS_ESTABLISHED;
1.1.1.3 ! root 774:
1.1 root 775: /* Do window scaling on this connection? */
776: /* if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
777: * (TF_RCVD_SCALE|TF_REQ_SCALE)) {
778: * tp->snd_scale = tp->requested_s_scale;
779: * tp->rcv_scale = tp->request_r_scale;
780: * }
781: */
782: (void) tcp_reass(tp, (struct tcpiphdr *)0,
783: (struct mbuf *)0);
784: /*
785: * if we didn't have to retransmit the SYN,
786: * use its rtt as our initial srtt & rtt var.
787: */
788: if (tp->t_rtt)
789: tcp_xmit_timer(tp, tp->t_rtt);
790: } else
791: tp->t_state = TCPS_SYN_RECEIVED;
792:
793: trimthenstep6:
794: /*
795: * Advance ti->ti_seq to correspond to first data byte.
796: * If data, trim to stay within window,
797: * dropping FIN if necessary.
798: */
799: ti->ti_seq++;
800: if (ti->ti_len > tp->rcv_wnd) {
801: todrop = ti->ti_len - tp->rcv_wnd;
802: m_adj(m, -todrop);
803: ti->ti_len = tp->rcv_wnd;
804: tiflags &= ~TH_FIN;
1.1.1.3 ! root 805: STAT(tcpstat.tcps_rcvpackafterwin++);
! 806: STAT(tcpstat.tcps_rcvbyteafterwin += todrop);
1.1 root 807: }
808: tp->snd_wl1 = ti->ti_seq - 1;
809: tp->rcv_up = ti->ti_seq;
810: goto step6;
811: } /* switch tp->t_state */
812: /*
813: * States other than LISTEN or SYN_SENT.
814: * First check timestamp, if present.
1.1.1.3 ! root 815: * Then check that at least some bytes of segment are within
1.1 root 816: * receive window. If segment begins before rcv_nxt,
817: * drop leading data (and SYN); if nothing left, just ack.
1.1.1.3 ! root 818: *
1.1 root 819: * RFC 1323 PAWS: If we have a timestamp reply on this segment
820: * and it's less than ts_recent, drop it.
821: */
822: /* if (ts_present && (tiflags & TH_RST) == 0 && tp->ts_recent &&
823: * TSTMP_LT(ts_val, tp->ts_recent)) {
824: *
825: */ /* Check to see if ts_recent is over 24 days old. */
826: /* if ((int)(tcp_now - tp->ts_recent_age) > TCP_PAWS_IDLE) {
827: */ /*
828: * * Invalidate ts_recent. If this segment updates
829: * * ts_recent, the age will be reset later and ts_recent
830: * * will get a valid value. If it does not, setting
831: * * ts_recent to zero will at least satisfy the
832: * * requirement that zero be placed in the timestamp
833: * * echo reply when ts_recent isn't valid. The
834: * * age isn't reset until we get a valid ts_recent
835: * * because we don't want out-of-order segments to be
836: * * dropped when ts_recent is old.
837: * */
838: /* tp->ts_recent = 0;
839: * } else {
840: * tcpstat.tcps_rcvduppack++;
841: * tcpstat.tcps_rcvdupbyte += ti->ti_len;
842: * tcpstat.tcps_pawsdrop++;
843: * goto dropafterack;
844: * }
845: * }
846: */
847:
848: todrop = tp->rcv_nxt - ti->ti_seq;
849: if (todrop > 0) {
850: if (tiflags & TH_SYN) {
851: tiflags &= ~TH_SYN;
852: ti->ti_seq++;
1.1.1.3 ! root 853: if (ti->ti_urp > 1)
1.1 root 854: ti->ti_urp--;
855: else
856: tiflags &= ~TH_URG;
857: todrop--;
858: }
859: /*
860: * Following if statement from Stevens, vol. 2, p. 960.
861: */
862: if (todrop > ti->ti_len
863: || (todrop == ti->ti_len && (tiflags & TH_FIN) == 0)) {
864: /*
865: * Any valid FIN must be to the left of the window.
866: * At this point the FIN must be a duplicate or out
867: * of sequence; drop it.
868: */
869: tiflags &= ~TH_FIN;
1.1.1.3 ! root 870:
1.1 root 871: /*
872: * Send an ACK to resynchronize and drop any data.
873: * But keep on processing for RST or ACK.
874: */
875: tp->t_flags |= TF_ACKNOW;
876: todrop = ti->ti_len;
1.1.1.3 ! root 877: STAT(tcpstat.tcps_rcvduppack++);
! 878: STAT(tcpstat.tcps_rcvdupbyte += todrop);
1.1 root 879: } else {
1.1.1.3 ! root 880: STAT(tcpstat.tcps_rcvpartduppack++);
! 881: STAT(tcpstat.tcps_rcvpartdupbyte += todrop);
1.1 root 882: }
883: m_adj(m, todrop);
884: ti->ti_seq += todrop;
885: ti->ti_len -= todrop;
886: if (ti->ti_urp > todrop)
887: ti->ti_urp -= todrop;
888: else {
889: tiflags &= ~TH_URG;
890: ti->ti_urp = 0;
891: }
892: }
893: /*
894: * If new data are received on a connection after the
895: * user processes are gone, then RST the other end.
896: */
897: if ((so->so_state & SS_NOFDREF) &&
898: tp->t_state > TCPS_CLOSE_WAIT && ti->ti_len) {
899: tp = tcp_close(tp);
1.1.1.3 ! root 900: STAT(tcpstat.tcps_rcvafterclose++);
1.1 root 901: goto dropwithreset;
902: }
903:
904: /*
905: * If segment ends after window, drop trailing data
906: * (and PUSH and FIN); if nothing left, just ACK.
907: */
908: todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd);
909: if (todrop > 0) {
1.1.1.3 ! root 910: STAT(tcpstat.tcps_rcvpackafterwin++);
1.1 root 911: if (todrop >= ti->ti_len) {
1.1.1.3 ! root 912: STAT(tcpstat.tcps_rcvbyteafterwin += ti->ti_len);
1.1 root 913: /*
914: * If a new connection request is received
915: * while in TIME_WAIT, drop the old connection
916: * and start over if the sequence numbers
917: * are above the previous ones.
918: */
919: if (tiflags & TH_SYN &&
920: tp->t_state == TCPS_TIME_WAIT &&
921: SEQ_GT(ti->ti_seq, tp->rcv_nxt)) {
922: iss = tp->rcv_nxt + TCP_ISSINCR;
923: tp = tcp_close(tp);
924: goto findso;
925: }
926: /*
927: * If window is closed can only take segments at
928: * window edge, and have to drop data and PUSH from
929: * incoming segments. Continue processing, but
930: * remember to ack. Otherwise, drop segment
931: * and ack.
932: */
933: if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) {
934: tp->t_flags |= TF_ACKNOW;
1.1.1.3 ! root 935: STAT(tcpstat.tcps_rcvwinprobe++);
1.1 root 936: } else
937: goto dropafterack;
938: } else
1.1.1.3 ! root 939: STAT(tcpstat.tcps_rcvbyteafterwin += todrop);
1.1 root 940: m_adj(m, -todrop);
941: ti->ti_len -= todrop;
942: tiflags &= ~(TH_PUSH|TH_FIN);
943: }
944:
945: /*
946: * If last ACK falls within this segment's sequence numbers,
947: * record its timestamp.
948: */
949: /* if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) &&
950: * SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len +
951: * ((tiflags & (TH_SYN|TH_FIN)) != 0))) {
952: * tp->ts_recent_age = tcp_now;
953: * tp->ts_recent = ts_val;
954: * }
955: */
956:
957: /*
958: * If the RST bit is set examine the state:
959: * SYN_RECEIVED STATE:
960: * If passive open, return to LISTEN state.
961: * If active open, inform user that connection was refused.
962: * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES:
963: * Inform user that connection was reset, and close tcb.
964: * CLOSING, LAST_ACK, TIME_WAIT STATES
965: * Close the tcb.
966: */
967: if (tiflags&TH_RST) switch (tp->t_state) {
968:
969: case TCPS_SYN_RECEIVED:
970: /* so->so_error = ECONNREFUSED; */
971: goto close;
972:
973: case TCPS_ESTABLISHED:
974: case TCPS_FIN_WAIT_1:
975: case TCPS_FIN_WAIT_2:
976: case TCPS_CLOSE_WAIT:
977: /* so->so_error = ECONNRESET; */
978: close:
979: tp->t_state = TCPS_CLOSED;
1.1.1.3 ! root 980: STAT(tcpstat.tcps_drops++);
1.1 root 981: tp = tcp_close(tp);
982: goto drop;
983:
984: case TCPS_CLOSING:
985: case TCPS_LAST_ACK:
986: case TCPS_TIME_WAIT:
987: tp = tcp_close(tp);
988: goto drop;
989: }
990:
991: /*
992: * If a SYN is in the window, then this is an
993: * error and we send an RST and drop the connection.
994: */
995: if (tiflags & TH_SYN) {
996: tp = tcp_drop(tp,0);
997: goto dropwithreset;
998: }
999:
1000: /*
1001: * If the ACK bit is off we drop the segment and return.
1002: */
1003: if ((tiflags & TH_ACK) == 0) goto drop;
1004:
1005: /*
1006: * Ack processing.
1007: */
1008: switch (tp->t_state) {
1009: /*
1010: * In SYN_RECEIVED state if the ack ACKs our SYN then enter
1011: * ESTABLISHED state and continue processing, otherwise
1012: * send an RST. una<=ack<=max
1013: */
1014: case TCPS_SYN_RECEIVED:
1015:
1016: if (SEQ_GT(tp->snd_una, ti->ti_ack) ||
1017: SEQ_GT(ti->ti_ack, tp->snd_max))
1018: goto dropwithreset;
1.1.1.3 ! root 1019: STAT(tcpstat.tcps_connects++);
1.1 root 1020: tp->t_state = TCPS_ESTABLISHED;
1.1.1.3 ! root 1021: /*
! 1022: * The sent SYN is ack'ed with our sequence number +1
! 1023: * The first data byte already in the buffer will get
1.1 root 1024: * lost if no correction is made. This is only needed for
1025: * SS_CTL since the buffer is empty otherwise.
1.1.1.3 ! root 1026: * tp->snd_una++; or:
1.1 root 1027: */
1028: tp->snd_una=ti->ti_ack;
1029: if (so->so_state & SS_CTL) {
1030: /* So tcp_ctl reports the right state */
1031: ret = tcp_ctl(so);
1032: if (ret == 1) {
1033: soisfconnected(so);
1034: so->so_state &= ~SS_CTL; /* success XXX */
1035: } else if (ret == 2) {
1036: so->so_state = SS_NOFDREF; /* CTL_CMD */
1037: } else {
1038: needoutput = 1;
1039: tp->t_state = TCPS_FIN_WAIT_1;
1040: }
1041: } else {
1042: soisfconnected(so);
1043: }
1.1.1.3 ! root 1044:
1.1 root 1045: /* Do window scaling? */
1046: /* if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
1047: * (TF_RCVD_SCALE|TF_REQ_SCALE)) {
1048: * tp->snd_scale = tp->requested_s_scale;
1049: * tp->rcv_scale = tp->request_r_scale;
1050: * }
1051: */
1052: (void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0);
1053: tp->snd_wl1 = ti->ti_seq - 1;
1054: /* Avoid ack processing; snd_una==ti_ack => dup ack */
1055: goto synrx_to_est;
1056: /* fall into ... */
1057:
1058: /*
1059: * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
1060: * ACKs. If the ack is in the range
1061: * tp->snd_una < ti->ti_ack <= tp->snd_max
1062: * then advance tp->snd_una to ti->ti_ack and drop
1063: * data from the retransmission queue. If this ACK reflects
1064: * more up to date window information we update our window information.
1065: */
1066: case TCPS_ESTABLISHED:
1067: case TCPS_FIN_WAIT_1:
1068: case TCPS_FIN_WAIT_2:
1069: case TCPS_CLOSE_WAIT:
1070: case TCPS_CLOSING:
1071: case TCPS_LAST_ACK:
1072: case TCPS_TIME_WAIT:
1073:
1074: if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) {
1075: if (ti->ti_len == 0 && tiwin == tp->snd_wnd) {
1.1.1.3 ! root 1076: STAT(tcpstat.tcps_rcvdupack++);
1.1 root 1077: DEBUG_MISC((dfd," dup ack m = %lx so = %lx \n",
1078: (long )m, (long )so));
1079: /*
1080: * If we have outstanding data (other than
1081: * a window probe), this is a completely
1082: * duplicate ack (ie, window info didn't
1083: * change), the ack is the biggest we've
1084: * seen and we've seen exactly our rexmt
1085: * threshold of them, assume a packet
1086: * has been dropped and retransmit it.
1087: * Kludge snd_nxt & the congestion
1088: * window so we send only this one
1089: * packet.
1090: *
1091: * We know we're losing at the current
1092: * window size so do congestion avoidance
1093: * (set ssthresh to half the current window
1094: * and pull our congestion window back to
1095: * the new ssthresh).
1096: *
1097: * Dup acks mean that packets have left the
1.1.1.3 ! root 1098: * network (they're now cached at the receiver)
1.1 root 1099: * so bump cwnd by the amount in the receiver
1100: * to keep a constant cwnd packets in the
1101: * network.
1102: */
1103: if (tp->t_timer[TCPT_REXMT] == 0 ||
1104: ti->ti_ack != tp->snd_una)
1105: tp->t_dupacks = 0;
1.1.1.3 ! root 1106: else if (++tp->t_dupacks == TCPREXMTTHRESH) {
1.1 root 1107: tcp_seq onxt = tp->snd_nxt;
1108: u_int win =
1109: min(tp->snd_wnd, tp->snd_cwnd) / 2 /
1110: tp->t_maxseg;
1111:
1112: if (win < 2)
1113: win = 2;
1114: tp->snd_ssthresh = win * tp->t_maxseg;
1115: tp->t_timer[TCPT_REXMT] = 0;
1116: tp->t_rtt = 0;
1117: tp->snd_nxt = ti->ti_ack;
1118: tp->snd_cwnd = tp->t_maxseg;
1119: (void) tcp_output(tp);
1120: tp->snd_cwnd = tp->snd_ssthresh +
1121: tp->t_maxseg * tp->t_dupacks;
1122: if (SEQ_GT(onxt, tp->snd_nxt))
1123: tp->snd_nxt = onxt;
1124: goto drop;
1.1.1.3 ! root 1125: } else if (tp->t_dupacks > TCPREXMTTHRESH) {
1.1 root 1126: tp->snd_cwnd += tp->t_maxseg;
1127: (void) tcp_output(tp);
1128: goto drop;
1129: }
1130: } else
1131: tp->t_dupacks = 0;
1132: break;
1133: }
1134: synrx_to_est:
1135: /*
1136: * If the congestion window was inflated to account
1137: * for the other side's cached packets, retract it.
1138: */
1.1.1.3 ! root 1139: if (tp->t_dupacks > TCPREXMTTHRESH &&
1.1 root 1140: tp->snd_cwnd > tp->snd_ssthresh)
1141: tp->snd_cwnd = tp->snd_ssthresh;
1142: tp->t_dupacks = 0;
1143: if (SEQ_GT(ti->ti_ack, tp->snd_max)) {
1.1.1.3 ! root 1144: STAT(tcpstat.tcps_rcvacktoomuch++);
1.1 root 1145: goto dropafterack;
1146: }
1147: acked = ti->ti_ack - tp->snd_una;
1.1.1.3 ! root 1148: STAT(tcpstat.tcps_rcvackpack++);
! 1149: STAT(tcpstat.tcps_rcvackbyte += acked);
1.1 root 1150:
1151: /*
1152: * If we have a timestamp reply, update smoothed
1153: * round trip time. If no timestamp is present but
1154: * transmit timer is running and timed sequence
1155: * number was acked, update smoothed round trip time.
1156: * Since we now have an rtt measurement, cancel the
1157: * timer backoff (cf., Phil Karn's retransmit alg.).
1158: * Recompute the initial retransmit timer.
1159: */
1160: /* if (ts_present)
1161: * tcp_xmit_timer(tp, tcp_now-ts_ecr+1);
1162: * else
1.1.1.3 ! root 1163: */
1.1 root 1164: if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq))
1165: tcp_xmit_timer(tp,tp->t_rtt);
1166:
1167: /*
1168: * If all outstanding data is acked, stop retransmit
1169: * timer and remember to restart (more output or persist).
1170: * If there is more data to be acked, restart retransmit
1171: * timer, using current (possibly backed-off) value.
1172: */
1173: if (ti->ti_ack == tp->snd_max) {
1174: tp->t_timer[TCPT_REXMT] = 0;
1175: needoutput = 1;
1176: } else if (tp->t_timer[TCPT_PERSIST] == 0)
1177: tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
1178: /*
1179: * When new data is acked, open the congestion window.
1180: * If the window gives us less than ssthresh packets
1181: * in flight, open exponentially (maxseg per packet).
1182: * Otherwise open linearly: maxseg per window
1183: * (maxseg^2 / cwnd per packet).
1184: */
1185: {
1186: register u_int cw = tp->snd_cwnd;
1187: register u_int incr = tp->t_maxseg;
1188:
1189: if (cw > tp->snd_ssthresh)
1190: incr = incr * incr / cw;
1191: tp->snd_cwnd = min(cw + incr, TCP_MAXWIN<<tp->snd_scale);
1192: }
1193: if (acked > so->so_snd.sb_cc) {
1194: tp->snd_wnd -= so->so_snd.sb_cc;
1195: sbdrop(&so->so_snd, (int )so->so_snd.sb_cc);
1196: ourfinisacked = 1;
1197: } else {
1198: sbdrop(&so->so_snd, acked);
1199: tp->snd_wnd -= acked;
1200: ourfinisacked = 0;
1201: }
1202: /*
1203: * XXX sowwakeup is called when data is acked and there's room
1.1.1.3 ! root 1204: * for more data... it should read() the socket
1.1 root 1205: */
1206: /* if (so->so_snd.sb_flags & SB_NOTIFY)
1207: * sowwakeup(so);
1208: */
1209: tp->snd_una = ti->ti_ack;
1210: if (SEQ_LT(tp->snd_nxt, tp->snd_una))
1211: tp->snd_nxt = tp->snd_una;
1212:
1213: switch (tp->t_state) {
1214:
1215: /*
1216: * In FIN_WAIT_1 STATE in addition to the processing
1217: * for the ESTABLISHED state if our FIN is now acknowledged
1218: * then enter FIN_WAIT_2.
1219: */
1220: case TCPS_FIN_WAIT_1:
1221: if (ourfinisacked) {
1222: /*
1223: * If we can't receive any more
1224: * data, then closing user can proceed.
1225: * Starting the timer is contrary to the
1226: * specification, but if we don't get a FIN
1227: * we'll hang forever.
1228: */
1229: if (so->so_state & SS_FCANTRCVMORE) {
1230: soisfdisconnected(so);
1.1.1.3 ! root 1231: tp->t_timer[TCPT_2MSL] = TCP_MAXIDLE;
1.1 root 1232: }
1233: tp->t_state = TCPS_FIN_WAIT_2;
1234: }
1235: break;
1236:
1237: /*
1238: * In CLOSING STATE in addition to the processing for
1239: * the ESTABLISHED state if the ACK acknowledges our FIN
1240: * then enter the TIME-WAIT state, otherwise ignore
1241: * the segment.
1242: */
1243: case TCPS_CLOSING:
1244: if (ourfinisacked) {
1245: tp->t_state = TCPS_TIME_WAIT;
1246: tcp_canceltimers(tp);
1247: tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1248: soisfdisconnected(so);
1249: }
1250: break;
1251:
1252: /*
1253: * In LAST_ACK, we may still be waiting for data to drain
1254: * and/or to be acked, as well as for the ack of our FIN.
1255: * If our FIN is now acknowledged, delete the TCB,
1256: * enter the closed state and return.
1257: */
1258: case TCPS_LAST_ACK:
1259: if (ourfinisacked) {
1260: tp = tcp_close(tp);
1261: goto drop;
1262: }
1263: break;
1264:
1265: /*
1266: * In TIME_WAIT state the only thing that should arrive
1267: * is a retransmission of the remote FIN. Acknowledge
1268: * it and restart the finack timer.
1269: */
1270: case TCPS_TIME_WAIT:
1271: tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1272: goto dropafterack;
1273: }
1274: } /* switch(tp->t_state) */
1275:
1276: step6:
1277: /*
1278: * Update window information.
1279: * Don't look at window if no ACK: TAC's send garbage on first SYN.
1280: */
1281: if ((tiflags & TH_ACK) &&
1.1.1.3 ! root 1282: (SEQ_LT(tp->snd_wl1, ti->ti_seq) ||
1.1 root 1283: (tp->snd_wl1 == ti->ti_seq && (SEQ_LT(tp->snd_wl2, ti->ti_ack) ||
1284: (tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))))) {
1285: /* keep track of pure window updates */
1286: if (ti->ti_len == 0 &&
1287: tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd)
1.1.1.3 ! root 1288: STAT(tcpstat.tcps_rcvwinupd++);
1.1 root 1289: tp->snd_wnd = tiwin;
1290: tp->snd_wl1 = ti->ti_seq;
1291: tp->snd_wl2 = ti->ti_ack;
1292: if (tp->snd_wnd > tp->max_sndwnd)
1293: tp->max_sndwnd = tp->snd_wnd;
1294: needoutput = 1;
1295: }
1296:
1297: /*
1298: * Process segments with URG.
1299: */
1300: if ((tiflags & TH_URG) && ti->ti_urp &&
1301: TCPS_HAVERCVDFIN(tp->t_state) == 0) {
1302: /*
1303: * This is a kludge, but if we receive and accept
1304: * random urgent pointers, we'll crash in
1305: * soreceive. It's hard to imagine someone
1306: * actually wanting to send this much urgent data.
1307: */
1308: if (ti->ti_urp + so->so_rcv.sb_cc > so->so_rcv.sb_datalen) {
1309: ti->ti_urp = 0;
1310: tiflags &= ~TH_URG;
1311: goto dodata;
1312: }
1313: /*
1314: * If this segment advances the known urgent pointer,
1315: * then mark the data stream. This should not happen
1316: * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
1.1.1.3 ! root 1317: * a FIN has been received from the remote side.
1.1 root 1318: * In these states we ignore the URG.
1319: *
1320: * According to RFC961 (Assigned Protocols),
1321: * the urgent pointer points to the last octet
1322: * of urgent data. We continue, however,
1323: * to consider it to indicate the first octet
1.1.1.3 ! root 1324: * of data past the urgent section as the original
1.1 root 1325: * spec states (in one of two places).
1326: */
1327: if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) {
1328: tp->rcv_up = ti->ti_seq + ti->ti_urp;
1329: so->so_urgc = so->so_rcv.sb_cc +
1330: (tp->rcv_up - tp->rcv_nxt); /* -1; */
1331: tp->rcv_up = ti->ti_seq + ti->ti_urp;
1.1.1.3 ! root 1332:
1.1 root 1333: }
1334: } else
1335: /*
1336: * If no out of band data is expected,
1337: * pull receive urgent pointer along
1338: * with the receive window.
1339: */
1340: if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
1341: tp->rcv_up = tp->rcv_nxt;
1342: dodata:
1343:
1344: /*
1345: * Process the segment text, merging it into the TCP sequencing queue,
1346: * and arranging for acknowledgment of receipt if necessary.
1347: * This process logically involves adjusting tp->rcv_wnd as data
1348: * is presented to the user (this happens in tcp_usrreq.c,
1349: * case PRU_RCVD). If a FIN has already been received on this
1350: * connection then we just ignore the text.
1351: */
1352: if ((ti->ti_len || (tiflags&TH_FIN)) &&
1353: TCPS_HAVERCVDFIN(tp->t_state) == 0) {
1354: TCP_REASS(tp, ti, m, so, tiflags);
1355: /*
1356: * Note the amount of data that peer has sent into
1357: * our window, in order to estimate the sender's
1358: * buffer size.
1359: */
1360: len = so->so_rcv.sb_datalen - (tp->rcv_adv - tp->rcv_nxt);
1361: } else {
1362: m_free(m);
1363: tiflags &= ~TH_FIN;
1364: }
1365:
1366: /*
1367: * If FIN is received ACK the FIN and let the user know
1368: * that the connection is closing.
1369: */
1370: if (tiflags & TH_FIN) {
1371: if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
1372: /*
1373: * If we receive a FIN we can't send more data,
1374: * set it SS_FDRAIN
1375: * Shutdown the socket if there is no rx data in the
1376: * buffer.
1377: * soread() is called on completion of shutdown() and
1378: * will go to TCPS_LAST_ACK, and use tcp_output()
1379: * to send the FIN.
1380: */
1381: /* sofcantrcvmore(so); */
1382: sofwdrain(so);
1.1.1.3 ! root 1383:
1.1 root 1384: tp->t_flags |= TF_ACKNOW;
1385: tp->rcv_nxt++;
1386: }
1387: switch (tp->t_state) {
1388:
1389: /*
1390: * In SYN_RECEIVED and ESTABLISHED STATES
1391: * enter the CLOSE_WAIT state.
1392: */
1393: case TCPS_SYN_RECEIVED:
1394: case TCPS_ESTABLISHED:
1395: if(so->so_emu == EMU_CTL) /* no shutdown on socket */
1396: tp->t_state = TCPS_LAST_ACK;
1.1.1.3 ! root 1397: else
1.1 root 1398: tp->t_state = TCPS_CLOSE_WAIT;
1399: break;
1400:
1401: /*
1402: * If still in FIN_WAIT_1 STATE FIN has not been acked so
1403: * enter the CLOSING state.
1404: */
1405: case TCPS_FIN_WAIT_1:
1406: tp->t_state = TCPS_CLOSING;
1407: break;
1408:
1409: /*
1410: * In FIN_WAIT_2 state enter the TIME_WAIT state,
1.1.1.3 ! root 1411: * starting the time-wait timer, turning off the other
1.1 root 1412: * standard timers.
1413: */
1414: case TCPS_FIN_WAIT_2:
1415: tp->t_state = TCPS_TIME_WAIT;
1416: tcp_canceltimers(tp);
1417: tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1418: soisfdisconnected(so);
1419: break;
1420:
1421: /*
1422: * In TIME_WAIT state restart the 2 MSL time_wait timer.
1423: */
1424: case TCPS_TIME_WAIT:
1425: tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1426: break;
1427: }
1428: }
1429:
1430: /*
1431: * If this is a small packet, then ACK now - with Nagle
1432: * congestion avoidance sender won't send more until
1433: * he gets an ACK.
1.1.1.3 ! root 1434: *
1.1 root 1435: * See above.
1436: */
1437: /* if (ti->ti_len && (unsigned)ti->ti_len < tp->t_maxseg) {
1438: */
1439: /* if ((ti->ti_len && (unsigned)ti->ti_len < tp->t_maxseg &&
1440: * (so->so_iptos & IPTOS_LOWDELAY) == 0) ||
1441: * ((so->so_iptos & IPTOS_LOWDELAY) &&
1442: * ((struct tcpiphdr_2 *)ti)->first_char == (char)27)) {
1443: */
1444: if (ti->ti_len && (unsigned)ti->ti_len <= 5 &&
1445: ((struct tcpiphdr_2 *)ti)->first_char == (char)27) {
1446: tp->t_flags |= TF_ACKNOW;
1447: }
1448:
1449: /*
1450: * Return any desired output.
1451: */
1452: if (needoutput || (tp->t_flags & TF_ACKNOW)) {
1453: (void) tcp_output(tp);
1454: }
1455: return;
1456:
1457: dropafterack:
1458: /*
1459: * Generate an ACK dropping incoming segment if it occupies
1460: * sequence space, where the ACK reflects our state.
1461: */
1462: if (tiflags & TH_RST)
1463: goto drop;
1464: m_freem(m);
1465: tp->t_flags |= TF_ACKNOW;
1466: (void) tcp_output(tp);
1467: return;
1468:
1469: dropwithreset:
1470: /* reuses m if m!=NULL, m_free() unnecessary */
1471: if (tiflags & TH_ACK)
1472: tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST);
1473: else {
1474: if (tiflags & TH_SYN) ti->ti_len++;
1475: tcp_respond(tp, ti, m, ti->ti_seq+ti->ti_len, (tcp_seq)0,
1476: TH_RST|TH_ACK);
1477: }
1478:
1479: return;
1480:
1481: drop:
1482: /*
1483: * Drop space held by incoming segment and return.
1484: */
1485: m_free(m);
1486:
1487: return;
1488: }
1489:
1490: /* , ts_present, ts_val, ts_ecr) */
1491: /* int *ts_present;
1492: * u_int32_t *ts_val, *ts_ecr;
1493: */
1.1.1.3 ! root 1494: static void
! 1495: tcp_dooptions(struct tcpcb *tp, u_char *cp, int cnt, struct tcpiphdr *ti)
1.1 root 1496: {
1497: u_int16_t mss;
1498: int opt, optlen;
1499:
1500: DEBUG_CALL("tcp_dooptions");
1501: DEBUG_ARGS((dfd," tp = %lx cnt=%i \n", (long )tp, cnt));
1502:
1503: for (; cnt > 0; cnt -= optlen, cp += optlen) {
1504: opt = cp[0];
1505: if (opt == TCPOPT_EOL)
1506: break;
1507: if (opt == TCPOPT_NOP)
1508: optlen = 1;
1509: else {
1510: optlen = cp[1];
1511: if (optlen <= 0)
1512: break;
1513: }
1514: switch (opt) {
1515:
1516: default:
1517: continue;
1518:
1519: case TCPOPT_MAXSEG:
1520: if (optlen != TCPOLEN_MAXSEG)
1521: continue;
1522: if (!(ti->ti_flags & TH_SYN))
1523: continue;
1524: memcpy((char *) &mss, (char *) cp + 2, sizeof(mss));
1525: NTOHS(mss);
1526: (void) tcp_mss(tp, mss); /* sets t_maxseg */
1527: break;
1528:
1529: /* case TCPOPT_WINDOW:
1530: * if (optlen != TCPOLEN_WINDOW)
1531: * continue;
1532: * if (!(ti->ti_flags & TH_SYN))
1533: * continue;
1534: * tp->t_flags |= TF_RCVD_SCALE;
1535: * tp->requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT);
1536: * break;
1537: */
1538: /* case TCPOPT_TIMESTAMP:
1539: * if (optlen != TCPOLEN_TIMESTAMP)
1540: * continue;
1541: * *ts_present = 1;
1542: * memcpy((char *) ts_val, (char *)cp + 2, sizeof(*ts_val));
1543: * NTOHL(*ts_val);
1544: * memcpy((char *) ts_ecr, (char *)cp + 6, sizeof(*ts_ecr));
1545: * NTOHL(*ts_ecr);
1546: *
1.1.1.3 ! root 1547: */ /*
1.1 root 1548: * * A timestamp received in a SYN makes
1549: * * it ok to send timestamp requests and replies.
1550: * */
1551: /* if (ti->ti_flags & TH_SYN) {
1552: * tp->t_flags |= TF_RCVD_TSTMP;
1553: * tp->ts_recent = *ts_val;
1554: * tp->ts_recent_age = tcp_now;
1555: * }
1556: */ break;
1557: }
1558: }
1559: }
1560:
1561:
/*
 * Pull out of band byte out of a segment so
 * it doesn't appear in the user's data queue.
 * It is still reflected in the segment length for
 * sequencing purposes.
 *
 *   so - socket the segment arrived on (used to find its tcpcb)
 *   ti - segment header; ti_urp - 1 is the offset of the urgent byte
 *   m  - first mbuf holding the segment data
 *
 * The urgent byte is saved in tp->t_iobc, TCPOOB_HAVEDATA is set, the
 * bytes after it are shifted down by one and the mbuf is shortened by
 * one byte.  If the offset is not found in the mbuf chain the function
 * panics.
 *
 * This function is only compiled when "notdef" is defined.
 */

#ifdef notdef

void
tcp_pulloutofband(so, ti, m)
	struct socket *so;
	struct tcpiphdr *ti;
	register struct mbuf *m;
{
	int cnt = ti->ti_urp - 1;

	while (cnt >= 0) {
		if (m->m_len > cnt) {
			char *cp = mtod(m, caddr_t) + cnt;
			struct tcpcb *tp = sototcpcb(so);

			tp->t_iobc = *cp;
			tp->t_oobflags |= TCPOOB_HAVEDATA;
			/*
			 * Close the one-byte gap left by the urgent byte.
			 * Source and destination overlap, so this has to
			 * be memmove() rather than memcpy().
			 */
			memmove(cp, cp + 1, (unsigned)(m->m_len - cnt - 1));
			m->m_len--;
			return;
		}
		cnt -= m->m_len;
		m = m->m_next; /* XXX WRONG! Fix it! */
		if (m == 0)
			break;
	}
	panic("tcp_pulloutofband");
}

#endif /* notdef */
1599:
1600: /*
1601: * Collect new round-trip time estimate
1602: * and update averages and current timeout.
1603: */
1604:
1.1.1.3 ! root 1605: static void
! 1606: tcp_xmit_timer(register struct tcpcb *tp, int rtt)
1.1 root 1607: {
1608: register short delta;
1609:
1610: DEBUG_CALL("tcp_xmit_timer");
1611: DEBUG_ARG("tp = %lx", (long)tp);
1612: DEBUG_ARG("rtt = %d", rtt);
1.1.1.3 ! root 1613:
! 1614: STAT(tcpstat.tcps_rttupdated++);
1.1 root 1615: if (tp->t_srtt != 0) {
1616: /*
1617: * srtt is stored as fixed point with 3 bits after the
1618: * binary point (i.e., scaled by 8). The following magic
1619: * is equivalent to the smoothing algorithm in rfc793 with
1620: * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
1621: * point). Adjust rtt to origin 0.
1622: */
1623: delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT);
1624: if ((tp->t_srtt += delta) <= 0)
1625: tp->t_srtt = 1;
1626: /*
1627: * We accumulate a smoothed rtt variance (actually, a
1628: * smoothed mean difference), then set the retransmit
1629: * timer to smoothed rtt + 4 times the smoothed variance.
1630: * rttvar is stored as fixed point with 2 bits after the
1631: * binary point (scaled by 4). The following is
1632: * equivalent to rfc793 smoothing with an alpha of .75
1633: * (rttvar = rttvar*3/4 + |delta| / 4). This replaces
1634: * rfc793's wired-in beta.
1635: */
1636: if (delta < 0)
1637: delta = -delta;
1638: delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT);
1639: if ((tp->t_rttvar += delta) <= 0)
1640: tp->t_rttvar = 1;
1641: } else {
1.1.1.3 ! root 1642: /*
1.1 root 1643: * No rtt measurement yet - use the unsmoothed rtt.
1644: * Set the variance to half the rtt (so our first
1645: * retransmit happens at 3*rtt).
1646: */
1647: tp->t_srtt = rtt << TCP_RTT_SHIFT;
1648: tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
1649: }
1650: tp->t_rtt = 0;
1651: tp->t_rxtshift = 0;
1652:
1653: /*
1654: * the retransmit should happen at rtt + 4 * rttvar.
1655: * Because of the way we do the smoothing, srtt and rttvar
1656: * will each average +1/2 tick of bias. When we compute
1657: * the retransmit timer, we want 1/2 tick of rounding and
1658: * 1 extra tick because of +-1/2 tick uncertainty in the
1659: * firing of the timer. The bias will give us exactly the
1660: * 1.5 tick we need. But, because the bias is
1661: * statistical, we have to test that we don't drop below
1662: * the minimum feasible timer (which is 2 ticks).
1663: */
1664: TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
1665: (short)tp->t_rttmin, TCPTV_REXMTMAX); /* XXX */
1.1.1.3 ! root 1666:
1.1 root 1667: /*
1668: * We received an ack for a packet that wasn't retransmitted;
1669: * it is probably safe to discard any error indications we've
1670: * received recently. This isn't quite right, but close enough
1671: * for now (a route might have failed after we sent a segment,
1672: * and the return path might not be symmetrical).
1673: */
1674: tp->t_softerror = 0;
1675: }
1676:
1677: /*
1678: * Determine a reasonable value for maxseg size.
1679: * If the route is known, check route for mtu.
1680: * If none, use an mss that can be handled on the outgoing
1681: * interface without forcing IP to fragment; if bigger than
1682: * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES
1683: * to utilize large mbufs. If no route is found, route has no mtu,
1684: * or the destination isn't local, use a default, hopefully conservative
1685: * size (usually 512 or the default IP max size, but no more than the mtu
1686: * of the interface), as we can't discover anything about intervening
1687: * gateways or networks. We also initialize the congestion/slow start
1688: * window to be a single segment if the destination isn't local.
1689: * While looking at the routing entry, we also initialize other path-dependent
1690: * parameters from pre-set or cached values in the routing entry.
1691: */
1692:
1693: int
1694: tcp_mss(tp, offer)
1695: register struct tcpcb *tp;
1696: u_int offer;
1697: {
1698: struct socket *so = tp->t_socket;
1699: int mss;
1.1.1.3 ! root 1700:
1.1 root 1701: DEBUG_CALL("tcp_mss");
1702: DEBUG_ARG("tp = %lx", (long)tp);
1703: DEBUG_ARG("offer = %d", offer);
1.1.1.3 ! root 1704:
! 1705: mss = min(IF_MTU, IF_MRU) - sizeof(struct tcpiphdr);
1.1 root 1706: if (offer)
1707: mss = min(mss, offer);
1708: mss = max(mss, 32);
1709: if (mss < tp->t_maxseg || offer != 0)
1710: tp->t_maxseg = mss;
1.1.1.3 ! root 1711:
1.1 root 1712: tp->snd_cwnd = mss;
1.1.1.3 ! root 1713:
! 1714: sbreserve(&so->so_snd, TCP_SNDSPACE + ((TCP_SNDSPACE % mss) ?
! 1715: (mss - (TCP_SNDSPACE % mss)) :
! 1716: 0));
! 1717: sbreserve(&so->so_rcv, TCP_RCVSPACE + ((TCP_RCVSPACE % mss) ?
! 1718: (mss - (TCP_RCVSPACE % mss)) :
! 1719: 0));
! 1720:
1.1 root 1721: DEBUG_MISC((dfd, " returning mss = %d\n", mss));
1.1.1.3 ! root 1722:
1.1 root 1723: return mss;
1724: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.