|
|
1.1 root 1: /*
2: * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3: *
4: * @APPLE_LICENSE_HEADER_START@
5: *
6: * The contents of this file constitute Original Code as defined in and
7: * are subject to the Apple Public Source License Version 1.1 (the
8: * "License"). You may not use this file except in compliance with the
9: * License. Please obtain a copy of the License at
10: * http://www.apple.com/publicsource and read it before using this file.
11: *
12: * This Original Code and all software distributed under the License are
13: * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14: * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15: * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16: * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17: * License for the specific language governing rights and limitations
18: * under the License.
19: *
20: * @APPLE_LICENSE_HEADER_END@
21: */
22: /*-
23: * Copyright (c) 1991, 1993
24: * The Regents of the University of California. All rights reserved.
25: *
26: * Redistribution and use in source and binary forms, with or without
27: * modification, are permitted provided that the following conditions
28: * are met:
29: * 1. Redistributions of source code must retain the above copyright
30: * notice, this list of conditions and the following disclaimer.
31: * 2. Redistributions in binary form must reproduce the above copyright
32: * notice, this list of conditions and the following disclaimer in the
33: * documentation and/or other materials provided with the distribution.
34: * 3. All advertising materials mentioning features or use of this software
35: * must display the following acknowledgement:
36: * This product includes software developed by the University of
37: * California, Berkeley and its contributors.
38: * 4. Neither the name of the University nor the names of its contributors
39: * may be used to endorse or promote products derived from this software
40: * without specific prior written permission.
41: *
42: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
43: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
46: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52: * SUCH DAMAGE.
53: *
54: * @(#)tp_subr.c 8.1 (Berkeley) 6/10/93
55: */
56:
57: /***********************************************************
58: Copyright IBM Corporation 1987
59:
60: All Rights Reserved
61:
62: Permission to use, copy, modify, and distribute this software and its
63: documentation for any purpose and without fee is hereby granted,
64: provided that the above copyright notice appear in all copies and that
65: both that copyright notice and this permission notice appear in
66: supporting documentation, and that the name of IBM not be
67: used in advertising or publicity pertaining to distribution of the
68: software without specific, written prior permission.
69:
70: IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
71: ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
72: IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
73: ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
74: WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
75: ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
76: SOFTWARE.
77:
78: ******************************************************************/
79:
80: /*
81: * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
82: */
83: /*
84: * ARGO TP
85: *
86: * The main work of data transfer is done here.
87: * These routines are called from tp.trans.
88: * They include the routines that check the validity of acks and Xacks,
89: * (tp_goodack() and tp_goodXack() )
90: * take packets from socket buffers and send them (tp_send()),
91: * drop the data from the socket buffers (tp_sbdrop()),
92: * and put incoming packet data into socket buffers (tp_stash()).
93: */
94:
95: #include <sys/param.h>
96: #include <sys/systm.h>
97: #include <sys/mbuf.h>
98: #include <sys/socket.h>
99: #include <sys/socketvar.h>
100: #include <sys/protosw.h>
101: #include <sys/errno.h>
102: #include <sys/time.h>
103: #include <sys/kernel.h>
104:
105: #include <netiso/tp_ip.h>
106: #include <netiso/iso.h>
107: #include <netiso/argo_debug.h>
108: #include <netiso/tp_timer.h>
109: #include <netiso/tp_param.h>
110: #include <netiso/tp_stat.h>
111: #include <netiso/tp_pcb.h>
112: #include <netiso/tp_tpdu.h>
113: #include <netiso/tp_trace.h>
114: #include <netiso/tp_meas.h>
115: #include <netiso/tp_seq.h>
116:
/* Old-style (unprototyped) declarations of routines called in this file. */
int tp_emit(), tp_sbdrop();
/*
 * Duplicate-ack threshold: tp_goodack() forces a retransmission when
 * tp_dupacks reaches this value.
 */
int tprexmtthresh = 3;
/* System tick counter, sampled here for round-trip and send timing. */
extern int ticks;
void tp_send();
121:
122: /*
123: * CALLED FROM:
124: * tp.trans, when an XAK arrives
125: * FUNCTION and ARGUMENTS:
126: * Determines if the sequence number (seq) from the XAK
127: * acks anything new. If so, drop the appropriate tpdu
128: * from the XPD send queue.
129: * RETURN VALUE:
130: * Returns 1 if it did this, 0 if the ack caused no action.
131: */
132: int
133: tp_goodXack(tpcb, seq)
134: struct tp_pcb *tpcb;
135: SeqNum seq;
136: {
137:
138: IFTRACE(D_XPD)
139: tptraceTPCB(TPPTgotXack,
140: seq, tpcb->tp_Xuna, tpcb->tp_Xsndnxt, tpcb->tp_sndnew,
141: tpcb->tp_snduna);
142: ENDTRACE
143:
144: if ( seq == tpcb->tp_Xuna ) {
145: tpcb->tp_Xuna = tpcb->tp_Xsndnxt;
146:
147: /* DROP 1 packet from the Xsnd socket buf - just so happens
148: * that only one packet can be there at any time
149: * so drop the whole thing. If you allow > 1 packet
150: * the socket buffer, then you'll have to keep
151: * track of how many characters went w/ each XPD tpdu, so this
152: * will get messier
153: */
154: IFDEBUG(D_XPD)
155: dump_mbuf(tpcb->tp_Xsnd.sb_mb,
156: "tp_goodXack Xsnd before sbdrop");
157: ENDDEBUG
158:
159: IFTRACE(D_XPD)
160: tptraceTPCB(TPPTmisc,
161: "goodXack: dropping cc ",
162: (int)(tpcb->tp_Xsnd.sb_cc),
163: 0,0,0);
164: ENDTRACE
165: sbdroprecord(&tpcb->tp_Xsnd);
166: return 1;
167: }
168: return 0;
169: }
170:
171: /*
172: * CALLED FROM:
173: * tp_good_ack()
174: * FUNCTION and ARGUMENTS:
175: * updates
176: * smoothed average round trip time (*rtt)
177: * roundtrip time variance (*rtv) - actually deviation, not variance
178: * given the new value (diff)
179: * RETURN VALUE:
180: * void
181: */
182:
183: void
184: tp_rtt_rtv(tpcb)
185: register struct tp_pcb *tpcb;
186: {
187: int old = tpcb->tp_rtt;
188: int delta, elapsed = ticks - tpcb->tp_rttemit;
189:
190: if (tpcb->tp_rtt != 0) {
191: /*
192: * rtt is the smoothed round trip time in machine clock ticks (hz).
193: * It is stored as a fixed point number, unscaled (unlike the tcp
194: * srtt). The rationale here is that it is only significant to the
195: * nearest unit of slowtimo, which is at least 8 machine clock ticks
196: * so there is no need to scale. The smoothing is done according
197: * to the same formula as TCP (rtt = rtt*7/8 + measured_rtt/8).
198: */
199: delta = elapsed - tpcb->tp_rtt;
200: if ((tpcb->tp_rtt += (delta >> TP_RTT_ALPHA)) <= 0)
201: tpcb->tp_rtt = 1;
202: /*
203: * rtv is a smoothed accumulated mean difference, unscaled
204: * for reasons expressed above.
205: * It is smoothed with an alpha of .75, and the round trip timer
206: * will be set to rtt + 4*rtv, also as TCP does.
207: */
208: if (delta < 0)
209: delta = -delta;
210: if ((tpcb->tp_rtv += ((delta - tpcb->tp_rtv) >> TP_RTV_ALPHA)) <= 0)
211: tpcb->tp_rtv = 1;
212: } else {
213: /*
214: * No rtt measurement yet - use the unsmoothed rtt.
215: * Set the variance to half the rtt (so our first
216: * retransmit happens at 3*rtt)
217: */
218: tpcb->tp_rtt = elapsed;
219: tpcb->tp_rtv = elapsed >> 1;
220: }
221: tpcb->tp_rttemit = 0;
222: tpcb->tp_rxtshift = 0;
223: /*
224: * Quoting TCP: "the retransmit should happen at rtt + 4 * rttvar.
225: * Because of the way we do the smoothing, srtt and rttvar
226: * will each average +1/2 tick of bias. When we compute
227: * the retransmit timer, we want 1/2 tick of rounding and
228: * 1 extra tick because of +-1/2 tick uncertainty in the
229: * firing of the timer. The bias will give us exactly the
230: * 1.5 tick we need. But, because the bias is
231: * statistical, we have to test that we don't drop below
232: * the minimum feasible timer (which is 2 ticks)."
233: */
234: TP_RANGESET(tpcb->tp_dt_ticks, TP_REXMTVAL(tpcb),
235: tpcb->tp_peer_acktime, 128 /* XXX */);
236: IFDEBUG(D_RTT)
237: printf("%s tpcb 0x%x, elapsed %d, delta %d, rtt %d, rtv %d, old %d\n",
238: "tp_rtt_rtv:",tpcb,elapsed,delta,tpcb->tp_rtt,tpcb->tp_rtv,old);
239: ENDDEBUG
240: tpcb->tp_rxtcur = tpcb->tp_dt_ticks;
241: }
242:
243: /*
244: * CALLED FROM:
245: * tp.trans when an AK arrives
246: * FUNCTION and ARGUMENTS:
247: * Given (cdt), the credit from the AK tpdu, and
248: * (seq), the sequence number from the AK tpdu,
249: * tp_goodack() determines if the AK acknowledges something in the send
250: * window, and if so, drops the appropriate packets from the retransmission
251: * list, computes the round trip time, and updates the retransmission timer
252: * based on the new smoothed round trip time.
253: * RETURN VALUE:
254: * Returns 1 if
255: * EITHER it actually acked something heretofore unacknowledged
256: * OR no news but the credit should be processed.
257: * If something heretofore unacked was acked with this sequence number,
258: * the appropriate tpdus are dropped from the retransmission control list,
259: * by calling tp_sbdrop().
260: * No need to see the tpdu itself.
261: */
262: int
263: tp_goodack(tpcb, cdt, seq, subseq)
264: register struct tp_pcb *tpcb;
265: u_int cdt;
266: register SeqNum seq;
267: u_int subseq;
268: {
269: int old_fcredit;
270: int bang = 0; /* bang --> ack for something heretofore unacked */
271: u_int bytes_acked;
272:
273: IFDEBUG(D_ACKRECV)
274: printf("goodack tpcb 0x%x seq 0x%x cdt %d una 0x%x new 0x%x nxt 0x%x\n",
275: tpcb, seq, cdt, tpcb->tp_snduna, tpcb->tp_sndnew, tpcb->tp_sndnxt);
276: ENDDEBUG
277: IFTRACE(D_ACKRECV)
278: tptraceTPCB(TPPTgotack,
279: seq,cdt, tpcb->tp_snduna,tpcb->tp_sndnew,subseq);
280: ENDTRACE
281:
282: IFPERF(tpcb)
283: tpmeas(tpcb->tp_lref, TPtime_ack_rcvd, (struct timeval *)0, seq, 0, 0);
284: ENDPERF
285:
286: if (seq == tpcb->tp_snduna) {
287: if (subseq < tpcb->tp_r_subseq ||
288: (subseq == tpcb->tp_r_subseq && cdt <= tpcb->tp_fcredit)) {
289: discard_the_ack:
290: IFDEBUG(D_ACKRECV)
291: printf("goodack discard : tpcb 0x%x subseq %d r_subseq %d\n",
292: tpcb, subseq, tpcb->tp_r_subseq);
293: ENDDEBUG
294: goto done;
295: }
296: if (cdt == tpcb->tp_fcredit /*&& thus subseq > tpcb->tp_r_subseq */) {
297: tpcb->tp_r_subseq = subseq;
298: if (tpcb->tp_timer[TM_data_retrans] == 0)
299: tpcb->tp_dupacks = 0;
300: else if (++tpcb->tp_dupacks == tprexmtthresh) {
301: /* partner went out of his way to signal with different
302: subsequences that he has the same lack of an expected
303: packet. This may be an early indiciation of a loss */
304:
305: SeqNum onxt = tpcb->tp_sndnxt;
306: struct mbuf *onxt_m = tpcb->tp_sndnxt_m;
307: u_int win = min(tpcb->tp_fcredit,
308: tpcb->tp_cong_win / tpcb->tp_l_tpdusize) / 2;
309: IFDEBUG(D_ACKRECV)
310: printf("%s tpcb 0x%x seq 0x%x rttseq 0x%x onxt 0x%x\n",
311: "goodack dupacks:", tpcb, seq, tpcb->tp_rttseq, onxt);
312: ENDDEBUG
313: if (win < 2)
314: win = 2;
315: tpcb->tp_ssthresh = win * tpcb->tp_l_tpdusize;
316: tpcb->tp_timer[TM_data_retrans] = 0;
317: tpcb->tp_rttemit = 0;
318: tpcb->tp_sndnxt = tpcb->tp_snduna;
319: tpcb->tp_sndnxt_m = 0;
320: tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
321: tp_send(tpcb);
322: tpcb->tp_cong_win = tpcb->tp_ssthresh +
323: tpcb->tp_dupacks * tpcb->tp_l_tpdusize;
324: if (SEQ_GT(tpcb, onxt, tpcb->tp_sndnxt)) {
325: tpcb->tp_sndnxt = onxt;
326: tpcb->tp_sndnxt_m = onxt_m;
327: }
328:
329: } else if (tpcb->tp_dupacks > tprexmtthresh) {
330: tpcb->tp_cong_win += tpcb->tp_l_tpdusize;
331: }
332: goto done;
333: }
334: } else if (SEQ_LT(tpcb, seq, tpcb->tp_snduna))
335: goto discard_the_ack;
336: /*
337: * If the congestion window was inflated to account
338: * for the other side's cached packets, retract it.
339: */
340: if (tpcb->tp_dupacks > tprexmtthresh &&
341: tpcb->tp_cong_win > tpcb->tp_ssthresh)
342: tpcb->tp_cong_win = tpcb->tp_ssthresh;
343: tpcb->tp_r_subseq = subseq;
344: old_fcredit = tpcb->tp_fcredit;
345: tpcb->tp_fcredit = cdt;
346: if (cdt > tpcb->tp_maxfcredit)
347: tpcb->tp_maxfcredit = cdt;
348: tpcb->tp_dupacks = 0;
349:
350: if (IN_SWINDOW(tpcb, seq, tpcb->tp_snduna, tpcb->tp_sndnew)) {
351:
352: tpsbcheck(tpcb, 0);
353: bytes_acked = tp_sbdrop(tpcb, seq);
354: tpsbcheck(tpcb, 1);
355: /*
356: * If transmit timer is running and timed sequence
357: * number was acked, update smoothed round trip time.
358: * Since we now have an rtt measurement, cancel the
359: * timer backoff (cf., Phil Karn's retransmit alg.).
360: * Recompute the initial retransmit timer.
361: */
362: if (tpcb->tp_rttemit && SEQ_GT(tpcb, seq, tpcb->tp_rttseq))
363: tp_rtt_rtv(tpcb);
364: /*
365: * If all outstanding data is acked, stop retransmit timer.
366: * If there is more data to be acked, restart retransmit
367: * timer, using current (possibly backed-off) value.
368: * OSI combines the keepalive and persistance functions.
369: * So, there is no persistance timer per se, to restart.
370: */
371: if (tpcb->tp_class != TP_CLASS_0)
372: tpcb->tp_timer[TM_data_retrans] =
373: (seq == tpcb->tp_sndnew) ? 0 : tpcb->tp_rxtcur;
374: /*
375: * When new data is acked, open the congestion window.
376: * If the window gives us less than ssthresh packets
377: * in flight, open exponentially (maxseg per packet).
378: * Otherwise open linearly: maxseg per window
379: * (maxseg^2 / cwnd per packet), plus a constant
380: * fraction of a packet (maxseg/8) to help larger windows
381: * open quickly enough.
382: */
383: {
384: u_int cw = tpcb->tp_cong_win, incr = tpcb->tp_l_tpdusize;
385:
386: incr = min(incr, bytes_acked);
387: if (cw > tpcb->tp_ssthresh)
388: incr = incr * incr / cw + incr / 8;
389: tpcb->tp_cong_win =
390: min(cw + incr, tpcb->tp_sock->so_snd.sb_hiwat);
391: }
392: tpcb->tp_snduna = seq;
393: if (SEQ_LT(tpcb, tpcb->tp_sndnxt, seq)) {
394: tpcb->tp_sndnxt = seq;
395: tpcb->tp_sndnxt_m = 0;
396: }
397: bang++;
398: }
399:
400: if( cdt != 0 && old_fcredit == 0 ) {
401: tpcb->tp_sendfcc = 1;
402: }
403: if (cdt == 0) {
404: if (old_fcredit != 0)
405: IncStat(ts_zfcdt);
406: /* The following might mean that the window shrunk */
407: if (tpcb->tp_timer[TM_data_retrans]) {
408: tpcb->tp_timer[TM_data_retrans] = 0;
409: tpcb->tp_timer[TM_sendack] = tpcb->tp_dt_ticks;
410: if (tpcb->tp_sndnxt != tpcb->tp_snduna) {
411: tpcb->tp_sndnxt = tpcb->tp_snduna;
412: tpcb->tp_sndnxt_m = 0;
413: }
414: }
415: }
416: tpcb->tp_fcredit = cdt;
417: bang |= (old_fcredit < cdt);
418:
419: done:
420: IFDEBUG(D_ACKRECV)
421: printf("goodack returns 0x%x, cdt 0x%x ocdt 0x%x cwin 0x%x\n",
422: bang, cdt, old_fcredit, tpcb->tp_cong_win);
423: ENDDEBUG
424: /* if (bang) XXXXX Very bad to remove this test, but somethings broken */
425: tp_send(tpcb);
426: return (bang);
427: }
428:
429: /*
430: * CALLED FROM:
431: * tp_goodack()
432: * FUNCTION and ARGUMENTS:
433: * drops everything up TO but not INCLUDING seq # (seq)
434: * from the retransmission queue.
435: */
436: tp_sbdrop(tpcb, seq)
437: register struct tp_pcb *tpcb;
438: SeqNum seq;
439: {
440: struct sockbuf *sb = &tpcb->tp_sock->so_snd;
441: register int i = SEQ_SUB(tpcb, seq, tpcb->tp_snduna);
442: int oldcc = sb->sb_cc, oldi = i;
443:
444: if (i >= tpcb->tp_seqhalf)
445: printf("tp_spdropping too much -- should panic");
446: while (i-- > 0)
447: sbdroprecord(sb);
448: IFDEBUG(D_ACKRECV)
449: printf("tp_sbdroping %d pkts %d bytes on %x at 0x%x\n",
450: oldi, oldcc - sb->sb_cc, tpcb, seq);
451: ENDDEBUG
452: if (sb->sb_flags & SB_NOTIFY)
453: sowwakeup(tpcb->tp_sock);
454: return (oldcc - sb->sb_cc);
455: }
456:
457: /*
458: * CALLED FROM:
459: * tp.trans on user send request, arrival of AK and arrival of XAK
460: * FUNCTION and ARGUMENTS:
461: * Emits tpdus starting at sequence number (tpcb->tp_sndnxt).
462: * Emits until a) runs out of data, or b) runs into an XPD mark, or
463: * c) it hits seq number (highseq) limited by cong or credit.
464: *
465: * If you want XPD to buffer > 1 du per socket buffer, you can
466: * modifiy this to issue XPD tpdus also, but then it'll have
467: * to take some argument(s) to distinguish between the type of DU to
468: * hand tp_emit.
469: *
470: * When something is sent for the first time, its time-of-send
471: * is stashed (in system clock ticks rather than pf_slowtimo ticks).
472: * When the ack arrives, the smoothed round-trip time is figured
473: * using this value.
474: */
475: void
476: tp_send(tpcb)
477: register struct tp_pcb *tpcb;
478: {
479: register int len;
480: register struct mbuf *m;
481: struct mbuf *mb = 0;
482: struct sockbuf *sb = &tpcb->tp_sock->so_snd;
483: unsigned int eotsdu = 0;
484: SeqNum highseq, checkseq;
485: int idle, idleticks, off, cong_win;
486: #ifdef TP_PERF_MEAS
487: int send_start_time = ticks;
488: SeqNum oldnxt = tpcb->tp_sndnxt;
489: #endif /* TP_PERF_MEAS */
490:
491: idle = (tpcb->tp_snduna == tpcb->tp_sndnew);
492: if (idle) {
493: idleticks = tpcb->tp_inact_ticks - tpcb->tp_timer[TM_inact];
494: if (idleticks > tpcb->tp_dt_ticks)
495: /*
496: * We have been idle for "a while" and no acks are
497: * expected to clock out any data we send --
498: * slow start to get ack "clock" running again.
499: */
500: tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
501: }
502:
503: cong_win = tpcb->tp_cong_win;
504: highseq = SEQ(tpcb, tpcb->tp_fcredit + tpcb->tp_snduna);
505: if (tpcb->tp_Xsnd.sb_mb)
506: highseq = SEQ_MIN(tpcb, highseq, tpcb->tp_sndnew);
507:
508: IFDEBUG(D_DATA)
509: printf("tp_send enter tpcb 0x%x nxt 0x%x win %d high 0x%x\n",
510: tpcb, tpcb->tp_sndnxt, cong_win, highseq);
511: ENDDEBUG
512: IFTRACE(D_DATA)
513: tptraceTPCB( TPPTmisc, "tp_send sndnew snduna",
514: tpcb->tp_sndnew, tpcb->tp_snduna, 0, 0);
515: tptraceTPCB( TPPTmisc, "tp_send tpcb->tp_sndnxt win fcredit congwin",
516: tpcb->tp_sndnxt, cong_win, tpcb->tp_fcredit, tpcb->tp_cong_win);
517: ENDTRACE
518: IFTRACE(D_DATA)
519: tptraceTPCB( TPPTmisc, "tp_send 2 nxt high fcredit congwin",
520: tpcb->tp_sndnxt, highseq, tpcb->tp_fcredit, cong_win);
521: ENDTRACE
522:
523: if (tpcb->tp_sndnxt_m)
524: m = tpcb->tp_sndnxt_m;
525: else {
526: off = SEQ_SUB(tpcb, tpcb->tp_sndnxt, tpcb->tp_snduna);
527: for (m = sb->sb_mb; m && off > 0; m = m->m_next)
528: off--;
529: }
530: send:
531: /*
532: * Avoid silly window syndrome here . . . figure out how!
533: */
534: checkseq = tpcb->tp_sndnum;
535: if (idle && SEQ_LT(tpcb, tpcb->tp_sndnum, highseq))
536: checkseq = highseq; /* i.e. DON'T retain highest assigned packet */
537:
538: while ((SEQ_LT(tpcb, tpcb->tp_sndnxt, highseq)) && m && cong_win > 0) {
539:
540: eotsdu = (m->m_flags & M_EOR) != 0;
541: len = m->m_pkthdr.len;
542: if (tpcb->tp_sndnxt == checkseq && eotsdu == 0 &&
543: len < (tpcb->tp_l_tpdusize / 2))
544: break; /* Nagle . . . . . */
545: cong_win -= len;
546: /* make a copy - mb goes into the retransmission list
547: * while m gets emitted. m_copy won't copy a zero-length mbuf.
548: */
549: mb = m;
550: m = m_copy(mb, 0, M_COPYALL);
551: if (m == MNULL)
552: break;
553: IFTRACE(D_STASH)
554: tptraceTPCB( TPPTmisc,
555: "tp_send mcopy nxt high eotsdu len",
556: tpcb->tp_sndnxt, highseq, eotsdu, len);
557: ENDTRACE
558:
559: IFDEBUG(D_DATA)
560: printf("tp_sending tpcb 0x%x nxt 0x%x\n",
561: tpcb, tpcb->tp_sndnxt);
562: ENDDEBUG
563: /* when headers are precomputed, may need to fill
564: in checksum here */
565: if (tpcb->tp_sock->so_error =
566: tp_emit(DT_TPDU_type, tpcb, tpcb->tp_sndnxt, eotsdu, m)) {
567: /* error */
568: break;
569: }
570: m = mb->m_nextpkt;
571: tpcb->tp_sndnxt_m = m;
572: if (tpcb->tp_sndnxt == tpcb->tp_sndnew) {
573: SEQ_INC(tpcb, tpcb->tp_sndnew);
574: /*
575: * Time this transmission if not a retransmission and
576: * not currently timing anything.
577: */
578: if (tpcb->tp_rttemit == 0) {
579: tpcb->tp_rttemit = ticks;
580: tpcb->tp_rttseq = tpcb->tp_sndnxt;
581: }
582: tpcb->tp_sndnxt = tpcb->tp_sndnew;
583: } else
584: SEQ_INC(tpcb, tpcb->tp_sndnxt);
585: /*
586: * Set retransmit timer if not currently set.
587: * Initial value for retransmit timer is smoothed
588: * round-trip time + 2 * round-trip time variance.
589: * Initialize shift counter which is used for backoff
590: * of retransmit time.
591: */
592: if (tpcb->tp_timer[TM_data_retrans] == 0 &&
593: tpcb->tp_class != TP_CLASS_0) {
594: tpcb->tp_timer[TM_data_retrans] = tpcb->tp_dt_ticks;
595: tpcb->tp_timer[TM_sendack] = tpcb->tp_keepalive_ticks;
596: tpcb->tp_rxtshift = 0;
597: }
598: }
599: if (SEQ_GT(tpcb, tpcb->tp_sndnew, tpcb->tp_sndnum))
600: tpcb->tp_oktonagle = 0;
601: #ifdef TP_PERF_MEAS
602: IFPERF(tpcb)
603: {
604: register int npkts;
605: int elapsed = ticks - send_start_time, *t;
606: struct timeval now;
607:
608: npkts = SEQ_SUB(tpcb, tpcb->tp_sndnxt, oldnxt);
609:
610: if (npkts > 0)
611: tpcb->tp_Nwindow++;
612:
613: if (npkts > TP_PM_MAX)
614: npkts = TP_PM_MAX;
615:
616: t = &(tpcb->tp_p_meas->tps_sendtime[npkts]);
617: *t += (t - elapsed) >> TP_RTT_ALPHA;
618:
619: if (mb == 0) {
620: IncPStat(tpcb, tps_win_lim_by_data[npkts] );
621: } else {
622: IncPStat(tpcb, tps_win_lim_by_cdt[npkts] );
623: /* not true with congestion-window being used */
624: }
625: now.tv_sec = elapsed / hz;
626: now.tv_usec = (elapsed - (hz * now.tv_sec)) * 1000000 / hz;
627: tpmeas( tpcb->tp_lref,
628: TPsbsend, &elapsed, newseq, tpcb->tp_Nwindow, npkts);
629: }
630: ENDPERF
631: #endif /* TP_PERF_MEAS */
632:
633:
634: IFTRACE(D_DATA)
635: tptraceTPCB( TPPTmisc,
636: "tp_send at end: new nxt eotsdu error",
637: tpcb->tp_sndnew, tpcb->tp_sndnxt, eotsdu, tpcb->tp_sock->so_error);
638:
639: ENDTRACE
640: }
641:
642: int TPNagleok;
643: int TPNagled;
644:
645: tp_packetize(tpcb, m, eotsdu)
646: register struct tp_pcb *tpcb;
647: register struct mbuf *m;
648: int eotsdu;
649: {
650: register struct mbuf *n;
651: register struct sockbuf *sb = &tpcb->tp_sock->so_snd;
652: int maxsize = tpcb->tp_l_tpdusize
653: - tp_headersize(DT_TPDU_type, tpcb)
654: - (tpcb->tp_use_checksum?4:0) ;
655: int totlen = m->m_pkthdr.len;
656: struct mbuf *m_split();
657: /*
658: * Pre-packetize the data in the sockbuf
659: * according to negotiated mtu. Do it here
660: * where we can safely wait for mbufs.
661: *
662: * This presumes knowledge of sockbuf conventions.
663: * TODO: allocate space for header and fill it in (once!).
664: */
665: IFDEBUG(D_DATA)
666: printf("SEND BF: maxsize %d totlen %d eotsdu %d sndnum 0x%x\n",
667: maxsize, totlen, eotsdu, tpcb->tp_sndnum);
668: ENDTRACE
669: if (tpcb->tp_oktonagle) {
670: if ((n = sb->sb_mb) == 0)
671: panic("tp_packetize");
672: while (n->m_act)
673: n = n->m_act;
674: if (n->m_flags & M_EOR)
675: panic("tp_packetize 2");
676: SEQ_INC(tpcb, tpcb->tp_sndnum);
677: if (totlen + n->m_pkthdr.len < maxsize) {
678: /* There is an unsent packet with space, combine data */
679: struct mbuf *old_n = n;
680: tpsbcheck(tpcb,3);
681: n->m_pkthdr.len += totlen;
682: while (n->m_next)
683: n = n->m_next;
684: sbcompress(sb, m, n);
685: tpsbcheck(tpcb,4);
686: n = old_n;
687: TPNagled++;
688: goto out;
689: }
690: }
691: while (m) {
692: n = m;
693: if (totlen > maxsize) {
694: if ((m = m_split(n, maxsize, M_WAIT)) == 0)
695: panic("tp_packetize");
696: } else
697: m = 0;
698: totlen -= maxsize;
699: tpsbcheck(tpcb, 5);
700: sbappendrecord(sb, n);
701: tpsbcheck(tpcb, 6);
702: SEQ_INC(tpcb, tpcb->tp_sndnum);
703: }
704: out:
705: if (eotsdu) {
706: n->m_flags |= M_EOR; /* XXX belongs at end */
707: tpcb->tp_oktonagle = 0;
708: } else {
709: SEQ_DEC(tpcb, tpcb->tp_sndnum);
710: tpcb->tp_oktonagle = 1;
711: TPNagleok++;
712: }
713: IFDEBUG(D_DATA)
714: printf("SEND out: oktonagle %d sndnum 0x%x\n",
715: tpcb->tp_oktonagle, tpcb->tp_sndnum);
716: ENDTRACE
717: return 0;
718: }
719:
720:
721: /*
722: * NAME: tp_stash()
723: * CALLED FROM:
724: * tp.trans on arrival of a DT tpdu
725: * FUNCTION, ARGUMENTS, and RETURN VALUE:
726: * Returns 1 if
727: * a) something new arrived and it's got eotsdu_reached bit on,
728: * b) this arrival was caused other out-of-sequence things to be
729: * accepted, or
730: * c) this arrival is the highest seq # for which we last gave credit
731: * (sender just sent a whole window)
732: * In other words, returns 1 if tp should send an ack immediately, 0 if
733: * the ack can wait a while.
734: *
735: * Note: this implementation no longer renegs on credit, (except
736: * when debugging option D_RENEG is on, for the purpose of testing
737: * ack subsequencing), so we don't need to check for incoming tpdus
738: * being in a reneged portion of the window.
739: */
740:
741: tp_stash(tpcb, e)
742: register struct tp_pcb *tpcb;
743: register struct tp_event *e;
744: {
745: register int ack_reason= tpcb->tp_ack_strat & ACK_STRAT_EACH;
746: /* 0--> delay acks until full window */
747: /* 1--> ack each tpdu */
748: #ifndef lint
749: #define E e->ATTR(DT_TPDU)
750: #else /* lint */
751: #define E e->ev_union.EV_DT_TPDU
752: #endif /* lint */
753:
754: if ( E.e_eot ) {
755: register struct mbuf *n = E.e_data;
756: n->m_flags |= M_EOR;
757: n->m_act = 0;
758: }
759: IFDEBUG(D_STASH)
760: dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
761: "stash: so_rcv before appending");
762: dump_mbuf(E.e_data,
763: "stash: e_data before appending");
764: ENDDEBUG
765:
766: IFPERF(tpcb)
767: PStat(tpcb, Nb_from_ll) += E.e_datalen;
768: tpmeas(tpcb->tp_lref, TPtime_from_ll, &e->e_time,
769: E.e_seq, (u_int)PStat(tpcb, Nb_from_ll), (u_int)E.e_datalen);
770: ENDPERF
771:
772: if (E.e_seq == tpcb->tp_rcvnxt) {
773:
774: IFDEBUG(D_STASH)
775: printf("stash EQ: seq 0x%x datalen 0x%x eot 0x%x\n",
776: E.e_seq, E.e_datalen, E.e_eot);
777: ENDDEBUG
778:
779: IFTRACE(D_STASH)
780: tptraceTPCB(TPPTmisc, "stash EQ: seq len eot",
781: E.e_seq, E.e_datalen, E.e_eot, 0);
782: ENDTRACE
783:
784: SET_DELACK(tpcb);
785:
786: sbappend(&tpcb->tp_sock->so_rcv, E.e_data);
787:
788: SEQ_INC( tpcb, tpcb->tp_rcvnxt );
789: /*
790: * move chains from the reassembly queue to the socket buffer
791: */
792: if (tpcb->tp_rsycnt) {
793: register struct mbuf **mp;
794: struct mbuf **mplim;
795:
796: mp = tpcb->tp_rsyq + (tpcb->tp_rcvnxt % tpcb->tp_maxlcredit);
797: mplim = tpcb->tp_rsyq + tpcb->tp_maxlcredit;
798:
799: while (tpcb->tp_rsycnt && *mp) {
800: sbappend(&tpcb->tp_sock->so_rcv, *mp);
801: tpcb->tp_rsycnt--;
802: *mp = 0;
803: SEQ_INC(tpcb, tpcb->tp_rcvnxt);
804: ack_reason |= ACK_REORDER;
805: if (++mp == mplim)
806: mp = tpcb->tp_rsyq;
807: }
808: }
809: IFDEBUG(D_STASH)
810: dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
811: "stash: so_rcv after appending");
812: ENDDEBUG
813:
814: } else {
815: register struct mbuf **mp;
816: SeqNum uwe;
817:
818: IFTRACE(D_STASH)
819: tptraceTPCB(TPPTmisc, "stash Reseq: seq rcvnxt lcdt",
820: E.e_seq, tpcb->tp_rcvnxt, tpcb->tp_lcredit, 0);
821: ENDTRACE
822:
823: if (tpcb->tp_rsyq == 0)
824: tp_rsyset(tpcb);
825: uwe = SEQ(tpcb, tpcb->tp_rcvnxt + tpcb->tp_maxlcredit);
826: if (tpcb->tp_rsyq == 0 ||
827: !IN_RWINDOW(tpcb, E.e_seq, tpcb->tp_rcvnxt, uwe)) {
828: ack_reason = ACK_DONT;
829: m_freem(E.e_data);
830: } else if (*(mp = tpcb->tp_rsyq + (E.e_seq % tpcb->tp_maxlcredit))) {
831: IFDEBUG(D_STASH)
832: printf("tp_stash - drop & ack\n");
833: ENDDEBUG
834:
835: /* retransmission - drop it and force an ack */
836: IncStat(ts_dt_dup);
837: IFPERF(tpcb)
838: IncPStat(tpcb, tps_n_ack_cuz_dup);
839: ENDPERF
840:
841: m_freem(E.e_data);
842: ack_reason |= ACK_DUP;
843: } else {
844: *mp = E.e_data;
845: tpcb->tp_rsycnt++;
846: ack_reason = ACK_DONT;
847: }
848: }
849: /* there were some comments of historical interest here. */
850: {
851: LOCAL_CREDIT(tpcb);
852:
853: if ( E.e_seq == tpcb->tp_sent_uwe )
854: ack_reason |= ACK_STRAT_FULLWIN;
855:
856: IFTRACE(D_STASH)
857: tptraceTPCB(TPPTmisc,
858: "end of stash, eot, ack_reason, sent_uwe ",
859: E.e_eot, ack_reason, tpcb->tp_sent_uwe, 0);
860: ENDTRACE
861:
862: if ( ack_reason == ACK_DONT ) {
863: IncStat( ts_ackreason[ACK_DONT] );
864: return 0;
865: } else {
866: IFPERF(tpcb)
867: if(ack_reason & ACK_STRAT_EACH) {
868: IncPStat(tpcb, tps_n_ack_cuz_strat);
869: } else if(ack_reason & ACK_STRAT_FULLWIN) {
870: IncPStat(tpcb, tps_n_ack_cuz_fullwin);
871: } else if(ack_reason & ACK_REORDER) {
872: IncPStat(tpcb, tps_n_ack_cuz_reorder);
873: }
874: tpmeas(tpcb->tp_lref, TPtime_ack_sent, 0,
875: SEQ_ADD(tpcb, E.e_seq, 1), 0, 0);
876: ENDPERF
877: {
878: register int i;
879:
880: /* keep track of all reasons that apply */
881: for( i=1; i<_ACK_NUM_REASONS_ ;i++) {
882: if( ack_reason & (1<<i) )
883: IncStat( ts_ackreason[i] );
884: }
885: }
886: return 1;
887: }
888: }
889: }
890:
891: /*
892: * tp_rsyflush - drop all the packets on the reassembly queue.
893: * Do this when closing the socket, or when somebody has changed
894: * the space avaible in the receive socket (XXX).
895: */
896: tp_rsyflush(tpcb)
897: register struct tp_pcb *tpcb;
898: {
899: register struct mbuf *m, **mp;
900: if (tpcb->tp_rsycnt) {
901: for (mp == tpcb->tp_rsyq + tpcb->tp_maxlcredit;
902: --mp >= tpcb->tp_rsyq; )
903: if (*mp) {
904: tpcb->tp_rsycnt--;
905: m_freem(*mp);
906: }
907: if (tpcb->tp_rsycnt) {
908: printf("tp_rsyflush %x\n", tpcb);
909: tpcb->tp_rsycnt = 0;
910: }
911: }
912: FREE((caddr_t)tpcb->tp_rsyq, M_PCB);
913: tpcb->tp_rsyq = 0;
914: }
915:
916: tp_rsyset(tpcb)
917: register struct tp_pcb *tpcb;
918: {
919: register struct socket *so = tpcb->tp_sock;
920: int maxcredit = tpcb->tp_xtd_format ? 0xffff : 0xf;
921: int old_credit = tpcb->tp_maxlcredit;
922: caddr_t rsyq;
923:
924: tpcb->tp_maxlcredit = maxcredit = min(maxcredit,
925: (so->so_rcv.sb_hiwat + tpcb->tp_l_tpdusize)/ tpcb->tp_l_tpdusize);
926:
927: if (old_credit == tpcb->tp_maxlcredit && tpcb->tp_rsyq != 0)
928: return;
929: maxcredit *= sizeof(struct mbuf *);
930: if (tpcb->tp_rsyq)
931: tp_rsyflush(tpcb);
932: // if (rsyq = (caddr_t)malloc(maxcredit, M_PCB, M_NOWAIT))
933: MALLOC(rsyq, caddr_t, maxcredit, M_PCB, M_NOWAIT);
934: if (rsyq)
935: bzero(rsyq, maxcredit);
936: tpcb->tp_rsyq = (struct mbuf **)rsyq;
937: }
938:
939: tpsbcheck(tpcb, i)
940: struct tp_pcb *tpcb;
941: {
942: register struct mbuf *n, *m;
943: register int len = 0, mbcnt = 0, pktlen;
944: struct sockbuf *sb = &tpcb->tp_sock->so_snd;
945:
946: for (n = sb->sb_mb; n; n = n->m_nextpkt) {
947: if ((n->m_flags & M_PKTHDR) == 0)
948: panic("tpsbcheck nohdr");
949: pktlen = len + n->m_pkthdr.len;
950: for (m = n; m; m = m->m_next) {
951: len += m->m_len;
952: mbcnt += MSIZE;
953: if (m->m_flags & M_EXT)
954: mbcnt += m->m_ext.ext_size;
955: }
956: if (len != pktlen) {
957: printf("test %d; len %d != pktlen %d on mbuf 0x%x\n",
958: i, len, pktlen, n);
959: panic("tpsbcheck short");
960: }
961: }
962: if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
963: printf("test %d: cc %d != %d || mbcnt %d != %d\n", i, len, sb->sb_cc,
964: mbcnt, sb->sb_mbcnt);
965: panic("tpsbcheck");
966: }
967: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.