|
|
1.1 root 1: /*
2: * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3: *
4: * @APPLE_LICENSE_HEADER_START@
5: *
6: * The contents of this file constitute Original Code as defined in and
7: * are subject to the Apple Public Source License Version 1.1 (the
8: * "License"). You may not use this file except in compliance with the
9: * License. Please obtain a copy of the License at
10: * http://www.apple.com/publicsource and read it before using this file.
11: *
12: * This Original Code and all software distributed under the License are
13: * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14: * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15: * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16: * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17: * License for the specific language governing rights and limitations
18: * under the License.
19: *
20: * @APPLE_LICENSE_HEADER_END@
21: */
22: /*
23: * Copyright (c) 1998 Luigi Rizzo
24: *
25: * Redistribution and use in source forms, with and without modification,
26: * are permitted provided that this entire comment appears intact.
27: *
28: * Redistribution in binary form may occur without any restrictions.
29: * Obviously, it would be nice if you gave credit where credit is due
30: * but requiring it would be too onerous.
31: *
32: * This software is provided ``AS IS'' without any warranties of any kind.
33: *
34: */
35:
36: /*
37: * This module implements IP dummynet, a bandwidth limiter/delay emulator
38: * used in conjunction with the ipfw package.
39: *
40: * Changes:
41: *
42: * 980821: changed conventions in the queueing logic
43: * packets passed from dummynet to ip_in/out are prepended with
44: * a vestigial mbuf type MT_DUMMYNET which contains a pointer
45: * to the matching rule.
46: * ip_input/output will extract the parameters, free the vestigial mbuf,
47: * and do the processing.
48: *
49: * 980519: fixed behaviour when deleting rules.
50: * 980518: added splimp()/splx() to protect against races
51: * 980513: initial release
52: */
53:
54: /* include files marked with XXX are probably not needed */
55:
56: #include <sys/param.h>
57: #include <sys/systm.h>
58: #include <sys/malloc.h>
59: #include <sys/mbuf.h>
60: #include <sys/queue.h> /* XXX */
61: #include <sys/kernel.h>
62: #include <sys/socket.h>
63: #include <sys/socketvar.h>
64: #include <sys/time.h>
65: #include <sys/sysctl.h>
66: #include <net/if.h>
67: #include <net/route.h>
68: #include <netinet/in.h>
69: #include <netinet/in_systm.h>
70: #include <netinet/in_var.h>
71: #include <netinet/ip.h>
72: #include <netinet/ip_fw.h>
73: #include <netinet/ip_dummynet.h>
74: #include <netinet/ip_var.h>
75:
76: #if BRIDGE
77: #include <netinet/if_ether.h> /* for struct arpcom */
78: #include <net/bridge.h>
79: #endif
80:
81: static struct dn_pipe *all_pipes = NULL ; /* list of all pipes */
82:
83: static int dn_debug = 0 ; /* verbose */
84: static int dn_calls = 0 ; /* number of calls */
85: static int dn_idle = 1;
86: #ifdef SYSCTL_NODE
87: SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet");
88: SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug, CTLFLAG_RW, &dn_debug, 0, "");
89: SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, calls, CTLFLAG_RD, &dn_calls, 0, "");
90: SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, idle, CTLFLAG_RD, &dn_idle, 0, "");
91: #endif
92:
93: static int ip_dn_ctl(struct sockopt *sopt);
94:
95: static void rt_unref(struct rtentry *);
96: static void dummynet(void *);
97: static void dn_restart(void);
98: static void dn_move(struct dn_pipe *pipe, int immediate);
99: static void dummynet_flush(void);
100:
101: /*
102: * the following is needed when deleting a pipe, because rules can
103: * hold references to the pipe.
104: */
105: extern LIST_HEAD (ip_fw_head, ip_fw_chain) ip_fw_chain;
106:
107: /*
108: * invoked to reschedule the periodic task if necessary.
109: * Should only be called when dn_idle = 1 ;
110: */
111: static void
112: dn_restart()
113: {
114: struct dn_pipe *pipe;
115:
116: if (!dn_idle)
117: return;
118:
119: for (pipe = all_pipes ; pipe ; pipe = pipe->next ) {
120: /* if there any pipe that needs work, restart */
121: if (pipe->r.head || pipe->p.head || pipe->numbytes < 0 ) {
122: dn_idle = 0;
123: timeout(dummynet, NULL, 1);
124: return ;
125: }
126: }
127: }
128:
129: static void
130: rt_unref(struct rtentry *rt)
131: {
132: if (rt == NULL)
133: return ;
134: if (rt->rt_refcnt <= 0)
135: printf("-- warning, refcnt now %d, decreasing\n", rt->rt_refcnt);
136: RTFREE(rt);
137: }
138:
139: /*
140: * move packets from R-queue to P-queue
141: */
142: static void
143: dn_move(struct dn_pipe *pipe, int immediate)
144: {
145: struct dn_pkt *pkt;
146:
147: /*
148: * consistency check, should catch new pipes which are
149: * not initialized properly.
150: */
151: if ( pipe->p.head == NULL &&
152: pipe->ticks_from_last_insert != pipe->delay) {
153: printf("Warning, empty pipe and delay %d (should be %d)\n",
154: pipe->ticks_from_last_insert, pipe->delay);
155: pipe->ticks_from_last_insert = pipe->delay;
156: }
157: /* this ought to go in dn_dequeue() */
158: if (!immediate && pipe->ticks_from_last_insert < pipe->delay)
159: pipe->ticks_from_last_insert++;
160: if ( pkt = pipe->r.head ) {
161: /*
162: * Move at most numbytes bytes from src and move to dst.
163: * delay is set to ticks_from_last_insert, which
164: * is reset after the first insertion;
165: */
166: while ( pkt ) {
167: struct ip *ip=mtod(pkt->dn_m, struct ip *);
168:
169: /*
170: * queue limitation: pass packets down if the len is
171: * such that the pkt would go out before the next tick.
172: */
173: if (pipe->bandwidth) {
174: if (pipe->numbytes < ip->ip_len)
175: break;
176: pipe->numbytes -= ip->ip_len;
177: }
178: pipe->r_len--; /* elements in queue */
179: pipe->r_len_bytes -= ip->ip_len ;
180:
181: /*
182: * to add delay jitter, must act here. A lower value
183: * (bounded to 0) means lower delay.
184: */
185: pkt->delay = pipe->ticks_from_last_insert;
186: pipe->ticks_from_last_insert = 0;
187: /* compensate the decrement done next in dn_dequeue */
188: if (!immediate && pkt->delay >0 && pipe->p.head==NULL)
189: pkt->delay++;
190: if (pipe->p.head == NULL)
191: pipe->p.head = pkt;
192: else
193: (struct dn_pkt *)pipe->p.tail->dn_next = pkt;
194: pipe->p.tail = pkt;
195: pkt = (struct dn_pkt *)pkt->dn_next;
196: pipe->p.tail->dn_next = NULL;
197: }
198: pipe->r.head = pkt;
199:
200: /*** XXX just a sanity check */
201: if ( ( pkt == NULL && pipe->r_len != 0) ||
202: ( pkt != NULL && pipe->r_len == 0) )
203: printf("-- Warning, pipe head %p len %d\n",
204: (void *)pkt, pipe->r_len);
205: }
206:
207: /*
208: * deliver packets downstream after the delay in the P-queue.
209: */
210:
211: if (pipe->p.head == NULL)
212: return;
213: if (!immediate)
214: pipe->p.head->delay--;
215: while ( (pkt = pipe->p.head) && pkt->delay < 1) {
216: /*
217: * first unlink, then call procedures since ip_input()
218: * can result in a call to ip_output cnd viceversa,
219: * thus causing nested calls
220: */
221: pipe->p.head = (struct dn_pkt *) pkt->dn_next ;
222:
223: /*
224: * the trick to avoid flow-id settings here is to prepend a
225: * vestigial mbuf to the packet, with the following values:
226: * m_type = MT_DUMMYNET
227: * m_next = the actual mbuf to be processed by ip_input/output
228: * m_data = the matching rule
229: * The vestigial element is the same memory area used by
230: * the dn_pkt, and IS FREED IN ip_input/ip_output. IT IS
231: * NOT A REAL MBUF, just a block of memory acquired with malloc().
232: */
233: switch (pkt->dn_dir) {
234: case DN_TO_IP_OUT: {
235: struct rtentry *tmp_rt = pkt->ro.ro_rt ;
236:
237: (void)ip_output((struct mbuf *)pkt, (struct mbuf *)pkt->ifp,
238: &(pkt->ro), pkt->dn_hlen, NULL);
239: rt_unref (tmp_rt) ;
240: }
241: break ;
242: case DN_TO_IP_IN :
243: ip_input((struct mbuf *)pkt) ;
244: break ;
245: #if BRIDGE
246: case DN_TO_BDG_FWD :
247: bdg_forward((struct mbuf **)&pkt, pkt->ifp);
248: break ;
249: #endif
250: default:
251: printf("dummynet: bad switch %d!\n", pkt->dn_dir);
252: m_freem(pkt->dn_m);
253: FREE(pkt, M_IPFW);
254: break ;
255: }
256: }
257: }
258: /*
259: * this is the periodic task that moves packets between the R-
260: * and the P- queue
261: */
262: /*ARGSUSED*/
263: void
264: dummynet(void * __unused unused)
265: {
266: struct dn_pipe *p ;
267: int s ;
268:
269: dn_calls++ ;
270: for (p = all_pipes ; p ; p = p->next ) {
271: /*
272: * Increment the amount of data that can be sent. However,
273: * don't do that if the channel is idle
274: * (r.head == NULL && numbytes >= bandwidth).
275: * This bug fix is from tim shepard ([email protected])
276: */
277: s = splimp();
278: if (p->r.head != NULL || p->numbytes < p->bandwidth )
279: p->numbytes += p->bandwidth ;
280: dn_move(p, 0); /* is it really 0 (also below) ? */
281: splx(s);
282: }
283:
284: /*
285: * finally, if some queue has data, restart the timer.
286: */
287: dn_idle = 1;
288: dn_restart();
289: }
290:
291: /*
292: * dummynet hook for packets.
293: * input and output use the same code, so i use bit 16 in the pipe
294: * number to chose the direction: 1 for output packets, 0 for input.
295: * for input, only m is significant. For output, also the others.
296: */
297: int
298: dummynet_io(int pipe_nr, int dir,
299: struct mbuf *m, struct ifnet *ifp, struct route *ro, int hlen,
300: struct ip_fw_chain *rule)
301: {
302: struct dn_pkt *pkt;
303: struct dn_pipe *pipe;
304: struct ip *ip=mtod(m, struct ip *);
305:
306: int s=splimp();
307:
308: pipe_nr &= 0xffff ;
309: /*
310: * locate pipe. First time is expensive, next have direct access.
311: */
312:
313: if ( (pipe = rule->rule->pipe_ptr) == NULL ) {
314: for (pipe=all_pipes; pipe && pipe->pipe_nr !=pipe_nr; pipe=pipe->next)
315: ;
316: if (pipe == NULL) {
317: splx(s);
318: if (dn_debug)
319: printf("warning, pkt for no pipe %d\n", pipe_nr);
320: m_freem(m);
321: return 0 ;
322: } else
323: rule->rule->pipe_ptr = pipe ;
324: }
325:
326: /*
327: * should i drop ?
328: * This section implements random packet drop.
329: */
330: if ( (pipe->plr && random() < pipe->plr) ||
331: (pipe->queue_size && pipe->r_len >= pipe->queue_size) ||
332: (pipe->queue_size_bytes &&
333: ip->ip_len + pipe->r_len_bytes > pipe->queue_size_bytes) ||
334: (pkt = (struct dn_pkt *) _MALLOC(sizeof (*pkt),
335: M_IPFW, M_NOWAIT) ) == NULL ) {
336: splx(s);
337: if (dn_debug)
338: printf("-- dummynet: drop from pipe %d, have %d pks, %d bytes\n",
339: pipe_nr, pipe->r_len, pipe->r_len_bytes);
340: pipe->r_drops++ ;
341: m_freem(m);
342: return 0 ; /* XXX error */
343: }
344: bzero(pkt, sizeof(*pkt) );
345: /* build and enqueue packet */
346: pkt->hdr.mh_type = MT_DUMMYNET ;
347: (struct ip_fw_chain *)pkt->hdr.mh_data = rule ;
348: pkt->dn_next = NULL;
349: pkt->dn_m = m;
350: pkt->dn_dir = dir ;
351: pkt->delay = 0;
352:
353: pkt->ifp = ifp;
354: if (dir == DN_TO_IP_OUT) {
355: pkt->ro = *ro; /* XXX copied! */
356: if (ro->ro_rt)
357: ro->ro_rt->rt_refcnt++ ; /* XXX */
358: }
359: pkt->dn_hlen = hlen;
360: if (pipe->r.head == NULL)
361: pipe->r.head = pkt;
362: else
363: (struct dn_pkt *)pipe->r.tail->dn_next = pkt;
364: pipe->r.tail = pkt;
365: pipe->r_len++;
366: pipe->r_len_bytes += ip->ip_len ;
367:
368: /*
369: * here we could implement RED if we like to
370: */
371:
372: if (pipe->r.head == pkt) { /* process immediately */
373: dn_move(pipe, 1);
374: }
375: splx(s);
376: if (dn_idle)
377: dn_restart();
378: return 0;
379: }
380:
381: /*
382: * dispose all packets queued on a pipe
383: */
384: static void
385: purge_pipe(struct dn_pipe *pipe)
386: {
387: struct dn_pkt *pkt, *n ;
388: struct rtentry *tmp_rt ;
389:
390: for (pkt = pipe->r.head ; pkt ; ) {
391: rt_unref (tmp_rt = pkt->ro.ro_rt ) ;
392: m_freem(pkt->dn_m);
393: n = pkt ;
394: pkt = (struct dn_pkt *)pkt->dn_next ;
395: FREE(n, M_IPFW) ;
396: }
397: for (pkt = pipe->p.head ; pkt ; ) {
398: rt_unref (tmp_rt = pkt->ro.ro_rt ) ;
399: m_freem(pkt->dn_m);
400: n = pkt ;
401: pkt = (struct dn_pkt *)pkt->dn_next ;
402: FREE(n, M_IPFW) ;
403: }
404: }
405:
406: /*
407: * delete all pipes returning memory
408: */
409: static void
410: dummynet_flush()
411: {
412: struct dn_pipe *q, *p = all_pipes ;
413: int s = splnet() ;
414:
415: all_pipes = NULL ;
416: splx(s) ;
417: /*
418: * purge all queued pkts and delete all pipes
419: */
420: for ( ; p ; ) {
421: purge_pipe(p);
422: q = p ;
423: p = p->next ;
424: FREE(q, M_IPFW);
425: }
426: }
427:
428: extern struct ip_fw_chain *ip_fw_default_rule ;
429: /*
430: * when a firewall rule is deleted, scan all pipes and remove the flow-id
431: * from packets matching this rule.
432: */
433: void
434: dn_rule_delete(void *r)
435: {
436: struct dn_pipe *p ;
437: int matches = 0 ;
438:
439: for ( p = all_pipes ; p ; p = p->next ) {
440: struct dn_pkt *x ;
441: for (x = p->r.head ; x ; x = (struct dn_pkt *)x->dn_next )
442: if (x->hdr.mh_data == r) {
443: matches++ ;
444: x->hdr.mh_data = (void *)ip_fw_default_rule ;
445: }
446: for (x = p->p.head ; x ; x = (struct dn_pkt *)x->dn_next )
447: if (x->hdr.mh_data == r) {
448: matches++ ;
449: x->hdr.mh_data = (void *)ip_fw_default_rule ;
450: }
451: }
452: printf("dn_rule_delete, r %p, default %p%s, %d matches\n",
453: (void *)r, (void *)ip_fw_default_rule,
454: r == ip_fw_default_rule ? " AARGH!":"", matches);
455: }
456:
457: /*
458: * handler for the various dummynet socket options
459: * (get, flush, config, del)
460: */
461: static int
462: ip_dn_ctl(struct sockopt *sopt)
463: {
464: int error = 0 ;
465: size_t size ;
466: char *buf, *bp ;
467: struct dn_pipe *p, tmp_pipe ;
468:
469: struct dn_pipe *x, *a, *b ;
470:
471: /* Disallow sets in really-really secure mode. */
472: if (sopt->sopt_dir == SOPT_SET && securelevel >= 3)
473: return (EPERM);
474:
475: switch (sopt->sopt_name) {
476: default :
477: panic("ip_dn_ctl -- unknown option");
478:
479: case IP_DUMMYNET_GET :
480: for (p = all_pipes, size = 0 ; p ; p = p->next )
481: size += sizeof( *p ) ;
482: buf = _MALLOC(size, M_TEMP, M_WAITOK);
483: if (buf == 0) {
484: error = ENOBUFS ;
485: break ;
486: }
487: for (p = all_pipes, bp = buf ; p ; p = p->next ) {
488: struct dn_pipe *q = (struct dn_pipe *)bp ;
489:
490: bcopy(p, bp, sizeof( *p ) );
491: /*
492: * return bw and delay in bits/s and ms, respectively
493: */
494: q->bandwidth *= (8*hz) ;
495: q->delay = (q->delay * 1000) / hz ;
496: bp += sizeof( *p ) ;
497: }
498: error = sooptcopyout(sopt, buf, size);
499: FREE(buf, M_TEMP);
500: break ;
501: case IP_DUMMYNET_FLUSH :
502: dummynet_flush() ;
503: break ;
504: case IP_DUMMYNET_CONFIGURE :
505: p = &tmp_pipe ;
506: error = sooptcopyin(sopt, p, sizeof *p, sizeof *p);
507: if (error)
508: break ;
509: /*
510: * The config program passes parameters as follows:
511: * bandwidth = bits/second (0 = no limits);
512: * must be translated in bytes/tick.
513: * delay = ms
514: * must be translated in ticks.
515: * queue_size = slots (0 = no limit)
516: * queue_size_bytes = bytes (0 = no limit)
517: * only one can be set, must be bound-checked
518: */
519: if ( p->bandwidth > 0 ) {
520: p->bandwidth = p->bandwidth / 8 / hz ;
521: if (p->bandwidth == 0) /* too little does not make sense! */
522: p->bandwidth = 10 ;
523: }
524: p->delay = ( p->delay * hz ) / 1000 ;
525: if (p->queue_size == 0 && p->queue_size_bytes == 0)
526: p->queue_size = 100 ;
527: if (p->queue_size != 0 ) /* buffers are prevailing */
528: p->queue_size_bytes = 0 ;
529: if (p->queue_size > 100)
530: p->queue_size = 100 ;
531: if (p->queue_size_bytes > 1024*1024)
532: p->queue_size_bytes = 1024*1024 ;
533: #if 0
534: printf("ip_dn: config pipe %d %d bit/s %d ms %d bufs\n",
535: p->pipe_nr,
536: p->bandwidth * 8 * hz ,
537: p->delay * 1000 / hz , p->queue_size);
538: #endif
539: for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ;
540: a = b , b = b->next) ;
541: if (b && b->pipe_nr == p->pipe_nr) {
542: /* XXX should spl and flush old pipe... */
543: b->bandwidth = p->bandwidth ;
544: b->delay = p->delay ;
545: b->ticks_from_last_insert = p->delay ;
546: b->queue_size = p->queue_size ;
547: b->queue_size_bytes = p->queue_size_bytes ;
548: b->plr = p->plr ;
549: } else {
550: int s ;
551: x = _MALLOC(sizeof(struct dn_pipe), M_IPFW, M_DONTWAIT) ;
552: if (x == NULL) {
553: printf("ip_dummynet.c: sorry no memory\n");
554: error = ENOSPC ;
555: break ;
556: }
557: bzero(x, sizeof(*x) );
558: x->bandwidth = p->bandwidth ;
559: x->delay = p->delay ;
560: x->ticks_from_last_insert = p->delay ;
561: x->pipe_nr = p->pipe_nr ;
562: x->queue_size = p->queue_size ;
563: x->queue_size_bytes = p->queue_size_bytes ;
564: x->plr = p->plr ;
565:
566: s = splnet() ;
567: x->next = b ;
568: if (a == NULL)
569: all_pipes = x ;
570: else
571: a->next = x ;
572: splx(s);
573: }
574: break ;
575:
576: case IP_DUMMYNET_DEL :
577: p = &tmp_pipe ;
578: error = sooptcopyin(sopt, p, sizeof *p, sizeof *p);
579: if (error)
580: break ;
581:
582: for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ;
583: a = b , b = b->next) ;
584: if (b && b->pipe_nr == p->pipe_nr) { /* found pipe */
585: int s = splnet() ;
586: struct ip_fw_chain *chain = ip_fw_chain.lh_first;
587:
588: if (a == NULL)
589: all_pipes = b->next ;
590: else
591: a->next = b->next ;
592: /*
593: * remove references to this pipe from the ip_fw rules.
594: */
595: for (; chain; chain = chain->chain.le_next) {
596: register struct ip_fw *const f = chain->rule;
597: if (f->pipe_ptr == b)
598: f->pipe_ptr = NULL ;
599: }
600: splx(s);
601: purge_pipe(b); /* remove pkts from here */
602: FREE(b, M_IPFW);
603: }
604: break ;
605: }
606: return error ;
607: }
608:
609: void
610: ip_dn_init(void)
611: {
612: printf("DUMMYNET initialized (980901) -- size dn_pkt %d\n",
613: sizeof(struct dn_pkt));
614: all_pipes = NULL ;
615: ip_dn_ctl_ptr = ip_dn_ctl;
616: }
617:
618: #if DUMMYNET_MODULE
619:
620: #include <sys/exec.h>
621: #include <sys/sysent.h>
622: #include <sys/lkm.h>
623:
624: MOD_MISC(dummynet);
625:
626: static ip_dn_ctl_t *old_dn_ctl_ptr ;
627:
628: static int
629: dummynet_load(struct lkm_table *lkmtp, int cmd)
630: {
631: int s=splnet();
632: old_dn_ctl_ptr = ip_dn_ctl_ptr;
633: ip_dn_init();
634: splx(s);
635: return 0;
636: }
637:
638: static int
639: dummynet_unload(struct lkm_table *lkmtp, int cmd)
640: {
641: int s=splnet();
642: ip_dn_ctl_ptr = old_dn_ctl_ptr;
643: splx(s);
644: dummynet_flush();
645: printf("DUMMYNET unloaded\n");
646: return 0;
647: }
648:
649: int
650: dummynet_mod(struct lkm_table *lkmtp, int cmd, int ver)
651: {
652: DISPATCH(lkmtp, cmd, ver, dummynet_load, dummynet_unload, lkm_nullcmd);
653: }
654: #endif
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.