1 | /* |
2 | * Copyright (c) 2000-2017 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | /* |
29 | * Copyright (c) 1998-2002 Luigi Rizzo, Universita` di Pisa |
30 | * Portions Copyright (c) 2000 Akamba Corp. |
31 | * All rights reserved |
32 | * |
33 | * Redistribution and use in source and binary forms, with or without |
34 | * modification, are permitted provided that the following conditions |
35 | * are met: |
36 | * 1. Redistributions of source code must retain the above copyright |
37 | * notice, this list of conditions and the following disclaimer. |
38 | * 2. Redistributions in binary form must reproduce the above copyright |
39 | * notice, this list of conditions and the following disclaimer in the |
40 | * documentation and/or other materials provided with the distribution. |
41 | * |
42 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
43 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
44 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
45 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
46 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
47 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
48 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
49 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
50 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
51 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
52 | * SUCH DAMAGE. |
53 | * |
54 | * $FreeBSD: src/sys/netinet/ip_dummynet.c,v 1.84 2004/08/25 09:31:30 pjd Exp $ |
55 | */ |
56 | |
57 | #define DUMMYNET_DEBUG |
58 | |
59 | /* |
60 | * This module implements IP dummynet, a bandwidth limiter/delay emulator |
61 | * used in conjunction with the ipfw package. |
62 | * Description of the data structures used is in ip_dummynet.h |
63 | * Here you mainly find the following blocks of code: |
64 | * + variable declarations; |
65 | * + heap management functions; |
66 | * + scheduler and dummynet functions; |
67 | * + configuration and initialization. |
68 | * |
69 | * NOTA BENE: critical sections are protected by the "dummynet lock". |
70 | * |
71 | * Most important Changes: |
72 | * |
73 | * 010124: Fixed WF2Q behaviour |
74 | * 010122: Fixed spl protection. |
75 | * 000601: WF2Q support |
76 | * 000106: large rewrite, use heaps to handle very many pipes. |
77 | * 980513: initial release |
78 | * |
79 | * include files marked with XXX are probably not needed |
80 | */ |
81 | |
82 | #include <sys/param.h> |
83 | #include <sys/systm.h> |
84 | #include <sys/malloc.h> |
85 | #include <sys/mbuf.h> |
86 | #include <sys/queue.h> /* XXX */ |
87 | #include <sys/kernel.h> |
88 | #include <sys/random.h> |
89 | #include <sys/socket.h> |
90 | #include <sys/socketvar.h> |
91 | #include <sys/time.h> |
92 | #include <sys/sysctl.h> |
93 | #include <net/if.h> |
94 | #include <net/route.h> |
95 | #include <net/kpi_protocol.h> |
99 | #include <net/nwk_wq.h> |
100 | #include <net/pfvar.h> |
101 | #include <netinet/in.h> |
102 | #include <netinet/in_systm.h> |
103 | #include <netinet/in_var.h> |
104 | #include <netinet/ip.h> |
105 | #include <netinet/ip_fw.h> |
106 | #include <netinet/ip_dummynet.h> |
107 | #include <netinet/ip_var.h> |
108 | |
109 | #include <netinet/ip6.h> /* for ip6_input, ip6_output prototypes */ |
110 | #include <netinet6/ip6_var.h> |
111 | |
112 | static struct ip_fw default_rule; |
113 | |
114 | /* |
115 | * We keep a private variable for the simulation time, but we could |
116 | * probably use an existing one ("softticks" in sys/kern/kern_timer.c) |
117 | */ |
118 | static dn_key curr_time = 0 ; /* current simulation time */ |
119 | |
/*
 * This is for the timer that fires to call dummynet(); we only enable
 * the timer when there are packets to process, otherwise it is disabled.
 */
122 | static int timer_enabled = 0; |
123 | |
124 | static int dn_hash_size = 64 ; /* default hash size */ |
125 | |
126 | /* statistics on number of queue searches and search steps */ |
127 | static int searches, search_steps ; |
128 | static int pipe_expire = 1 ; /* expire queue if empty */ |
129 | static int dn_max_ratio = 16 ; /* max queues/buckets ratio */ |
130 | |
131 | static int red_lookup_depth = 256; /* RED - default lookup table depth */ |
132 | static int red_avg_pkt_size = 512; /* RED - default medium packet size */ |
133 | static int red_max_pkt_size = 1500; /* RED - default max packet size */ |
134 | |
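/*
 * Nonzero while a dequeued packet chain is being handed to dummynet_send();
 * transmit_event() holds back further dequeues until it drops to zero.
 */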
135 | static int serialize = 0; |
136 | |
137 | /* |
138 | * Three heaps contain queues and pipes that the scheduler handles: |
139 | * |
140 | * ready_heap contains all dn_flow_queue related to fixed-rate pipes. |
141 | * |
 * wfq_ready_heap contains the pipes associated with WF2Q flows.
 *
 * extract_heap contains pipes associated with delay lines.
146 | */ |
static struct dn_heap ready_heap, extract_heap, wfq_ready_heap;
148 | |
149 | static int heap_init(struct dn_heap *h, int size) ; |
150 | static int heap_insert (struct dn_heap *h, dn_key key1, void *p); |
151 | static void heap_extract(struct dn_heap *h, void *obj); |
152 | |
153 | |
154 | static void transmit_event(struct dn_pipe *pipe, struct mbuf **head, |
155 | struct mbuf **tail); |
156 | static void ready_event(struct dn_flow_queue *q, struct mbuf **head, |
157 | struct mbuf **tail); |
158 | static void ready_event_wfq(struct dn_pipe *p, struct mbuf **head, |
159 | struct mbuf **tail); |
160 | |
161 | /* |
162 | * Packets are retrieved from queues in Dummynet in chains instead of |
163 | * packet-by-packet. The entire list of packets is first dequeued and |
164 | * sent out by the following function. |
165 | */ |
166 | static void dummynet_send(struct mbuf *m); |
167 | |
168 | #define HASHSIZE 16 |
169 | #define HASH(num) ((((num) >> 8) ^ ((num) >> 4) ^ (num)) & 0x0f) |
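/*
 * Example (illustrative): HASH folds a pipe/flowset number into one of
 * the 16 buckets below, e.g. HASH(0x123) = ((0x1 ^ 0x12 ^ 0x123) & 0x0f)
 * = 0, so pipe 0x123 lands in pipehash[0].
 */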
170 | static struct dn_pipe_head pipehash[HASHSIZE]; /* all pipes */ |
171 | static struct dn_flow_set_head flowsethash[HASHSIZE]; /* all flowsets */ |
172 | |
173 | #ifdef SYSCTL_NODE |
174 | SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, |
175 | CTLFLAG_RW | CTLFLAG_LOCKED, 0, "Dummynet" ); |
176 | SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size, |
177 | CTLFLAG_RW | CTLFLAG_LOCKED, &dn_hash_size, 0, "Default hash table size" ); |
178 | SYSCTL_QUAD(_net_inet_ip_dummynet, OID_AUTO, curr_time, |
179 | CTLFLAG_RD | CTLFLAG_LOCKED, &curr_time, "Current tick" ); |
180 | SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, ready_heap, |
181 | CTLFLAG_RD | CTLFLAG_LOCKED, &ready_heap.size, 0, "Size of ready heap" ); |
182 | SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, extract_heap, |
183 | CTLFLAG_RD | CTLFLAG_LOCKED, &extract_heap.size, 0, "Size of extract heap" ); |
184 | SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, searches, |
185 | CTLFLAG_RD | CTLFLAG_LOCKED, &searches, 0, "Number of queue searches" ); |
186 | SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, search_steps, |
187 | CTLFLAG_RD | CTLFLAG_LOCKED, &search_steps, 0, "Number of queue search steps" ); |
188 | SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire, |
189 | CTLFLAG_RW | CTLFLAG_LOCKED, &pipe_expire, 0, "Expire queue if empty" ); |
190 | SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, max_chain_len, |
191 | CTLFLAG_RW | CTLFLAG_LOCKED, &dn_max_ratio, 0, |
192 | "Max ratio between dynamic queues and buckets" ); |
193 | SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth, |
194 | CTLFLAG_RD | CTLFLAG_LOCKED, &red_lookup_depth, 0, "Depth of RED lookup table" ); |
195 | SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size, |
196 | CTLFLAG_RD | CTLFLAG_LOCKED, &red_avg_pkt_size, 0, "RED Medium packet size" ); |
197 | SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size, |
198 | CTLFLAG_RD | CTLFLAG_LOCKED, &red_max_pkt_size, 0, "RED Max packet size" ); |
199 | #endif |
200 | |
201 | #ifdef DUMMYNET_DEBUG |
202 | int dummynet_debug = 0; |
203 | #ifdef SYSCTL_NODE |
204 | SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED, &dummynet_debug, |
205 | 0, "control debugging printfs" ); |
206 | #endif |
207 | #define DPRINTF(X) if (dummynet_debug) printf X |
208 | #else |
209 | #define DPRINTF(X) |
210 | #endif |
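/*
 * Note: DPRINTF takes a doubly-parenthesized argument list, e.g.
 * DPRINTF(("dummynet: pipe %d\n", nr)), so that the whole printf
 * argument list passes through the single macro parameter X.
 */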
211 | |
212 | /* dummynet lock */ |
213 | static lck_grp_t *dn_mutex_grp; |
214 | static lck_grp_attr_t *dn_mutex_grp_attr; |
215 | static lck_attr_t *dn_mutex_attr; |
216 | decl_lck_mtx_data(static, dn_mutex_data); |
217 | static lck_mtx_t *dn_mutex = &dn_mutex_data; |
218 | |
219 | static int config_pipe(struct dn_pipe *p); |
220 | static int ip_dn_ctl(struct sockopt *sopt); |
221 | |
222 | static void dummynet(void *); |
223 | static void dummynet_flush(void); |
224 | void dummynet_drain(void); |
225 | static ip_dn_io_t dummynet_io; |
226 | |
227 | static void cp_flow_set_to_64_user(struct dn_flow_set *set, struct dn_flow_set_64 *fs_bp); |
228 | static void cp_queue_to_64_user( struct dn_flow_queue *q, struct dn_flow_queue_64 *qp); |
229 | static char *cp_pipe_to_64_user(struct dn_pipe *p, struct dn_pipe_64 *pipe_bp); |
230 | static char* dn_copy_set_64(struct dn_flow_set *set, char *bp); |
231 | static int cp_pipe_from_user_64( struct sockopt *sopt, struct dn_pipe *p ); |
232 | |
233 | static void cp_flow_set_to_32_user(struct dn_flow_set *set, struct dn_flow_set_32 *fs_bp); |
234 | static void cp_queue_to_32_user( struct dn_flow_queue *q, struct dn_flow_queue_32 *qp); |
235 | static char *cp_pipe_to_32_user(struct dn_pipe *p, struct dn_pipe_32 *pipe_bp); |
236 | static char* dn_copy_set_32(struct dn_flow_set *set, char *bp); |
237 | static int cp_pipe_from_user_32( struct sockopt *sopt, struct dn_pipe *p ); |
238 | |
239 | struct eventhandler_lists_ctxt dummynet_evhdlr_ctxt; |
240 | |
241 | uint32_t my_random(void) |
242 | { |
243 | uint32_t val; |
244 | read_frandom(&val, sizeof(val)); |
245 | val &= 0x7FFFFFFF; |
246 | |
247 | return (val); |
248 | } |
249 | |
250 | /* |
251 | * Heap management functions. |
252 | * |
253 | * In the heap, first node is element 0. Children of i are 2i+1 and 2i+2. |
254 | * Some macros help finding parent/children so we can optimize them. |
255 | * |
256 | * heap_init() is called to expand the heap when needed. |
257 | * Increment size in blocks of 16 entries. |
258 | * XXX failure to allocate a new element is a pretty bad failure |
259 | * as we basically stall a whole queue forever!! |
260 | * Returns 1 on error, 0 on success |
261 | */ |
262 | #define HEAP_FATHER(x) ( ( (x) - 1 ) / 2 ) |
263 | #define HEAP_LEFT(x) ( 2*(x) + 1 ) |
264 | #define HEAP_IS_LEFT(x) ( (x) & 1 ) |
265 | #define HEAP_RIGHT(x) ( 2*(x) + 2 ) |
266 | #define HEAP_SWAP(a, b, buffer) { buffer = a ; a = b ; b = buffer ; } |
267 | #define HEAP_INCREMENT 15 |
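
/*
 * Worked example of the index math above: for node 5, HEAP_FATHER(5) =
 * (5-1)/2 = 2, and indeed HEAP_LEFT(2) = 5 and HEAP_RIGHT(2) = 6;
 * HEAP_IS_LEFT(5) holds because left children always have odd indices.
 * heap_init() below rounds sizes up to multiples of 16 entries:
 * (n + HEAP_INCREMENT) & ~HEAP_INCREMENT maps 1 to 16 and 17 to 32.
 */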
268 | |
269 | |
270 | int cp_pipe_from_user_32( struct sockopt *sopt, struct dn_pipe *p ) |
271 | { |
272 | struct dn_pipe_32 user_pipe_32; |
273 | int error=0; |
274 | |
275 | error = sooptcopyin(sopt, &user_pipe_32, sizeof(struct dn_pipe_32), sizeof(struct dn_pipe_32)); |
276 | if ( !error ){ |
277 | p->pipe_nr = user_pipe_32.pipe_nr; |
278 | p->bandwidth = user_pipe_32.bandwidth; |
279 | p->delay = user_pipe_32.delay; |
280 | p->V = user_pipe_32.V; |
281 | p->sum = user_pipe_32.sum; |
282 | p->numbytes = user_pipe_32.numbytes; |
283 | p->sched_time = user_pipe_32.sched_time; |
284 | bcopy( user_pipe_32.if_name, p->if_name, IFNAMSIZ); |
285 | p->ready = user_pipe_32.ready; |
286 | |
287 | p->fs.fs_nr = user_pipe_32.fs.fs_nr; |
288 | p->fs.flags_fs = user_pipe_32.fs.flags_fs; |
289 | p->fs.parent_nr = user_pipe_32.fs.parent_nr; |
290 | p->fs.weight = user_pipe_32.fs.weight; |
291 | p->fs.qsize = user_pipe_32.fs.qsize; |
292 | p->fs.plr = user_pipe_32.fs.plr; |
293 | p->fs.flow_mask = user_pipe_32.fs.flow_mask; |
294 | p->fs.rq_size = user_pipe_32.fs.rq_size; |
295 | p->fs.rq_elements = user_pipe_32.fs.rq_elements; |
296 | p->fs.last_expired = user_pipe_32.fs.last_expired; |
297 | p->fs.backlogged = user_pipe_32.fs.backlogged; |
298 | p->fs.w_q = user_pipe_32.fs.w_q; |
299 | p->fs.max_th = user_pipe_32.fs.max_th; |
300 | p->fs.min_th = user_pipe_32.fs.min_th; |
301 | p->fs.max_p = user_pipe_32.fs.max_p; |
302 | p->fs.c_1 = user_pipe_32.fs.c_1; |
303 | p->fs.c_2 = user_pipe_32.fs.c_2; |
304 | p->fs.c_3 = user_pipe_32.fs.c_3; |
305 | p->fs.c_4 = user_pipe_32.fs.c_4; |
306 | p->fs.lookup_depth = user_pipe_32.fs.lookup_depth; |
307 | p->fs.lookup_step = user_pipe_32.fs.lookup_step; |
308 | p->fs.lookup_weight = user_pipe_32.fs.lookup_weight; |
309 | p->fs.avg_pkt_size = user_pipe_32.fs.avg_pkt_size; |
310 | p->fs.max_pkt_size = user_pipe_32.fs.max_pkt_size; |
311 | } |
312 | return error; |
313 | } |
314 | |
315 | |
316 | int cp_pipe_from_user_64( struct sockopt *sopt, struct dn_pipe *p ) |
317 | { |
318 | struct dn_pipe_64 user_pipe_64; |
319 | int error=0; |
320 | |
321 | error = sooptcopyin(sopt, &user_pipe_64, sizeof(struct dn_pipe_64), sizeof(struct dn_pipe_64)); |
322 | if ( !error ){ |
323 | p->pipe_nr = user_pipe_64.pipe_nr; |
324 | p->bandwidth = user_pipe_64.bandwidth; |
325 | p->delay = user_pipe_64.delay; |
326 | p->V = user_pipe_64.V; |
327 | p->sum = user_pipe_64.sum; |
328 | p->numbytes = user_pipe_64.numbytes; |
329 | p->sched_time = user_pipe_64.sched_time; |
330 | bcopy( user_pipe_64.if_name, p->if_name, IFNAMSIZ); |
331 | p->ready = user_pipe_64.ready; |
332 | |
333 | p->fs.fs_nr = user_pipe_64.fs.fs_nr; |
334 | p->fs.flags_fs = user_pipe_64.fs.flags_fs; |
335 | p->fs.parent_nr = user_pipe_64.fs.parent_nr; |
336 | p->fs.weight = user_pipe_64.fs.weight; |
337 | p->fs.qsize = user_pipe_64.fs.qsize; |
338 | p->fs.plr = user_pipe_64.fs.plr; |
339 | p->fs.flow_mask = user_pipe_64.fs.flow_mask; |
340 | p->fs.rq_size = user_pipe_64.fs.rq_size; |
341 | p->fs.rq_elements = user_pipe_64.fs.rq_elements; |
342 | p->fs.last_expired = user_pipe_64.fs.last_expired; |
343 | p->fs.backlogged = user_pipe_64.fs.backlogged; |
344 | p->fs.w_q = user_pipe_64.fs.w_q; |
345 | p->fs.max_th = user_pipe_64.fs.max_th; |
346 | p->fs.min_th = user_pipe_64.fs.min_th; |
347 | p->fs.max_p = user_pipe_64.fs.max_p; |
348 | p->fs.c_1 = user_pipe_64.fs.c_1; |
349 | p->fs.c_2 = user_pipe_64.fs.c_2; |
350 | p->fs.c_3 = user_pipe_64.fs.c_3; |
351 | p->fs.c_4 = user_pipe_64.fs.c_4; |
352 | p->fs.lookup_depth = user_pipe_64.fs.lookup_depth; |
353 | p->fs.lookup_step = user_pipe_64.fs.lookup_step; |
354 | p->fs.lookup_weight = user_pipe_64.fs.lookup_weight; |
355 | p->fs.avg_pkt_size = user_pipe_64.fs.avg_pkt_size; |
356 | p->fs.max_pkt_size = user_pipe_64.fs.max_pkt_size; |
357 | } |
358 | return error; |
359 | } |
360 | |
361 | static void |
362 | cp_flow_set_to_32_user(struct dn_flow_set *set, struct dn_flow_set_32 *fs_bp) |
363 | { |
364 | fs_bp->fs_nr = set->fs_nr; |
365 | fs_bp->flags_fs = set->flags_fs ; |
366 | fs_bp->parent_nr = set->parent_nr ; |
367 | fs_bp->weight = set->weight ; |
368 | fs_bp->qsize = set->qsize ; |
369 | fs_bp->plr = set->plr ; |
370 | fs_bp->flow_mask = set->flow_mask ; |
371 | fs_bp->rq_size = set->rq_size ; |
372 | fs_bp->rq_elements = set->rq_elements ; |
373 | fs_bp->last_expired = set->last_expired ; |
374 | fs_bp->backlogged = set->backlogged ; |
375 | fs_bp->w_q = set->w_q ; |
376 | fs_bp->max_th = set->max_th ; |
377 | fs_bp->min_th = set->min_th ; |
378 | fs_bp->max_p = set->max_p ; |
379 | fs_bp->c_1 = set->c_1 ; |
380 | fs_bp->c_2 = set->c_2 ; |
381 | fs_bp->c_3 = set->c_3 ; |
382 | fs_bp->c_4 = set->c_4 ; |
383 | fs_bp->w_q_lookup = CAST_DOWN_EXPLICIT(user32_addr_t, set->w_q_lookup) ; |
384 | fs_bp->lookup_depth = set->lookup_depth ; |
385 | fs_bp->lookup_step = set->lookup_step ; |
386 | fs_bp->lookup_weight = set->lookup_weight ; |
387 | fs_bp->avg_pkt_size = set->avg_pkt_size ; |
388 | fs_bp->max_pkt_size = set->max_pkt_size ; |
389 | } |
390 | |
391 | static void |
392 | cp_flow_set_to_64_user(struct dn_flow_set *set, struct dn_flow_set_64 *fs_bp) |
393 | { |
394 | fs_bp->fs_nr = set->fs_nr; |
395 | fs_bp->flags_fs = set->flags_fs ; |
396 | fs_bp->parent_nr = set->parent_nr ; |
397 | fs_bp->weight = set->weight ; |
398 | fs_bp->qsize = set->qsize ; |
399 | fs_bp->plr = set->plr ; |
400 | fs_bp->flow_mask = set->flow_mask ; |
401 | fs_bp->rq_size = set->rq_size ; |
402 | fs_bp->rq_elements = set->rq_elements ; |
403 | fs_bp->last_expired = set->last_expired ; |
404 | fs_bp->backlogged = set->backlogged ; |
405 | fs_bp->w_q = set->w_q ; |
406 | fs_bp->max_th = set->max_th ; |
407 | fs_bp->min_th = set->min_th ; |
408 | fs_bp->max_p = set->max_p ; |
409 | fs_bp->c_1 = set->c_1 ; |
410 | fs_bp->c_2 = set->c_2 ; |
411 | fs_bp->c_3 = set->c_3 ; |
412 | fs_bp->c_4 = set->c_4 ; |
413 | fs_bp->w_q_lookup = CAST_DOWN(user64_addr_t, set->w_q_lookup) ; |
414 | fs_bp->lookup_depth = set->lookup_depth ; |
415 | fs_bp->lookup_step = set->lookup_step ; |
416 | fs_bp->lookup_weight = set->lookup_weight ; |
417 | fs_bp->avg_pkt_size = set->avg_pkt_size ; |
418 | fs_bp->max_pkt_size = set->max_pkt_size ; |
419 | } |
420 | |
421 | static |
422 | void cp_queue_to_32_user( struct dn_flow_queue *q, struct dn_flow_queue_32 *qp) |
423 | { |
424 | qp->id = q->id; |
425 | qp->len = q->len; |
426 | qp->len_bytes = q->len_bytes; |
427 | qp->numbytes = q->numbytes; |
428 | qp->tot_pkts = q->tot_pkts; |
429 | qp->tot_bytes = q->tot_bytes; |
430 | qp->drops = q->drops; |
431 | qp->hash_slot = q->hash_slot; |
432 | qp->avg = q->avg; |
433 | qp->count = q->count; |
434 | qp->random = q->random; |
435 | qp->q_time = q->q_time; |
436 | qp->heap_pos = q->heap_pos; |
437 | qp->sched_time = q->sched_time; |
438 | qp->S = q->S; |
439 | qp->F = q->F; |
440 | } |
441 | |
442 | static |
443 | void cp_queue_to_64_user( struct dn_flow_queue *q, struct dn_flow_queue_64 *qp) |
444 | { |
445 | qp->id = q->id; |
446 | qp->len = q->len; |
447 | qp->len_bytes = q->len_bytes; |
448 | qp->numbytes = q->numbytes; |
449 | qp->tot_pkts = q->tot_pkts; |
450 | qp->tot_bytes = q->tot_bytes; |
451 | qp->drops = q->drops; |
452 | qp->hash_slot = q->hash_slot; |
453 | qp->avg = q->avg; |
454 | qp->count = q->count; |
455 | qp->random = q->random; |
456 | qp->q_time = q->q_time; |
457 | qp->heap_pos = q->heap_pos; |
458 | qp->sched_time = q->sched_time; |
459 | qp->S = q->S; |
460 | qp->F = q->F; |
461 | } |
462 | |
463 | static |
464 | char *cp_pipe_to_32_user(struct dn_pipe *p, struct dn_pipe_32 *pipe_bp) |
465 | { |
466 | char *bp; |
467 | |
468 | pipe_bp->pipe_nr = p->pipe_nr; |
469 | pipe_bp->bandwidth = p->bandwidth; |
470 | pipe_bp->delay = p->delay; |
471 | bcopy( &(p->scheduler_heap), &(pipe_bp->scheduler_heap), sizeof(struct dn_heap_32)); |
472 | pipe_bp->scheduler_heap.p = CAST_DOWN_EXPLICIT(user32_addr_t, pipe_bp->scheduler_heap.p); |
473 | bcopy( &(p->not_eligible_heap), &(pipe_bp->not_eligible_heap), sizeof(struct dn_heap_32)); |
474 | pipe_bp->not_eligible_heap.p = CAST_DOWN_EXPLICIT(user32_addr_t, pipe_bp->not_eligible_heap.p); |
475 | bcopy( &(p->idle_heap), &(pipe_bp->idle_heap), sizeof(struct dn_heap_32)); |
476 | pipe_bp->idle_heap.p = CAST_DOWN_EXPLICIT(user32_addr_t, pipe_bp->idle_heap.p); |
477 | pipe_bp->V = p->V; |
478 | pipe_bp->sum = p->sum; |
479 | pipe_bp->numbytes = p->numbytes; |
480 | pipe_bp->sched_time = p->sched_time; |
481 | bcopy( p->if_name, pipe_bp->if_name, IFNAMSIZ); |
482 | pipe_bp->ifp = CAST_DOWN_EXPLICIT(user32_addr_t, p->ifp); |
483 | pipe_bp->ready = p->ready; |
484 | |
485 | cp_flow_set_to_32_user( &(p->fs), &(pipe_bp->fs)); |
486 | |
487 | pipe_bp->delay = (pipe_bp->delay * 1000) / (hz*10) ; |
488 | /* |
489 | * XXX the following is a hack based on ->next being the |
490 | * first field in dn_pipe and dn_flow_set. The correct |
491 | * solution would be to move the dn_flow_set to the beginning |
492 | * of struct dn_pipe. |
493 | */ |
494 | pipe_bp->next = CAST_DOWN_EXPLICIT( user32_addr_t, DN_IS_PIPE ); |
495 | /* clean pointers */ |
496 | pipe_bp->head = pipe_bp->tail = (user32_addr_t) 0 ; |
497 | pipe_bp->fs.next = (user32_addr_t)0 ; |
498 | pipe_bp->fs.pipe = (user32_addr_t)0 ; |
499 | pipe_bp->fs.rq = (user32_addr_t)0 ; |
500 | bp = ((char *)pipe_bp) + sizeof(struct dn_pipe_32); |
501 | return( dn_copy_set_32( &(p->fs), bp) ); |
502 | } |
503 | |
504 | static |
505 | char *cp_pipe_to_64_user(struct dn_pipe *p, struct dn_pipe_64 *pipe_bp) |
506 | { |
507 | char *bp; |
508 | |
509 | pipe_bp->pipe_nr = p->pipe_nr; |
510 | pipe_bp->bandwidth = p->bandwidth; |
511 | pipe_bp->delay = p->delay; |
512 | bcopy( &(p->scheduler_heap), &(pipe_bp->scheduler_heap), sizeof(struct dn_heap_64)); |
513 | pipe_bp->scheduler_heap.p = CAST_DOWN(user64_addr_t, pipe_bp->scheduler_heap.p); |
514 | bcopy( &(p->not_eligible_heap), &(pipe_bp->not_eligible_heap), sizeof(struct dn_heap_64)); |
515 | pipe_bp->not_eligible_heap.p = CAST_DOWN(user64_addr_t, pipe_bp->not_eligible_heap.p); |
516 | bcopy( &(p->idle_heap), &(pipe_bp->idle_heap), sizeof(struct dn_heap_64)); |
517 | pipe_bp->idle_heap.p = CAST_DOWN(user64_addr_t, pipe_bp->idle_heap.p); |
518 | pipe_bp->V = p->V; |
519 | pipe_bp->sum = p->sum; |
520 | pipe_bp->numbytes = p->numbytes; |
521 | pipe_bp->sched_time = p->sched_time; |
522 | bcopy( p->if_name, pipe_bp->if_name, IFNAMSIZ); |
523 | pipe_bp->ifp = CAST_DOWN(user64_addr_t, p->ifp); |
524 | pipe_bp->ready = p->ready; |
525 | |
526 | cp_flow_set_to_64_user( &(p->fs), &(pipe_bp->fs)); |
527 | |
528 | pipe_bp->delay = (pipe_bp->delay * 1000) / (hz*10) ; |
529 | /* |
530 | * XXX the following is a hack based on ->next being the |
531 | * first field in dn_pipe and dn_flow_set. The correct |
532 | * solution would be to move the dn_flow_set to the beginning |
533 | * of struct dn_pipe. |
534 | */ |
535 | pipe_bp->next = CAST_DOWN( user64_addr_t, DN_IS_PIPE ); |
536 | /* clean pointers */ |
537 | pipe_bp->head = pipe_bp->tail = USER_ADDR_NULL ; |
538 | pipe_bp->fs.next = USER_ADDR_NULL ; |
539 | pipe_bp->fs.pipe = USER_ADDR_NULL ; |
540 | pipe_bp->fs.rq = USER_ADDR_NULL ; |
541 | bp = ((char *)pipe_bp) + sizeof(struct dn_pipe_64); |
542 | return( dn_copy_set_64( &(p->fs), bp) ); |
543 | } |
544 | |
545 | static int |
546 | heap_init(struct dn_heap *h, int new_size) |
547 | { |
548 | struct dn_heap_entry *p; |
549 | |
550 | if (h->size >= new_size ) { |
551 | printf("dummynet: heap_init, Bogus call, have %d want %d\n" , |
552 | h->size, new_size); |
553 | return 0 ; |
554 | } |
555 | new_size = (new_size + HEAP_INCREMENT ) & ~HEAP_INCREMENT ; |
556 | p = _MALLOC(new_size * sizeof(*p), M_DUMMYNET, M_DONTWAIT ); |
557 | if (p == NULL) { |
558 | printf("dummynet: heap_init, resize %d failed\n" , new_size ); |
559 | return 1 ; /* error */ |
560 | } |
561 | if (h->size > 0) { |
562 | bcopy(h->p, p, h->size * sizeof(*p) ); |
563 | FREE(h->p, M_DUMMYNET); |
564 | } |
565 | h->p = p ; |
566 | h->size = new_size ; |
567 | return 0 ; |
568 | } |
569 | |
570 | /* |
571 | * Insert element in heap. Normally, p != NULL, we insert p in |
572 | * a new position and bubble up. If p == NULL, then the element is |
573 | * already in place, and key is the position where to start the |
574 | * bubble-up. |
575 | * Returns 1 on failure (cannot allocate new heap entry) |
576 | * |
577 | * If offset > 0 the position (index, int) of the element in the heap is |
578 | * also stored in the element itself at the given offset in bytes. |
579 | */ |
580 | #define SET_OFFSET(heap, node) \ |
581 | if (heap->offset > 0) \ |
582 | *((int *)((char *)(heap->p[node].object) + heap->offset)) = node ; |
583 | /* |
584 | * RESET_OFFSET is used for sanity checks. It sets offset to an invalid value. |
585 | */ |
586 | #define RESET_OFFSET(heap, node) \ |
587 | if (heap->offset > 0) \ |
588 | *((int *)((char *)(heap->p[node].object) + heap->offset)) = -1 ; |
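/*
 * The offset mechanism gives heap_extract() O(1) lookup of an object's
 * position: a heap whose offset is, e.g., the byte offset of the
 * heap_pos field in struct dn_flow_queue keeps that field equal to the
 * element's current index (or -1 once it has been extracted).
 */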
589 | static int |
590 | heap_insert(struct dn_heap *h, dn_key key1, void *p) |
591 | { |
592 | int son = h->elements ; |
593 | |
594 | if (p == NULL) /* data already there, set starting point */ |
595 | son = key1 ; |
596 | else { /* insert new element at the end, possibly resize */ |
597 | son = h->elements ; |
598 | if (son == h->size) /* need resize... */ |
599 | if (heap_init(h, h->elements+1) ) |
600 | return 1 ; /* failure... */ |
601 | h->p[son].object = p ; |
602 | h->p[son].key = key1 ; |
603 | h->elements++ ; |
604 | } |
605 | while (son > 0) { /* bubble up */ |
606 | int father = HEAP_FATHER(son) ; |
607 | struct dn_heap_entry tmp ; |
608 | |
609 | if (DN_KEY_LT( h->p[father].key, h->p[son].key ) ) |
610 | break ; /* found right position */ |
611 | /* son smaller than father, swap and repeat */ |
612 | HEAP_SWAP(h->p[son], h->p[father], tmp) ; |
613 | SET_OFFSET(h, son); |
614 | son = father ; |
615 | } |
616 | SET_OFFSET(h, son); |
617 | return 0 ; |
618 | } |
619 | |
620 | /* |
621 | * remove top element from heap, or obj if obj != NULL |
622 | */ |
623 | static void |
heap_extract(struct dn_heap *h, void *obj)
625 | { |
626 | int child, father, maxelt = h->elements - 1 ; |
627 | |
628 | if (maxelt < 0) { |
629 | printf("dummynet: warning, extract from empty heap 0x%llx\n" , |
630 | (uint64_t)VM_KERNEL_ADDRPERM(h)); |
631 | return ; |
632 | } |
633 | father = 0 ; /* default: move up smallest child */ |
634 | if (obj != NULL) { /* extract specific element, index is at offset */ |
635 | if (h->offset <= 0) |
636 | panic("dummynet: heap_extract from middle not supported on this heap!!!\n" ); |
637 | father = *((int *)((char *)obj + h->offset)) ; |
638 | if (father < 0 || father >= h->elements) { |
639 | printf("dummynet: heap_extract, father %d out of bound 0..%d\n" , |
640 | father, h->elements); |
641 | panic("dummynet: heap_extract" ); |
642 | } |
643 | } |
644 | RESET_OFFSET(h, father); |
645 | child = HEAP_LEFT(father) ; /* left child */ |
646 | while (child <= maxelt) { /* valid entry */ |
647 | if (child != maxelt && DN_KEY_LT(h->p[child+1].key, h->p[child].key) ) |
648 | child = child+1 ; /* take right child, otherwise left */ |
649 | h->p[father] = h->p[child] ; |
650 | SET_OFFSET(h, father); |
651 | father = child ; |
652 | child = HEAP_LEFT(child) ; /* left child for next loop */ |
653 | } |
654 | h->elements-- ; |
655 | if (father != maxelt) { |
656 | /* |
657 | * Fill hole with last entry and bubble up, reusing the insert code |
658 | */ |
659 | h->p[father] = h->p[maxelt] ; |
660 | heap_insert(h, father, NULL); /* this one cannot fail */ |
661 | } |
662 | } |
663 | |
664 | /* |
665 | * heapify() will reorganize data inside an array to maintain the |
666 | * heap property. It is needed when we delete a bunch of entries. |
667 | */ |
668 | static void |
669 | heapify(struct dn_heap *h) |
670 | { |
671 | int i ; |
672 | |
673 | for (i = 0 ; i < h->elements ; i++ ) |
674 | heap_insert(h, i , NULL) ; |
675 | } |
676 | |
677 | /* |
678 | * cleanup the heap and free data structure |
679 | */ |
680 | static void |
681 | heap_free(struct dn_heap *h) |
682 | { |
	if (h->size > 0)
684 | FREE(h->p, M_DUMMYNET); |
685 | bzero(h, sizeof(*h)); |
686 | } |
687 | |
688 | /* |
689 | * --- end of heap management functions --- |
690 | */ |
691 | |
692 | /* |
693 | * Return the mbuf tag holding the dummynet state. As an optimization |
694 | * this is assumed to be the first tag on the list. If this turns out |
695 | * wrong we'll need to search the list. |
696 | */ |
697 | static struct dn_pkt_tag * |
698 | dn_tag_get(struct mbuf *m) |
699 | { |
700 | struct m_tag *mtag = m_tag_first(m); |
701 | |
702 | if (!(mtag != NULL && |
703 | mtag->m_tag_id == KERNEL_MODULE_TAG_ID && |
704 | mtag->m_tag_type == KERNEL_TAG_TYPE_DUMMYNET)) |
705 | panic("packet on dummynet queue w/o dummynet tag: 0x%llx" , |
706 | (uint64_t)VM_KERNEL_ADDRPERM(m)); |
707 | |
708 | return (struct dn_pkt_tag *)(mtag+1); |
709 | } |
710 | |
711 | /* |
712 | * Scheduler functions: |
713 | * |
714 | * transmit_event() is called when the delay-line needs to enter |
715 | * the scheduler, either because of existing pkts getting ready, |
716 | * or new packets entering the queue. The event handled is the delivery |
717 | * time of the packet. |
718 | * |
719 | * ready_event() does something similar with fixed-rate queues, and the |
720 | * event handled is the finish time of the head pkt. |
721 | * |
722 | * wfq_ready_event() does something similar with WF2Q queues, and the |
723 | * event handled is the start time of the head pkt. |
724 | * |
725 | * In all cases, we make sure that the data structures are consistent |
726 | * before passing pkts out, because this might trigger recursive |
727 | * invocations of the procedures. |
728 | */ |
729 | static void |
730 | transmit_event(struct dn_pipe *pipe, struct mbuf **head, struct mbuf **tail) |
731 | { |
732 | struct mbuf *m ; |
733 | struct dn_pkt_tag *pkt = NULL; |
734 | u_int64_t schedule_time; |
735 | |
736 | LCK_MTX_ASSERT(dn_mutex, LCK_MTX_ASSERT_OWNED); |
737 | ASSERT(serialize >= 0); |
738 | if (serialize == 0) { |
739 | while ((m = pipe->head) != NULL) { |
740 | pkt = dn_tag_get(m); |
741 | if (!DN_KEY_LEQ(pkt->dn_output_time, curr_time)) |
742 | break; |
743 | |
744 | pipe->head = m->m_nextpkt; |
745 | if (*tail != NULL) |
746 | (*tail)->m_nextpkt = m; |
747 | else |
748 | *head = m; |
749 | *tail = m; |
750 | } |
751 | |
752 | if (*tail != NULL) |
753 | (*tail)->m_nextpkt = NULL; |
754 | } |
755 | |
756 | schedule_time = pkt == NULL || DN_KEY_LEQ(pkt->dn_output_time, curr_time) ? |
757 | curr_time + 1 : pkt->dn_output_time; |
758 | |
759 | /* if there are leftover packets, put the pipe into the heap for next ready event */ |
760 | if ((m = pipe->head) != NULL) { |
761 | pkt = dn_tag_get(m); |
762 | /* XXX should check errors on heap_insert, by draining the |
763 | * whole pipe p and hoping in the future we are more successful |
764 | */ |
765 | heap_insert(&extract_heap, schedule_time, pipe); |
766 | } |
767 | } |
768 | |
769 | /* |
770 | * the following macro computes how many ticks we have to wait |
771 | * before being able to transmit a packet. The credit is taken from |
772 | * either a pipe (WF2Q) or a flow_queue (per-flow queueing) |
773 | */ |
774 | |
775 | /* hz is 100, which gives a granularity of 10ms in the old timer. |
776 | * The timer has been changed to fire every 1ms, so the use of |
777 | * hz has been modified here. All instances of hz have been left |
778 | * in place but adjusted by a factor of 10 so that hz is functionally |
779 | * equal to 1000. |
780 | */ |
781 | #define SET_TICKS(_m, q, p) \ |
782 | ((_m)->m_pkthdr.len*8*(hz*10) - (q)->numbytes + p->bandwidth - 1 ) / \ |
783 | p->bandwidth ; |
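
/*
 * Worked example for SET_TICKS (illustrative numbers): with hz*10 = 1000
 * ticks/sec, a 1500-byte packet on a 1 Mbit/s pipe with no residual
 * credit (numbytes == 0) needs
 *	(1500*8*1000 + 10^6 - 1) / 10^6 = 12 ticks,
 * i.e. ~12 ms, matching the packet's serialization time at 1 Mbit/s.
 */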
784 | |
785 | /* |
786 | * extract pkt from queue, compute output time (could be now) |
787 | * and put into delay line (p_queue) |
788 | */ |
789 | static void |
790 | move_pkt(struct mbuf *pkt, struct dn_flow_queue *q, |
791 | struct dn_pipe *p, int len) |
792 | { |
793 | struct dn_pkt_tag *dt = dn_tag_get(pkt); |
794 | |
795 | q->head = pkt->m_nextpkt ; |
796 | q->len-- ; |
797 | q->len_bytes -= len ; |
798 | |
799 | dt->dn_output_time = curr_time + p->delay ; |
800 | |
801 | if (p->head == NULL) |
802 | p->head = pkt; |
803 | else |
804 | p->tail->m_nextpkt = pkt; |
805 | p->tail = pkt; |
806 | p->tail->m_nextpkt = NULL; |
807 | } |
808 | |
809 | /* |
810 | * ready_event() is invoked every time the queue must enter the |
811 | * scheduler, either because the first packet arrives, or because |
812 | * a previously scheduled event fired. |
 * On invocation, drain as many pkts as possible (could be 0) and then
814 | * if there are leftover packets reinsert the pkt in the scheduler. |
815 | */ |
816 | static void |
817 | ready_event(struct dn_flow_queue *q, struct mbuf **head, struct mbuf **tail) |
818 | { |
819 | struct mbuf *pkt; |
820 | struct dn_pipe *p = q->fs->pipe ; |
821 | int p_was_empty ; |
822 | |
823 | LCK_MTX_ASSERT(dn_mutex, LCK_MTX_ASSERT_OWNED); |
824 | |
825 | if (p == NULL) { |
826 | printf("dummynet: ready_event pipe is gone\n" ); |
827 | return ; |
828 | } |
829 | p_was_empty = (p->head == NULL) ; |
830 | |
831 | /* |
832 | * schedule fixed-rate queues linked to this pipe: |
833 | * Account for the bw accumulated since last scheduling, then |
834 | * drain as many pkts as allowed by q->numbytes and move to |
835 | * the delay line (in p) computing output time. |
836 | * bandwidth==0 (no limit) means we can drain the whole queue, |
837 | * setting len_scaled = 0 does the job. |
838 | */ |
839 | q->numbytes += ( curr_time - q->sched_time ) * p->bandwidth; |
840 | while ( (pkt = q->head) != NULL ) { |
841 | int len = pkt->m_pkthdr.len; |
842 | int len_scaled = p->bandwidth ? len*8*(hz*10) : 0 ; |
843 | if (len_scaled > q->numbytes ) |
844 | break ; |
845 | q->numbytes -= len_scaled ; |
846 | move_pkt(pkt, q, p, len); |
847 | } |
848 | /* |
849 | * If we have more packets queued, schedule next ready event |
850 | * (can only occur when bandwidth != 0, otherwise we would have |
851 | * flushed the whole queue in the previous loop). |
852 | * To this purpose we record the current time and compute how many |
853 | * ticks to go for the finish time of the packet. |
854 | */ |
855 | if ( (pkt = q->head) != NULL ) { /* this implies bandwidth != 0 */ |
856 | dn_key t = SET_TICKS(pkt, q, p); /* ticks i have to wait */ |
857 | q->sched_time = curr_time ; |
858 | heap_insert(&ready_heap, curr_time + t, (void *)q ); |
859 | /* XXX should check errors on heap_insert, and drain the whole |
860 | * queue on error hoping next time we are luckier. |
861 | */ |
862 | } else { /* RED needs to know when the queue becomes empty */ |
863 | q->q_time = curr_time; |
864 | q->numbytes = 0; |
865 | } |
866 | /* |
867 | * If the delay line was empty call transmit_event(p) now. |
868 | * Otherwise, the scheduler will take care of it. |
869 | */ |
870 | if (p_was_empty) |
871 | transmit_event(p, head, tail); |
872 | } |
873 | |
874 | /* |
875 | * Called when we can transmit packets on WF2Q queues. Take pkts out of |
876 | * the queues at their start time, and enqueue into the delay line. |
877 | * Packets are drained until p->numbytes < 0. As long as |
878 | * len_scaled >= p->numbytes, the packet goes into the delay line |
879 | * with a deadline p->delay. For the last packet, if p->numbytes<0, |
880 | * there is an additional delay. |
881 | */ |
882 | static void |
883 | ready_event_wfq(struct dn_pipe *p, struct mbuf **head, struct mbuf **tail) |
884 | { |
885 | int p_was_empty = (p->head == NULL) ; |
886 | struct dn_heap *sch = &(p->scheduler_heap); |
887 | struct dn_heap *neh = &(p->not_eligible_heap) ; |
888 | int64_t p_numbytes = p->numbytes; |
889 | |
890 | LCK_MTX_ASSERT(dn_mutex, LCK_MTX_ASSERT_OWNED); |
891 | |
892 | if (p->if_name[0] == 0) /* tx clock is simulated */ |
893 | p_numbytes += ( curr_time - p->sched_time ) * p->bandwidth; |
894 | else { /* tx clock is for real, the ifq must be empty or this is a NOP */ |
895 | if (p->ifp && !IFCQ_IS_EMPTY(&p->ifp->if_snd)) |
896 | return ; |
897 | else { |
898 | DPRINTF(("dummynet: pipe %d ready from %s --\n" , |
899 | p->pipe_nr, p->if_name)); |
900 | } |
901 | } |
902 | |
903 | /* |
904 | * While we have backlogged traffic AND credit, we need to do |
905 | * something on the queue. |
906 | */ |
907 | while ( p_numbytes >=0 && (sch->elements>0 || neh->elements >0) ) { |
908 | if (sch->elements > 0) { /* have some eligible pkts to send out */ |
909 | struct dn_flow_queue *q = sch->p[0].object ; |
910 | struct mbuf *pkt = q->head; |
911 | struct dn_flow_set *fs = q->fs; |
912 | u_int64_t len = pkt->m_pkthdr.len; |
913 | int len_scaled = p->bandwidth ? len*8*(hz*10) : 0 ; |
914 | |
915 | heap_extract(sch, NULL); /* remove queue from heap */ |
916 | p_numbytes -= len_scaled ; |
917 | move_pkt(pkt, q, p, len); |
918 | |
919 | p->V += (len<<MY_M) / p->sum ; /* update V */ |
920 | q->S = q->F ; /* update start time */ |
921 | if (q->len == 0) { /* Flow not backlogged any more */ |
922 | fs->backlogged-- ; |
923 | heap_insert(&(p->idle_heap), q->F, q); |
924 | } else { /* still backlogged */ |
925 | /* |
926 | * update F and position in backlogged queue, then |
927 | * put flow in not_eligible_heap (we will fix this later). |
928 | */ |
929 | len = (q->head)->m_pkthdr.len; |
930 | q->F += (len<<MY_M)/(u_int64_t) fs->weight ; |
931 | if (DN_KEY_LEQ(q->S, p->V)) |
932 | heap_insert(neh, q->S, q); |
933 | else |
934 | heap_insert(sch, q->F, q); |
935 | } |
936 | } |
937 | /* |
938 | * now compute V = max(V, min(S_i)). Remember that all elements in sch |
939 | * have by definition S_i <= V so if sch is not empty, V is surely |
940 | * the max and we must not update it. Conversely, if sch is empty |
941 | * we only need to look at neh. |
942 | */ |
943 | if (sch->elements == 0 && neh->elements > 0) |
944 | p->V = MAX64 ( p->V, neh->p[0].key ); |
945 | /* move from neh to sch any packets that have become eligible */ |
946 | while (neh->elements > 0 && DN_KEY_LEQ(neh->p[0].key, p->V) ) { |
947 | struct dn_flow_queue *q = neh->p[0].object ; |
948 | heap_extract(neh, NULL); |
949 | heap_insert(sch, q->F, q); |
950 | } |
951 | |
952 | if (p->if_name[0] != '\0') {/* tx clock is from a real thing */ |
953 | p_numbytes = -1 ; /* mark not ready for I/O */ |
954 | break ; |
955 | } |
956 | } |
957 | if (sch->elements == 0 && neh->elements == 0 && p_numbytes >= 0 |
958 | && p->idle_heap.elements > 0) { |
959 | /* |
960 | * no traffic and no events scheduled. We can get rid of idle-heap. |
961 | */ |
962 | int i ; |
963 | |
964 | for (i = 0 ; i < p->idle_heap.elements ; i++) { |
965 | struct dn_flow_queue *q = p->idle_heap.p[i].object ; |
966 | |
967 | q->F = 0 ; |
968 | q->S = q->F + 1 ; |
969 | } |
970 | p->sum = 0 ; |
971 | p->V = 0 ; |
972 | p->idle_heap.elements = 0 ; |
973 | } |
974 | /* |
975 | * If we are getting clocks from dummynet (not a real interface) and |
 * if we are under credit, schedule the next ready event.
977 | * Also fix the delivery time of the last packet. |
978 | */ |
979 | if (p->if_name[0]==0 && p_numbytes < 0) { /* this implies bandwidth >0 */ |
980 | dn_key t=0 ; /* number of ticks i have to wait */ |
981 | |
982 | if (p->bandwidth > 0) |
983 | t = ( p->bandwidth -1 - p_numbytes) / p->bandwidth ; |
984 | dn_tag_get(p->tail)->dn_output_time += t ; |
985 | p->sched_time = curr_time ; |
986 | heap_insert(&wfq_ready_heap, curr_time + t, (void *)p); |
987 | /* XXX should check errors on heap_insert, and drain the whole |
988 | * queue on error hoping next time we are luckier. |
989 | */ |
990 | } |
991 | |
992 | /* Fit (adjust if necessary) 64bit result into 32bit variable. */ |
993 | if (p_numbytes > INT_MAX) |
994 | p->numbytes = INT_MAX; |
995 | else if (p_numbytes < INT_MIN) |
996 | p->numbytes = INT_MIN; |
997 | else |
998 | p->numbytes = p_numbytes; |
999 | |
1000 | /* |
1001 | * If the delay line was empty call transmit_event(p) now. |
1002 | * Otherwise, the scheduler will take care of it. |
1003 | */ |
1004 | if (p_was_empty) |
1005 | transmit_event(p, head, tail); |
1006 | |
1007 | } |
1008 | |
1009 | /* |
 * This is called every 1ms. It refreshes the current tick counter
 * (curr_time) and dispatches any events whose deadline has expired.
1012 | */ |
1013 | static void |
1014 | dummynet(__unused void * unused) |
1015 | { |
1016 | void *p ; /* generic parameter to handler */ |
1017 | struct dn_heap *h ; |
1018 | struct dn_heap *heaps[3]; |
1019 | struct mbuf *head = NULL, *tail = NULL; |
1020 | int i; |
1021 | struct dn_pipe *pe ; |
1022 | struct timespec ts; |
1023 | struct timeval tv; |
1024 | |
1025 | heaps[0] = &ready_heap ; /* fixed-rate queues */ |
1026 | heaps[1] = &wfq_ready_heap ; /* wfq queues */ |
1027 | heaps[2] = &extract_heap ; /* delay line */ |
1028 | |
1029 | lck_mtx_lock(dn_mutex); |
1030 | |
1031 | /* make all time measurements in milliseconds (ms) - |
1032 | * here we convert secs and usecs to msecs (just divide the |
1033 | * usecs and take the closest whole number). |
1034 | */ |
1035 | microuptime(&tv); |
1036 | curr_time = (tv.tv_sec * 1000) + (tv.tv_usec / 1000); |
1037 | |
1038 | for (i=0; i < 3 ; i++) { |
1039 | h = heaps[i]; |
1040 | while (h->elements > 0 && DN_KEY_LEQ(h->p[0].key, curr_time) ) { |
1041 | if (h->p[0].key > curr_time) |
1042 | printf("dummynet: warning, heap %d is %d ticks late\n" , |
1043 | i, (int)(curr_time - h->p[0].key)); |
1044 | p = h->p[0].object ; /* store a copy before heap_extract */ |
1045 | heap_extract(h, NULL); /* need to extract before processing */ |
1046 | if (i == 0) |
1047 | ready_event(p, &head, &tail) ; |
1048 | else if (i == 1) { |
1049 | struct dn_pipe *pipe = p; |
1050 | if (pipe->if_name[0] != '\0') |
1051 | printf("dummynet: bad ready_event_wfq for pipe %s\n" , |
1052 | pipe->if_name); |
1053 | else |
1054 | ready_event_wfq(p, &head, &tail) ; |
1055 | } else { |
1056 | transmit_event(p, &head, &tail); |
1057 | } |
1058 | } |
1059 | } |
1060 | /* sweep pipes trying to expire idle flow_queues */ |
1061 | for (i = 0; i < HASHSIZE; i++) |
1062 | SLIST_FOREACH(pe, &pipehash[i], next) |
1063 | if (pe->idle_heap.elements > 0 && |
1064 | DN_KEY_LT(pe->idle_heap.p[0].key, pe->V) ) { |
1065 | struct dn_flow_queue *q = pe->idle_heap.p[0].object ; |
1066 | |
1067 | heap_extract(&(pe->idle_heap), NULL); |
1068 | q->S = q->F + 1 ; /* mark timestamp as invalid */ |
1069 | pe->sum -= q->fs->weight ; |
1070 | } |
1071 | |
1072 | /* check the heaps to see if there's still stuff in there, and |
1073 | * only set the timer if there are packets to process |
1074 | */ |
1075 | timer_enabled = 0; |
1076 | for (i=0; i < 3 ; i++) { |
1077 | h = heaps[i]; |
1078 | if (h->elements > 0) { // set the timer |
1079 | ts.tv_sec = 0; |
1080 | ts.tv_nsec = 1 * 1000000; // 1ms |
1081 | timer_enabled = 1; |
1082 | bsd_timeout(dummynet, NULL, &ts); |
1083 | break; |
1084 | } |
1085 | } |
1086 | |
1087 | if (head != NULL) |
1088 | serialize++; |
1089 | |
1090 | lck_mtx_unlock(dn_mutex); |
1091 | |
1092 | /* Send out the de-queued list of ready-to-send packets */ |
1093 | if (head != NULL) { |
1094 | dummynet_send(head); |
1095 | lck_mtx_lock(dn_mutex); |
1096 | serialize--; |
1097 | lck_mtx_unlock(dn_mutex); |
1098 | } |
1099 | } |
1100 | |
1101 | |
1102 | static void |
1103 | dummynet_send(struct mbuf *m) |
1104 | { |
1105 | struct dn_pkt_tag *pkt; |
1106 | struct mbuf *n; |
1107 | |
1108 | for (; m != NULL; m = n) { |
1109 | n = m->m_nextpkt; |
1110 | m->m_nextpkt = NULL; |
1111 | pkt = dn_tag_get(m); |
1112 | |
1113 | DPRINTF(("dummynet_send m: 0x%llx dn_dir: %d dn_flags: 0x%x\n" , |
1114 | (uint64_t)VM_KERNEL_ADDRPERM(m), pkt->dn_dir, |
1115 | pkt->dn_flags)); |
1116 | |
1117 | switch (pkt->dn_dir) { |
1118 | case DN_TO_IP_OUT: { |
1119 | struct route tmp_rt; |
1120 | |
1121 | /* route is already in the packet's dn_ro */ |
1122 | bzero(&tmp_rt, sizeof (tmp_rt)); |
1123 | |
1124 | /* Force IP_RAWOUTPUT as the IP header is fully formed */ |
1125 | pkt->dn_flags |= IP_RAWOUTPUT | IP_FORWARDING; |
1126 | (void)ip_output(m, NULL, &tmp_rt, pkt->dn_flags, NULL, NULL); |
1127 | ROUTE_RELEASE(&tmp_rt); |
1128 | break ; |
1129 | } |
1130 | case DN_TO_IP_IN : |
1131 | proto_inject(PF_INET, m); |
1132 | break ; |
1133 | #ifdef INET6 |
1134 | case DN_TO_IP6_OUT: { |
1135 | /* routes already in the packet's dn_{ro6,pmtu} */ |
1136 | ip6_output(m, NULL, NULL, IPV6_FORWARDING, NULL, NULL, NULL); |
1137 | break; |
1138 | } |
1139 | case DN_TO_IP6_IN: |
1140 | proto_inject(PF_INET6, m); |
1141 | break; |
1142 | #endif /* INET6 */ |
1143 | default: |
1144 | printf("dummynet: bad switch %d!\n" , pkt->dn_dir); |
1145 | m_freem(m); |
1146 | break ; |
1147 | } |
1148 | } |
1149 | } |
1150 | |
1151 | /* |
1152 | * Unconditionally expire empty queues in case of shortage. |
1153 | * Returns the number of queues freed. |
1154 | */ |
1155 | static int |
1156 | expire_queues(struct dn_flow_set *fs) |
1157 | { |
1158 | struct dn_flow_queue *q, *prev ; |
1159 | int i, initial_elements = fs->rq_elements ; |
1160 | struct timeval timenow; |
1161 | |
1162 | /* reviewed for getmicrotime usage */ |
1163 | getmicrotime(&timenow); |
1164 | |
1165 | if (fs->last_expired == timenow.tv_sec) |
1166 | return 0 ; |
1167 | fs->last_expired = timenow.tv_sec ; |
1168 | for (i = 0 ; i <= fs->rq_size ; i++) /* last one is overflow */ |
1169 | for (prev=NULL, q = fs->rq[i] ; q != NULL ; ) |
1170 | if (q->head != NULL || q->S != q->F+1) { |
1171 | prev = q ; |
1172 | q = q->next ; |
1173 | } else { /* entry is idle, expire it */ |
1174 | struct dn_flow_queue *old_q = q ; |
1175 | |
1176 | if (prev != NULL) |
1177 | prev->next = q = q->next ; |
1178 | else |
1179 | fs->rq[i] = q = q->next ; |
1180 | fs->rq_elements-- ; |
1181 | FREE(old_q, M_DUMMYNET); |
1182 | } |
1183 | return initial_elements - fs->rq_elements ; |
1184 | } |
1185 | |
1186 | /* |
1187 | * If room, create a new queue and put at head of slot i; |
1188 | * otherwise, create or use the default queue. |
1189 | */ |
1190 | static struct dn_flow_queue * |
1191 | create_queue(struct dn_flow_set *fs, int i) |
1192 | { |
1193 | struct dn_flow_queue *q ; |
1194 | |
1195 | if (fs->rq_elements > fs->rq_size * dn_max_ratio && |
1196 | expire_queues(fs) == 0) { |
1197 | /* |
1198 | * No way to get room, use or create overflow queue. |
1199 | */ |
1200 | i = fs->rq_size ; |
1201 | if ( fs->rq[i] != NULL ) |
1202 | return fs->rq[i] ; |
1203 | } |
1204 | q = _MALLOC(sizeof(*q), M_DUMMYNET, M_DONTWAIT | M_ZERO); |
1205 | if (q == NULL) { |
1206 | printf("dummynet: sorry, cannot allocate queue for new flow\n" ); |
1207 | return NULL ; |
1208 | } |
1209 | q->fs = fs ; |
1210 | q->hash_slot = i ; |
1211 | q->next = fs->rq[i] ; |
1212 | q->S = q->F + 1; /* hack - mark timestamp as invalid */ |
1213 | fs->rq[i] = q ; |
1214 | fs->rq_elements++ ; |
1215 | return q ; |
1216 | } |
1217 | |
1218 | /* |
1219 | * Given a flow_set and a pkt in last_pkt, find a matching queue |
1220 | * after appropriate masking. The queue is moved to front |
1221 | * so that further searches take less time. |
1222 | */ |
1223 | static struct dn_flow_queue * |
1224 | find_queue(struct dn_flow_set *fs, struct ip_flow_id *id) |
1225 | { |
1226 | int i = 0 ; /* we need i and q for new allocations */ |
1227 | struct dn_flow_queue *q, *prev; |
1228 | int is_v6 = IS_IP6_FLOW_ID(id); |
1229 | |
1230 | if ( !(fs->flags_fs & DN_HAVE_FLOW_MASK) ) |
1231 | q = fs->rq[0] ; |
1232 | else { |
1233 | /* first, do the masking, then hash */ |
1234 | id->dst_port &= fs->flow_mask.dst_port ; |
1235 | id->src_port &= fs->flow_mask.src_port ; |
1236 | id->proto &= fs->flow_mask.proto ; |
1237 | id->flags = 0 ; /* we don't care about this one */ |
1238 | if (is_v6) { |
1239 | APPLY_MASK(&id->dst_ip6, &fs->flow_mask.dst_ip6); |
1240 | APPLY_MASK(&id->src_ip6, &fs->flow_mask.src_ip6); |
1241 | id->flow_id6 &= fs->flow_mask.flow_id6; |
1242 | |
1243 | i = ((id->dst_ip6.__u6_addr.__u6_addr32[0]) & 0xffff)^ |
1244 | ((id->dst_ip6.__u6_addr.__u6_addr32[1]) & 0xffff)^ |
1245 | ((id->dst_ip6.__u6_addr.__u6_addr32[2]) & 0xffff)^ |
1246 | ((id->dst_ip6.__u6_addr.__u6_addr32[3]) & 0xffff)^ |
1247 | |
1248 | ((id->dst_ip6.__u6_addr.__u6_addr32[0] >> 15) & 0xffff)^ |
1249 | ((id->dst_ip6.__u6_addr.__u6_addr32[1] >> 15) & 0xffff)^ |
1250 | ((id->dst_ip6.__u6_addr.__u6_addr32[2] >> 15) & 0xffff)^ |
1251 | ((id->dst_ip6.__u6_addr.__u6_addr32[3] >> 15) & 0xffff)^ |
1252 | |
1253 | ((id->src_ip6.__u6_addr.__u6_addr32[0] << 1) & 0xfffff)^ |
1254 | ((id->src_ip6.__u6_addr.__u6_addr32[1] << 1) & 0xfffff)^ |
1255 | ((id->src_ip6.__u6_addr.__u6_addr32[2] << 1) & 0xfffff)^ |
1256 | ((id->src_ip6.__u6_addr.__u6_addr32[3] << 1) & 0xfffff)^ |
1257 | |
1258 | ((id->src_ip6.__u6_addr.__u6_addr32[0] >> 16) & 0xffff)^ |
1259 | ((id->src_ip6.__u6_addr.__u6_addr32[1] >> 16) & 0xffff)^ |
1260 | ((id->src_ip6.__u6_addr.__u6_addr32[2] >> 16) & 0xffff)^ |
1261 | ((id->src_ip6.__u6_addr.__u6_addr32[3] >> 16) & 0xffff)^ |
1262 | |
1263 | (id->dst_port << 1) ^ (id->src_port) ^ |
1264 | (id->proto ) ^ |
1265 | (id->flow_id6); |
1266 | } else { |
1267 | id->dst_ip &= fs->flow_mask.dst_ip ; |
1268 | id->src_ip &= fs->flow_mask.src_ip ; |
1269 | |
1270 | i = ( (id->dst_ip) & 0xffff ) ^ |
1271 | ( (id->dst_ip >> 15) & 0xffff ) ^ |
1272 | ( (id->src_ip << 1) & 0xffff ) ^ |
1273 | ( (id->src_ip >> 16 ) & 0xffff ) ^ |
1274 | (id->dst_port << 1) ^ (id->src_port) ^ |
1275 | (id->proto ); |
1276 | } |
1277 | i = i % fs->rq_size ; |
1278 | /* finally, scan the current list for a match */ |
1279 | searches++ ; |
1280 | for (prev=NULL, q = fs->rq[i] ; q ; ) { |
1281 | search_steps++; |
1282 | if (is_v6 && |
1283 | IN6_ARE_ADDR_EQUAL(&id->dst_ip6,&q->id.dst_ip6) && |
1284 | IN6_ARE_ADDR_EQUAL(&id->src_ip6,&q->id.src_ip6) && |
1285 | id->dst_port == q->id.dst_port && |
1286 | id->src_port == q->id.src_port && |
1287 | id->proto == q->id.proto && |
1288 | id->flags == q->id.flags && |
1289 | id->flow_id6 == q->id.flow_id6) |
1290 | break ; /* found */ |
1291 | |
1292 | if (!is_v6 && id->dst_ip == q->id.dst_ip && |
1293 | id->src_ip == q->id.src_ip && |
1294 | id->dst_port == q->id.dst_port && |
1295 | id->src_port == q->id.src_port && |
1296 | id->proto == q->id.proto && |
1297 | id->flags == q->id.flags) |
1298 | break ; /* found */ |
1299 | |
1300 | /* No match. Check if we can expire the entry */ |
1301 | if (pipe_expire && q->head == NULL && q->S == q->F+1 ) { |
1302 | /* entry is idle and not in any heap, expire it */ |
1303 | struct dn_flow_queue *old_q = q ; |
1304 | |
1305 | if (prev != NULL) |
1306 | prev->next = q = q->next ; |
1307 | else |
1308 | fs->rq[i] = q = q->next ; |
1309 | fs->rq_elements-- ; |
1310 | FREE(old_q, M_DUMMYNET); |
1311 | continue ; |
1312 | } |
1313 | prev = q ; |
1314 | q = q->next ; |
1315 | } |
1316 | if (q && prev != NULL) { /* found and not in front */ |
1317 | prev->next = q->next ; |
1318 | q->next = fs->rq[i] ; |
1319 | fs->rq[i] = q ; |
1320 | } |
1321 | } |
1322 | if (q == NULL) { /* no match, need to allocate a new entry */ |
1323 | q = create_queue(fs, i); |
1324 | if (q != NULL) |
1325 | q->id = *id ; |
1326 | } |
1327 | return q ; |
1328 | } |
1329 | |
1330 | static int |
1331 | red_drops(struct dn_flow_set *fs, struct dn_flow_queue *q, int len) |
1332 | { |
1333 | /* |
1334 | * RED algorithm |
1335 | * |
1336 | * RED calculates the average queue size (avg) using a low-pass filter |
1337 | * with an exponential weighted (w_q) moving average: |
1338 | * avg <- (1-w_q) * avg + w_q * q_size |
 * where q_size is the queue length (measured in bytes or packets).
1340 | * |
1341 | * If q_size == 0, we compute the idle time for the link, and set |
1342 | * avg = (1 - w_q)^(idle/s) |
1343 | * where s is the time needed for transmitting a medium-sized packet. |
1344 | * |
1345 | * Now, if avg < min_th the packet is enqueued. |
1346 | * If avg > max_th the packet is dropped. Otherwise, the packet is |
 * dropped with a probability that is a function of avg.
1348 | * |
1349 | */ |
1350 | |
1351 | int64_t p_b = 0; |
1352 | /* queue in bytes or packets ? */ |
1353 | u_int q_size = (fs->flags_fs & DN_QSIZE_IS_BYTES) ? q->len_bytes : q->len; |
1354 | |
1355 | DPRINTF(("\ndummynet: %d q: %2u " , (int) curr_time, q_size)); |
1356 | |
1357 | /* average queue size estimation */ |
1358 | if (q_size != 0) { |
1359 | /* |
1360 | * queue is not empty, avg <- avg + (q_size - avg) * w_q |
1361 | */ |
1362 | int diff = SCALE(q_size) - q->avg; |
1363 | int64_t v = SCALE_MUL((int64_t) diff, (int64_t) fs->w_q); |
1364 | |
1365 | q->avg += (int) v; |
1366 | } else { |
1367 | /* |
1368 | * queue is empty, find for how long the queue has been |
1369 | * empty and use a lookup table for computing |
 * (1 - w_q)^(idle_time/s) where s is the time to send a
1371 | * (small) packet. |
1372 | * XXX check wraps... |
1373 | */ |
1374 | if (q->avg) { |
1375 | u_int t = (curr_time - q->q_time) / fs->lookup_step; |
1376 | |
1377 | q->avg = (t < fs->lookup_depth) ? |
1378 | SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0; |
1379 | } |
1380 | } |
1381 | DPRINTF(("dummynet: avg: %u " , SCALE_VAL(q->avg))); |
1382 | |
1383 | /* should i drop ? */ |
1384 | |
1385 | if (q->avg < fs->min_th) { |
1386 | q->count = -1; |
1387 | return 0; /* accept packet ; */ |
1388 | } |
1389 | if (q->avg >= fs->max_th) { /* average queue >= max threshold */ |
1390 | if (fs->flags_fs & DN_IS_GENTLE_RED) { |
1391 | /* |
1392 | * According to Gentle-RED, if avg is greater than max_th the |
1393 | * packet is dropped with a probability |
1394 | * p_b = c_3 * avg - c_4 |
1395 | * where c_3 = (1 - max_p) / max_th, and c_4 = 1 - 2 * max_p |
1396 | */ |
1397 | p_b = SCALE_MUL((int64_t) fs->c_3, (int64_t) q->avg) - fs->c_4; |
1398 | } else { |
1399 | q->count = -1; |
1400 | DPRINTF(("dummynet: - drop" )); |
1401 | return 1 ; |
1402 | } |
1403 | } else if (q->avg > fs->min_th) { |
1404 | /* |
1405 | * we compute p_b using the linear dropping function p_b = c_1 * |
1406 | * avg - c_2, where c_1 = max_p / (max_th - min_th), and c_2 = |
1407 | * max_p * min_th / (max_th - min_th) |
1408 | */ |
1409 | p_b = SCALE_MUL((int64_t) fs->c_1, (int64_t) q->avg) - fs->c_2; |
1410 | } |
1411 | if (fs->flags_fs & DN_QSIZE_IS_BYTES) |
1412 | p_b = (p_b * len) / fs->max_pkt_size; |
1413 | if (++q->count == 0) |
1414 | q->random = (my_random() & 0xffff); |
1415 | else { |
1416 | /* |
1417 | * q->count counts packets arrived since last drop, so a greater |
1418 | * value of q->count means a greater packet drop probability. |
1419 | */ |
1420 | if (SCALE_MUL(p_b, SCALE((int64_t) q->count)) > q->random) { |
1421 | q->count = 0; |
1422 | DPRINTF(("dummynet: - red drop" )); |
1423 | /* after a drop we calculate a new random value */ |
1424 | q->random = (my_random() & 0xffff); |
1425 | return 1; /* drop */ |
1426 | } |
1427 | } |
1428 | /* end of RED algorithm */ |
1429 | return 0 ; /* accept */ |
1430 | } |
1431 | |
1432 | static __inline |
1433 | struct dn_flow_set * |
1434 | locate_flowset(int fs_nr) |
1435 | { |
1436 | struct dn_flow_set *fs; |
1437 | SLIST_FOREACH(fs, &flowsethash[HASH(fs_nr)], next) |
1438 | if (fs->fs_nr == fs_nr) |
1439 | return fs ; |
1440 | |
1441 | return (NULL); |
1442 | } |
1443 | |
1444 | static __inline struct dn_pipe * |
1445 | locate_pipe(int pipe_nr) |
1446 | { |
1447 | struct dn_pipe *pipe; |
1448 | |
1449 | SLIST_FOREACH(pipe, &pipehash[HASH(pipe_nr)], next) |
1450 | if (pipe->pipe_nr == pipe_nr) |
1451 | return (pipe); |
1452 | |
1453 | return (NULL); |
1454 | } |
1455 | |
1456 | |
1457 | |
1458 | /* |
1459 | * dummynet hook for packets. Below 'pipe' is a pipe or a queue |
1460 | * depending on whether WF2Q or fixed bw is used. |
1461 | * |
1462 | * pipe_nr pipe or queue the packet is destined for. |
1463 | * dir where shall we send the packet after dummynet. |
1464 | * m the mbuf with the packet |
1465 | * ifp the 'ifp' parameter from the caller. |
1466 | * NULL in ip_input, destination interface in ip_output, |
1467 | * real_dst in bdg_forward |
1468 | * ro route parameter (only used in ip_output, NULL otherwise) |
1469 | * dst destination address, only used by ip_output |
1470 | * rule matching rule, in case of multiple passes |
1471 | * flags flags from the caller, only used in ip_output |
1472 | * |
1473 | */ |
1474 | static int |
1475 | dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa, int client) |
1476 | { |
1477 | struct mbuf *head = NULL, *tail = NULL; |
1478 | struct dn_pkt_tag *pkt; |
1479 | struct m_tag *mtag; |
1480 | struct dn_flow_set *fs = NULL; |
1481 | struct dn_pipe *pipe ; |
1482 | u_int64_t len = m->m_pkthdr.len ; |
1483 | struct dn_flow_queue *q = NULL ; |
1484 | int is_pipe = 0; |
1485 | struct timespec ts; |
1486 | struct timeval tv; |
1487 | |
1488 | DPRINTF(("dummynet_io m: 0x%llx pipe: %d dir: %d client: %d\n" , |
1489 | (uint64_t)VM_KERNEL_ADDRPERM(m), pipe_nr, dir, client)); |
1490 | |
1491 | #if IPFIREWALL |
1492 | #if IPFW2 |
1493 | if (client == DN_CLIENT_IPFW) { |
1494 | ipfw_insn *cmd = fwa->fwa_ipfw_rule->cmd + fwa->fwa_ipfw_rule->act_ofs; |
1495 | |
1496 | if (cmd->opcode == O_LOG) |
1497 | cmd += F_LEN(cmd); |
1498 | is_pipe = (cmd->opcode == O_PIPE); |
1499 | } |
1500 | #else |
1501 | if (client == DN_CLIENT_IPFW) |
1502 | is_pipe = (fwa->fwa_ipfw_rule->fw_flg & IP_FW_F_COMMAND) == IP_FW_F_PIPE; |
1503 | #endif |
1504 | #endif /* IPFIREWALL */ |
1505 | |
1506 | #if DUMMYNET |
1507 | if (client == DN_CLIENT_PF) |
1508 | is_pipe = fwa->fwa_flags == DN_IS_PIPE ? 1 : 0; |
1509 | #endif /* DUMMYNET */ |
1510 | |
1511 | pipe_nr &= 0xffff ; |
1512 | |
1513 | lck_mtx_lock(dn_mutex); |
1514 | |
	/* make all time measurements in milliseconds (ms) -
	 * here we convert secs and usecs to msecs (the usecs are
	 * simply truncated to whole milliseconds).
	 */
1519 | microuptime(&tv); |
1520 | curr_time = (tv.tv_sec * 1000) + (tv.tv_usec / 1000); |
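	/*
	 * e.g. tv = { .tv_sec = 2, .tv_usec = 3500 } yields
	 * curr_time = 2 * 1000 + 3 = 2003 ms (the odd 500 us are lost).
	 */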
1521 | |
1522 | /* |
1523 | * This is a dummynet rule, so we expect an O_PIPE or O_QUEUE rule. |
1524 | */ |
1525 | if (is_pipe) { |
1526 | pipe = locate_pipe(pipe_nr); |
1527 | if (pipe != NULL) |
1528 | fs = &(pipe->fs); |
1529 | } else |
1530 | fs = locate_flowset(pipe_nr); |
1531 | |
1532 | |
	if (fs == NULL) {
1534 | goto dropit ; /* this queue/pipe does not exist! */ |
1535 | } |
1536 | pipe = fs->pipe ; |
	if (pipe == NULL) { /* must be a queue, try to find a matching pipe */
1538 | pipe = locate_pipe(fs->parent_nr); |
1539 | |
1540 | if (pipe != NULL) |
1541 | fs->pipe = pipe ; |
1542 | else { |
1543 | printf("dummynet: no pipe %d for queue %d, drop pkt\n" , |
1544 | fs->parent_nr, fs->fs_nr); |
1545 | goto dropit ; |
1546 | } |
1547 | } |
1548 | q = find_queue(fs, &(fwa->fwa_id)); |
1549 | if ( q == NULL ) |
1550 | goto dropit ; /* cannot allocate queue */ |
1551 | /* |
1552 | * update statistics, then check reasons to drop pkt |
1553 | */ |
1554 | q->tot_bytes += len ; |
1555 | q->tot_pkts++ ; |
1556 | if ( fs->plr && (my_random() < fs->plr)) |
1557 | goto dropit ; /* random pkt drop */ |
1558 | if ( fs->flags_fs & DN_QSIZE_IS_BYTES) { |
1559 | if (q->len_bytes > fs->qsize) |
1560 | goto dropit ; /* queue size overflow */ |
1561 | } else { |
1562 | if (q->len >= fs->qsize) |
1563 | goto dropit ; /* queue count overflow */ |
1564 | } |
1565 | if ( fs->flags_fs & DN_IS_RED && red_drops(fs, q, len) ) |
1566 | goto dropit ; |
1567 | |
	/* XXX expensive to zero, see if we can remove it */
1569 | mtag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DUMMYNET, |
1570 | sizeof(struct dn_pkt_tag), M_NOWAIT, m); |
1571 | if ( mtag == NULL ) |
1572 | goto dropit ; /* cannot allocate packet header */ |
1573 | m_tag_prepend(m, mtag); /* attach to mbuf chain */ |
1574 | |
1575 | pkt = (struct dn_pkt_tag *)(mtag+1); |
1576 | bzero(pkt, sizeof(struct dn_pkt_tag)); |
	/* ok, we can handle the pkt now... */
1578 | /* build and enqueue packet + parameters */ |
1579 | /* |
1580 | * PF is checked before ipfw so remember ipfw rule only when |
1581 | * the caller is ipfw. When the caller is PF, fwa_ipfw_rule |
1582 | * is a fake rule just used for convenience |
1583 | */ |
1584 | if (client == DN_CLIENT_IPFW) |
1585 | pkt->dn_ipfw_rule = fwa->fwa_ipfw_rule; |
1586 | pkt->dn_pf_rule = fwa->fwa_pf_rule; |
1587 | pkt->dn_dir = dir ; |
1588 | pkt->dn_client = client; |
1589 | |
1590 | pkt->dn_ifp = fwa->fwa_oif; |
1591 | if (dir == DN_TO_IP_OUT) { |
1592 | /* |
1593 | * We need to copy *ro because for ICMP pkts (and maybe others) |
1594 | * the caller passed a pointer into the stack; dst might also be |
1595 | * a pointer into *ro so it needs to be updated. |
1596 | */ |
1597 | if (fwa->fwa_ro) { |
1598 | route_copyout(&pkt->dn_ro, fwa->fwa_ro, sizeof (pkt->dn_ro)); |
1599 | } |
1600 | if (fwa->fwa_dst) { |
1601 | if (fwa->fwa_dst == (struct sockaddr_in *)&fwa->fwa_ro->ro_dst) /* dst points into ro */ |
1602 | fwa->fwa_dst = (struct sockaddr_in *)&(pkt->dn_ro.ro_dst) ; |
1603 | |
1604 | bcopy (fwa->fwa_dst, &pkt->dn_dst, sizeof(pkt->dn_dst)); |
1605 | } |
1606 | } else if (dir == DN_TO_IP6_OUT) { |
1607 | if (fwa->fwa_ro6) { |
1608 | route_copyout((struct route *)&pkt->dn_ro6, |
1609 | (struct route *)fwa->fwa_ro6, sizeof (pkt->dn_ro6)); |
1610 | } |
1611 | if (fwa->fwa_ro6_pmtu) { |
1612 | route_copyout((struct route *)&pkt->dn_ro6_pmtu, |
1613 | (struct route *)fwa->fwa_ro6_pmtu, sizeof (pkt->dn_ro6_pmtu)); |
1614 | } |
1615 | if (fwa->fwa_dst6) { |
1616 | if (fwa->fwa_dst6 == (struct sockaddr_in6 *)&fwa->fwa_ro6->ro_dst) /* dst points into ro */ |
1617 | fwa->fwa_dst6 = (struct sockaddr_in6 *)&(pkt->dn_ro6.ro_dst) ; |
1618 | |
1619 | bcopy (fwa->fwa_dst6, &pkt->dn_dst6, sizeof(pkt->dn_dst6)); |
1620 | } |
1621 | pkt->dn_origifp = fwa->fwa_origifp; |
1622 | pkt->dn_mtu = fwa->fwa_mtu; |
1623 | pkt->dn_alwaysfrag = fwa->fwa_alwaysfrag; |
1624 | pkt->dn_unfragpartlen = fwa->fwa_unfragpartlen; |
1625 | if (fwa->fwa_exthdrs) { |
1626 | bcopy (fwa->fwa_exthdrs, &pkt->dn_exthdrs, sizeof(pkt->dn_exthdrs)); |
1627 | /* |
1628 | * Need to zero out the source structure so the mbufs |
1629 | * won't be freed by ip6_output() |
1630 | */ |
1631 | bzero(fwa->fwa_exthdrs, sizeof(struct ip6_exthdrs)); |
1632 | } |
1633 | } |
1634 | if (dir == DN_TO_IP_OUT || dir == DN_TO_IP6_OUT) { |
1635 | pkt->dn_flags = fwa->fwa_oflags; |
1636 | if (fwa->fwa_ipoa != NULL) |
1637 | pkt->dn_ipoa = *(fwa->fwa_ipoa); |
1638 | } |
1639 | if (q->head == NULL) |
1640 | q->head = m; |
1641 | else |
1642 | q->tail->m_nextpkt = m; |
1643 | q->tail = m; |
1644 | q->len++; |
1645 | q->len_bytes += len ; |
1646 | |
1647 | if ( q->head != m ) /* flow was not idle, we are done */ |
1648 | goto done; |
1649 | /* |
1650 | * If we reach this point the flow was previously idle, so we need |
1651 | * to schedule it. This involves different actions for fixed-rate or |
1652 | * WF2Q queues. |
1653 | */ |
1654 | if (is_pipe) { |
1655 | /* |
1656 | * Fixed-rate queue: just insert into the ready_heap. |
1657 | */ |
1658 | dn_key t = 0 ; |
1659 | if (pipe->bandwidth) |
1660 | t = SET_TICKS(m, q, pipe); |
1661 | q->sched_time = curr_time ; |
1662 | if (t == 0) /* must process it now */ |
1663 | ready_event( q , &head, &tail ); |
1664 | else |
1665 | heap_insert(&ready_heap, curr_time + t , q ); |
1666 | } else { |
1667 | /* |
1668 | * WF2Q. First, compute start time S: if the flow was idle (S=F+1) |
1669 | * set S to the virtual time V for the controlling pipe, and update |
1670 | * the sum of weights for the pipe; otherwise, remove flow from |
1671 | * idle_heap and set S to max(F,V). |
1672 | * Second, compute finish time F = S + len/weight. |
1673 | * Third, if pipe was idle, update V=max(S, V). |
1674 | * Fourth, count one more backlogged flow. |
1675 | */ |
1676 | if (DN_KEY_GT(q->S, q->F)) { /* means timestamps are invalid */ |
1677 | q->S = pipe->V ; |
1678 | pipe->sum += fs->weight ; /* add weight of new queue */ |
1679 | } else { |
1680 | heap_extract(&(pipe->idle_heap), q); |
1681 | q->S = MAX64(q->F, pipe->V ) ; |
1682 | } |
1683 | q->F = q->S + ( len<<MY_M )/(u_int64_t) fs->weight; |
1684 | |
1685 | if (pipe->not_eligible_heap.elements == 0 && |
1686 | pipe->scheduler_heap.elements == 0) |
1687 | pipe->V = MAX64 ( q->S, pipe->V ); |
1688 | fs->backlogged++ ; |
1689 | /* |
1690 | * Look at eligibility. A flow is not eligibile if S>V (when |
1691 | * this happens, it means that there is some other flow already |
1692 | * scheduled for the same pipe, so the scheduler_heap cannot be |
1693 | * empty). If the flow is not eligible we just store it in the |
1694 | * not_eligible_heap. Otherwise, we store in the scheduler_heap |
1695 | * and possibly invoke ready_event_wfq() right now if there is |
1696 | * leftover credit. |
1697 | * Note that for all flows in scheduler_heap (SCH), S_i <= V, |
1698 | * and for all flows in not_eligible_heap (NEH), S_i > V . |
1699 | * So when we need to compute max( V, min(S_i) ) forall i in SCH+NEH, |
1700 | * we only need to look into NEH. |
1701 | */ |
1702 | if (DN_KEY_GT(q->S, pipe->V) ) { /* not eligible */ |
1703 | if (pipe->scheduler_heap.elements == 0) |
1704 | printf("dummynet: ++ ouch! not eligible but empty scheduler!\n" ); |
1705 | heap_insert(&(pipe->not_eligible_heap), q->S, q); |
1706 | } else { |
1707 | heap_insert(&(pipe->scheduler_heap), q->F, q); |
1708 | if (pipe->numbytes >= 0) { /* pipe is idle */ |
1709 | if (pipe->scheduler_heap.elements != 1) |
1710 | printf("dummynet: OUCH! pipe should have been idle!\n" ); |
1711 | DPRINTF(("dummynet: waking up pipe %d at %d\n" , |
1712 | pipe->pipe_nr, (int)(q->F >> MY_M))); |
1713 | pipe->sched_time = curr_time ; |
1714 | ready_event_wfq(pipe, &head, &tail); |
1715 | } |
1716 | } |
1717 | } |
1718 | done: |
1719 | /* start the timer and set global if not already set */ |
1720 | if (!timer_enabled) { |
1721 | ts.tv_sec = 0; |
		ts.tv_nsec = 1 * 1000000;	/* 1 ms */
1723 | timer_enabled = 1; |
1724 | bsd_timeout(dummynet, NULL, &ts); |
1725 | } |
1726 | |
1727 | lck_mtx_unlock(dn_mutex); |
1728 | |
1729 | if (head != NULL) { |
1730 | dummynet_send(head); |
1731 | } |
1732 | |
1733 | return 0; |
1734 | |
1735 | dropit: |
1736 | if (q) |
1737 | q->drops++ ; |
1738 | lck_mtx_unlock(dn_mutex); |
1739 | m_freem(m); |
1740 | return ( (fs && (fs->flags_fs & DN_NOERROR)) ? 0 : ENOBUFS); |
1741 | } |
1742 | |
1743 | /* |
1744 | * Below, the ROUTE_RELEASE is only needed when (pkt->dn_dir == DN_TO_IP_OUT) |
1745 | * Doing this would probably save us the initial bzero of dn_pkt |
1746 | */ |
#define DN_FREE_PKT(_m) do {					\
	struct m_tag *tag = m_tag_locate(_m, KERNEL_MODULE_TAG_ID, \
	    KERNEL_TAG_TYPE_DUMMYNET, NULL);			\
	if (tag) {						\
		struct dn_pkt_tag *n = (struct dn_pkt_tag *)(tag+1); \
		ROUTE_RELEASE(&n->dn_ro);			\
		m_tag_delete(_m, tag);				\
	}							\
	m_freem(_m);						\
} while (0)
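
/*
 * Note: DN_FREE_PKT evaluates its argument several times (m_tag_locate,
 * m_tag_delete, m_freem), so callers must pass a plain lvalue and never
 * an expression with side effects such as 'm = m->m_nextpkt'.
 */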
1756 | |
1757 | /* |
1758 | * Dispose all packets and flow_queues on a flow_set. |
1759 | * If all=1, also remove red lookup table and other storage, |
1760 | * including the descriptor itself. |
1761 | * For the one in dn_pipe MUST also cleanup ready_heap... |
1762 | */ |
1763 | static void |
1764 | purge_flow_set(struct dn_flow_set *fs, int all) |
1765 | { |
1766 | struct dn_flow_queue *q, *qn ; |
1767 | int i ; |
1768 | |
1769 | LCK_MTX_ASSERT(dn_mutex, LCK_MTX_ASSERT_OWNED); |
1770 | |
1771 | for (i = 0 ; i <= fs->rq_size ; i++ ) { |
1772 | for (q = fs->rq[i] ; q ; q = qn ) { |
1773 | struct mbuf *m, *mnext; |
1774 | |
1775 | mnext = q->head; |
1776 | while ((m = mnext) != NULL) { |
1777 | mnext = m->m_nextpkt; |
1778 | DN_FREE_PKT(m); |
1779 | } |
1780 | qn = q->next ; |
1781 | FREE(q, M_DUMMYNET); |
1782 | } |
1783 | fs->rq[i] = NULL ; |
1784 | } |
1785 | fs->rq_elements = 0 ; |
1786 | if (all) { |
1787 | /* RED - free lookup table */ |
1788 | if (fs->w_q_lookup) |
1789 | FREE(fs->w_q_lookup, M_DUMMYNET); |
1790 | if (fs->rq) |
1791 | FREE(fs->rq, M_DUMMYNET); |
1792 | /* if this fs is not part of a pipe, free it */ |
1793 | if (fs->pipe && fs != &(fs->pipe->fs) ) |
1794 | FREE(fs, M_DUMMYNET); |
1795 | } |
1796 | } |
1797 | |
1798 | /* |
1799 | * Dispose all packets queued on a pipe (not a flow_set). |
1800 | * Also free all resources associated to a pipe, which is about |
1801 | * to be deleted. |
1802 | */ |
1803 | static void |
1804 | purge_pipe(struct dn_pipe *pipe) |
1805 | { |
1806 | struct mbuf *m, *mnext; |
1807 | |
1808 | purge_flow_set( &(pipe->fs), 1 ); |
1809 | |
1810 | mnext = pipe->head; |
1811 | while ((m = mnext) != NULL) { |
1812 | mnext = m->m_nextpkt; |
1813 | DN_FREE_PKT(m); |
1814 | } |
1815 | |
1816 | heap_free( &(pipe->scheduler_heap) ); |
1817 | heap_free( &(pipe->not_eligible_heap) ); |
1818 | heap_free( &(pipe->idle_heap) ); |
1819 | } |
1820 | |
1821 | /* |
1822 | * Delete all pipes and heaps returning memory. Must also |
1823 | * remove references from all ipfw rules to all pipes. |
1824 | */ |
1825 | static void |
1826 | dummynet_flush(void) |
1827 | { |
1828 | struct dn_pipe *pipe, *pipe1; |
1829 | struct dn_flow_set *fs, *fs1; |
1830 | int i; |
1831 | |
1832 | lck_mtx_lock(dn_mutex); |
1833 | |
1834 | #if IPFW2 |
1835 | /* remove all references to pipes ...*/ |
1836 | flush_pipe_ptrs(NULL); |
1837 | #endif /* IPFW2 */ |
1838 | |
1839 | /* Free heaps so we don't have unwanted events. */ |
1840 | heap_free(&ready_heap); |
1841 | heap_free(&wfq_ready_heap); |
1842 | heap_free(&extract_heap); |
1843 | |
1844 | /* |
1845 | * Now purge all queued pkts and delete all pipes. |
1846 | * |
1847 | * XXXGL: can we merge the for(;;) cycles into one or not? |
1848 | */ |
1849 | for (i = 0; i < HASHSIZE; i++) |
1850 | SLIST_FOREACH_SAFE(fs, &flowsethash[i], next, fs1) { |
1851 | SLIST_REMOVE(&flowsethash[i], fs, dn_flow_set, next); |
1852 | purge_flow_set(fs, 1); |
1853 | } |
1854 | for (i = 0; i < HASHSIZE; i++) |
1855 | SLIST_FOREACH_SAFE(pipe, &pipehash[i], next, pipe1) { |
1856 | SLIST_REMOVE(&pipehash[i], pipe, dn_pipe, next); |
1857 | purge_pipe(pipe); |
1858 | FREE(pipe, M_DUMMYNET); |
1859 | } |
1860 | lck_mtx_unlock(dn_mutex); |
1861 | } |
1862 | |
1863 | |
1864 | static void |
1865 | dn_ipfw_rule_delete_fs(struct dn_flow_set *fs, void *r) |
1866 | { |
1867 | int i ; |
1868 | struct dn_flow_queue *q ; |
1869 | struct mbuf *m ; |
1870 | |
1871 | for (i = 0 ; i <= fs->rq_size ; i++) /* last one is ovflow */ |
1872 | for (q = fs->rq[i] ; q ; q = q->next ) |
1873 | for (m = q->head ; m ; m = m->m_nextpkt ) { |
1874 | struct dn_pkt_tag *pkt = dn_tag_get(m) ; |
1875 | if (pkt->dn_ipfw_rule == r) |
1876 | pkt->dn_ipfw_rule = &default_rule ; |
1877 | } |
1878 | } |
1879 | /* |
1880 | * when a firewall rule is deleted, scan all queues and remove the flow-id |
1881 | * from packets matching this rule. |
1882 | */ |
1883 | void |
1884 | dn_ipfw_rule_delete(void *r) |
1885 | { |
1886 | struct dn_pipe *p ; |
1887 | struct dn_flow_set *fs ; |
1888 | struct dn_pkt_tag *pkt ; |
1889 | struct mbuf *m ; |
1890 | int i; |
1891 | |
1892 | lck_mtx_lock(dn_mutex); |
1893 | |
1894 | /* |
1895 | * If the rule references a queue (dn_flow_set), then scan |
1896 | * the flow set, otherwise scan pipes. Should do either, but doing |
1897 | * both does not harm. |
1898 | */ |
1899 | for (i = 0; i < HASHSIZE; i++) |
1900 | SLIST_FOREACH(fs, &flowsethash[i], next) |
1901 | dn_ipfw_rule_delete_fs(fs, r); |
1902 | |
1903 | for (i = 0; i < HASHSIZE; i++) |
1904 | SLIST_FOREACH(p, &pipehash[i], next) { |
1905 | fs = &(p->fs); |
1906 | dn_ipfw_rule_delete_fs(fs, r); |
1907 | for (m = p->head ; m ; m = m->m_nextpkt ) { |
1908 | pkt = dn_tag_get(m); |
1909 | if (pkt->dn_ipfw_rule == r) |
1910 | pkt->dn_ipfw_rule = &default_rule; |
1911 | } |
1912 | } |
1913 | lck_mtx_unlock(dn_mutex); |
1914 | } |
1915 | |
1916 | /* |
1917 | * setup RED parameters |
1918 | */ |
1919 | static int |
1920 | config_red(struct dn_flow_set *p, struct dn_flow_set * x) |
1921 | { |
1922 | int i; |
1923 | |
1924 | x->w_q = p->w_q; |
1925 | x->min_th = SCALE(p->min_th); |
1926 | x->max_th = SCALE(p->max_th); |
1927 | x->max_p = p->max_p; |
1928 | |
1929 | x->c_1 = p->max_p / (p->max_th - p->min_th); |
1930 | x->c_2 = SCALE_MUL(x->c_1, SCALE(p->min_th)); |
1931 | if (x->flags_fs & DN_IS_GENTLE_RED) { |
1932 | x->c_3 = (SCALE(1) - p->max_p) / p->max_th; |
1933 | x->c_4 = (SCALE(1) - 2 * p->max_p); |
1934 | } |
1935 | |
	/* if the lookup table already exists, free it and create it again */
1937 | if (x->w_q_lookup) { |
1938 | FREE(x->w_q_lookup, M_DUMMYNET); |
1939 | x->w_q_lookup = NULL ; |
1940 | } |
1941 | if (red_lookup_depth == 0) { |
1942 | printf("\ndummynet: net.inet.ip.dummynet.red_lookup_depth must be > 0\n" ); |
1943 | FREE(x, M_DUMMYNET); |
1944 | return EINVAL; |
1945 | } |
1946 | x->lookup_depth = red_lookup_depth; |
1947 | x->w_q_lookup = (u_int *) _MALLOC(x->lookup_depth * sizeof(int), |
1948 | M_DUMMYNET, M_DONTWAIT); |
1949 | if (x->w_q_lookup == NULL) { |
1950 | printf("dummynet: sorry, cannot allocate red lookup table\n" ); |
1951 | FREE(x, M_DUMMYNET); |
1952 | return ENOSPC; |
1953 | } |
1954 | |
1955 | /* fill the lookup table with (1 - w_q)^x */ |
1956 | x->lookup_step = p->lookup_step ; |
1957 | x->lookup_weight = p->lookup_weight ; |
1958 | x->w_q_lookup[0] = SCALE(1) - x->w_q; |
1959 | for (i = 1; i < x->lookup_depth; i++) |
1960 | x->w_q_lookup[i] = SCALE_MUL(x->w_q_lookup[i - 1], x->lookup_weight); |
1961 | if (red_avg_pkt_size < 1) |
1962 | red_avg_pkt_size = 512 ; |
1963 | x->avg_pkt_size = red_avg_pkt_size ; |
1964 | if (red_max_pkt_size < 1) |
1965 | red_max_pkt_size = 1500 ; |
1966 | x->max_pkt_size = red_max_pkt_size ; |
1967 | return 0 ; |
1968 | } |
1969 | |
1970 | static int |
1971 | alloc_hash(struct dn_flow_set *x, struct dn_flow_set *pfs) |
1972 | { |
1973 | if (x->flags_fs & DN_HAVE_FLOW_MASK) { /* allocate some slots */ |
1974 | int l = pfs->rq_size; |
1975 | |
1976 | if (l == 0) |
1977 | l = dn_hash_size; |
1978 | if (l < 4) |
1979 | l = 4; |
1980 | else if (l > DN_MAX_HASH_SIZE) |
1981 | l = DN_MAX_HASH_SIZE; |
1982 | x->rq_size = l; |
1983 | } else /* one is enough for null mask */ |
1984 | x->rq_size = 1; |
1985 | x->rq = _MALLOC((1 + x->rq_size) * sizeof(struct dn_flow_queue *), |
1986 | M_DUMMYNET, M_DONTWAIT | M_ZERO); |
1987 | if (x->rq == NULL) { |
1988 | printf("dummynet: sorry, cannot allocate queue\n" ); |
1989 | return ENOSPC; |
1990 | } |
1991 | x->rq_elements = 0; |
1992 | return 0 ; |
1993 | } |
1994 | |
1995 | static void |
1996 | set_fs_parms(struct dn_flow_set *x, struct dn_flow_set *src) |
1997 | { |
1998 | x->flags_fs = src->flags_fs; |
1999 | x->qsize = src->qsize; |
2000 | x->plr = src->plr; |
2001 | x->flow_mask = src->flow_mask; |
2002 | if (x->flags_fs & DN_QSIZE_IS_BYTES) { |
2003 | if (x->qsize > 1024*1024) |
2004 | x->qsize = 1024*1024 ; |
2005 | } else { |
2006 | if (x->qsize == 0) |
2007 | x->qsize = 50 ; |
2008 | if (x->qsize > 100) |
2009 | x->qsize = 50 ; |
2010 | } |
2011 | /* configuring RED */ |
2012 | if ( x->flags_fs & DN_IS_RED ) |
2013 | config_red(src, x) ; /* XXX should check errors */ |
2014 | } |
2015 | |
2016 | /* |
2017 | * setup pipe or queue parameters. |
2018 | */ |
2019 | static int |
2020 | config_pipe(struct dn_pipe *p) |
2021 | { |
2022 | int i, r; |
2023 | struct dn_flow_set *pfs = &(p->fs); |
2024 | struct dn_flow_queue *q; |
2025 | |
2026 | /* |
2027 | * The config program passes parameters as follows: |
2028 | * bw = bits/second (0 means no limits), |
2029 | * delay = ms, must be translated into ticks. |
2030 | * qsize = slots/bytes |
2031 | */ |
2032 | p->delay = ( p->delay * (hz*10) ) / 1000 ; |
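	/*
	 * e.g. with hz = 100 the factor (hz*10) is 1000, so a requested
	 * delay of 30 ms becomes 30 ticks of the 1 ms dummynet clock.
	 */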
2033 | /* We need either a pipe number or a flow_set number */ |
2034 | if (p->pipe_nr == 0 && pfs->fs_nr == 0) |
2035 | return EINVAL ; |
2036 | if (p->pipe_nr != 0 && pfs->fs_nr != 0) |
2037 | return EINVAL ; |
2038 | if (p->pipe_nr != 0) { /* this is a pipe */ |
2039 | struct dn_pipe *x, *b; |
2040 | struct dummynet_event dn_event; |
2041 | lck_mtx_lock(dn_mutex); |
2042 | |
2043 | /* locate pipe */ |
2044 | b = locate_pipe(p->pipe_nr); |
2045 | |
2046 | if (b == NULL || b->pipe_nr != p->pipe_nr) { /* new pipe */ |
2047 | x = _MALLOC(sizeof(struct dn_pipe), M_DUMMYNET, M_DONTWAIT | M_ZERO) ; |
2048 | if (x == NULL) { |
2049 | lck_mtx_unlock(dn_mutex); |
2050 | printf("dummynet: no memory for new pipe\n" ); |
2051 | return ENOSPC; |
2052 | } |
2053 | x->pipe_nr = p->pipe_nr; |
2054 | x->fs.pipe = x ; |
		/*
		 * idle_heap is the only heap from which we extract
		 * from the middle.
		 */
2057 | x->idle_heap.size = x->idle_heap.elements = 0 ; |
		x->idle_heap.offset = offsetof(struct dn_flow_queue, heap_pos);
2059 | } else { |
2060 | x = b; |
2061 | /* Flush accumulated credit for all queues */ |
2062 | for (i = 0; i <= x->fs.rq_size; i++) |
2063 | for (q = x->fs.rq[i]; q; q = q->next) |
2064 | q->numbytes = 0; |
2065 | } |
2066 | |
2067 | x->bandwidth = p->bandwidth ; |
2068 | x->numbytes = 0; /* just in case... */ |
2069 | bcopy(p->if_name, x->if_name, sizeof(p->if_name) ); |
2070 | x->ifp = NULL ; /* reset interface ptr */ |
2071 | x->delay = p->delay ; |
2072 | set_fs_parms(&(x->fs), pfs); |
2073 | |
2074 | |
2075 | if ( x->fs.rq == NULL ) { /* a new pipe */ |
2076 | r = alloc_hash(&(x->fs), pfs) ; |
2077 | if (r) { |
2078 | lck_mtx_unlock(dn_mutex); |
2079 | FREE(x, M_DUMMYNET); |
2080 | return r ; |
2081 | } |
2082 | SLIST_INSERT_HEAD(&pipehash[HASH(x->pipe_nr)], |
2083 | x, next); |
2084 | } |
2085 | lck_mtx_unlock(dn_mutex); |
2086 | |
2087 | bzero(&dn_event, sizeof(dn_event)); |
2088 | dn_event.dn_event_code = DUMMYNET_PIPE_CONFIG; |
2089 | dn_event.dn_event_pipe_config.bandwidth = p->bandwidth; |
2090 | dn_event.dn_event_pipe_config.delay = p->delay; |
2091 | dn_event.dn_event_pipe_config.plr = pfs->plr; |
2092 | |
2093 | dummynet_event_enqueue_nwk_wq_entry(&dn_event); |
2094 | } else { /* config queue */ |
2095 | struct dn_flow_set *x, *b ; |
2096 | |
2097 | lck_mtx_lock(dn_mutex); |
2098 | /* locate flow_set */ |
2099 | b = locate_flowset(pfs->fs_nr); |
2100 | |
2101 | if (b == NULL || b->fs_nr != pfs->fs_nr) { /* new */ |
2102 | if (pfs->parent_nr == 0) { /* need link to a pipe */ |
2103 | lck_mtx_unlock(dn_mutex); |
2104 | return EINVAL ; |
2105 | } |
2106 | x = _MALLOC(sizeof(struct dn_flow_set), M_DUMMYNET, M_DONTWAIT | M_ZERO); |
2107 | if (x == NULL) { |
2108 | lck_mtx_unlock(dn_mutex); |
2109 | printf("dummynet: no memory for new flow_set\n" ); |
2110 | return ENOSPC; |
2111 | } |
2112 | x->fs_nr = pfs->fs_nr; |
2113 | x->parent_nr = pfs->parent_nr; |
2114 | x->weight = pfs->weight ; |
2115 | if (x->weight == 0) |
2116 | x->weight = 1 ; |
2117 | else if (x->weight > 100) |
2118 | x->weight = 100 ; |
2119 | } else { |
2120 | /* Change parent pipe not allowed; must delete and recreate */ |
2121 | if (pfs->parent_nr != 0 && b->parent_nr != pfs->parent_nr) { |
2122 | lck_mtx_unlock(dn_mutex); |
2123 | return EINVAL ; |
2124 | } |
2125 | x = b; |
2126 | } |
2127 | set_fs_parms(x, pfs); |
2128 | |
2129 | if ( x->rq == NULL ) { /* a new flow_set */ |
2130 | r = alloc_hash(x, pfs) ; |
2131 | if (r) { |
2132 | lck_mtx_unlock(dn_mutex); |
2133 | FREE(x, M_DUMMYNET); |
2134 | return r ; |
2135 | } |
2136 | SLIST_INSERT_HEAD(&flowsethash[HASH(x->fs_nr)], |
2137 | x, next); |
2138 | } |
2139 | lck_mtx_unlock(dn_mutex); |
2140 | } |
2141 | return 0 ; |
2142 | } |
2143 | |
2144 | /* |
2145 | * Helper function to remove from a heap queues which are linked to |
2146 | * a flow_set about to be deleted. |
2147 | */ |
2148 | static void |
2149 | fs_remove_from_heap(struct dn_heap *h, struct dn_flow_set *fs) |
2150 | { |
	int i = 0, found = 0 ;

	while (i < h->elements) {
		if ( ((struct dn_flow_queue *)h->p[i].object)->fs == fs) {
			h->elements-- ;
			h->p[i] = h->p[h->elements] ;
			found++ ;
		} else
			i++ ;
	}
2159 | if (found) |
2160 | heapify(h); |
2161 | } |
2162 | |
2163 | /* |
2164 | * helper function to remove a pipe from a heap (can be there at most once) |
2165 | */ |
2166 | static void |
2167 | pipe_remove_from_heap(struct dn_heap *h, struct dn_pipe *p) |
2168 | { |
2169 | if (h->elements > 0) { |
2170 | int i = 0 ; |
2171 | for (i=0; i < h->elements ; i++ ) { |
2172 | if (h->p[i].object == p) { /* found it */ |
2173 | h->elements-- ; |
2174 | h->p[i] = h->p[h->elements] ; |
2175 | heapify(h); |
2176 | break ; |
2177 | } |
2178 | } |
2179 | } |
2180 | } |
2181 | |
2182 | /* |
2183 | * drain all queues. Called in case of severe mbuf shortage. |
2184 | */ |
2185 | void |
2186 | dummynet_drain(void) |
2187 | { |
2188 | struct dn_flow_set *fs; |
2189 | struct dn_pipe *p; |
2190 | struct mbuf *m, *mnext; |
2191 | int i; |
2192 | |
2193 | LCK_MTX_ASSERT(dn_mutex, LCK_MTX_ASSERT_OWNED); |
2194 | |
2195 | heap_free(&ready_heap); |
2196 | heap_free(&wfq_ready_heap); |
2197 | heap_free(&extract_heap); |
	/* purge all queued packets and flow queues on every flow_set */
2199 | for (i = 0; i < HASHSIZE; i++) |
2200 | SLIST_FOREACH(fs, &flowsethash[i], next) |
2201 | purge_flow_set(fs, 0); |
2202 | |
2203 | for (i = 0; i < HASHSIZE; i++) |
2204 | SLIST_FOREACH(p, &pipehash[i], next) { |
2205 | purge_flow_set(&(p->fs), 0); |
2206 | |
2207 | mnext = p->head; |
2208 | while ((m = mnext) != NULL) { |
2209 | mnext = m->m_nextpkt; |
2210 | DN_FREE_PKT(m); |
2211 | } |
2212 | p->head = p->tail = NULL ; |
2213 | } |
2214 | } |
2215 | |
2216 | /* |
2217 | * Fully delete a pipe or a queue, cleaning up associated info. |
2218 | */ |
2219 | static int |
2220 | delete_pipe(struct dn_pipe *p) |
2221 | { |
2222 | if (p->pipe_nr == 0 && p->fs.fs_nr == 0) |
2223 | return EINVAL ; |
2224 | if (p->pipe_nr != 0 && p->fs.fs_nr != 0) |
2225 | return EINVAL ; |
2226 | if (p->pipe_nr != 0) { /* this is an old-style pipe */ |
2227 | struct dn_pipe *b; |
2228 | struct dn_flow_set *fs; |
2229 | int i; |
2230 | |
2231 | lck_mtx_lock(dn_mutex); |
2232 | /* locate pipe */ |
2233 | b = locate_pipe(p->pipe_nr); |
		if (b == NULL) {
2235 | lck_mtx_unlock(dn_mutex); |
2236 | return EINVAL ; /* not found */ |
2237 | } |
2238 | |
2239 | /* Unlink from list of pipes. */ |
2240 | SLIST_REMOVE(&pipehash[HASH(b->pipe_nr)], b, dn_pipe, next); |
2241 | |
2242 | #if IPFW2 |
2243 | /* remove references to this pipe from the ip_fw rules. */ |
2244 | flush_pipe_ptrs(&(b->fs)); |
2245 | #endif /* IPFW2 */ |
2246 | |
2247 | /* Remove all references to this pipe from flow_sets. */ |
2248 | for (i = 0; i < HASHSIZE; i++) |
2249 | SLIST_FOREACH(fs, &flowsethash[i], next) |
2250 | if (fs->pipe == b) { |
2251 | printf("dummynet: ++ ref to pipe %d from fs %d\n" , |
2252 | p->pipe_nr, fs->fs_nr); |
2253 | fs->pipe = NULL ; |
2254 | purge_flow_set(fs, 0); |
2255 | } |
2256 | fs_remove_from_heap(&ready_heap, &(b->fs)); |
2257 | |
2258 | purge_pipe(b); /* remove all data associated to this pipe */ |
2259 | /* remove reference to here from extract_heap and wfq_ready_heap */ |
2260 | pipe_remove_from_heap(&extract_heap, b); |
2261 | pipe_remove_from_heap(&wfq_ready_heap, b); |
2262 | lck_mtx_unlock(dn_mutex); |
2263 | |
2264 | FREE(b, M_DUMMYNET); |
2265 | } else { /* this is a WF2Q queue (dn_flow_set) */ |
2266 | struct dn_flow_set *b; |
2267 | |
2268 | lck_mtx_lock(dn_mutex); |
2269 | /* locate set */ |
2270 | b = locate_flowset(p->fs.fs_nr); |
2271 | if (b == NULL) { |
2272 | lck_mtx_unlock(dn_mutex); |
2273 | return EINVAL ; /* not found */ |
2274 | } |
2275 | |
2276 | #if IPFW2 |
2277 | /* remove references to this flow_set from the ip_fw rules. */ |
2278 | flush_pipe_ptrs(b); |
2279 | #endif /* IPFW2 */ |
2280 | |
2281 | /* Unlink from list of flowsets. */ |
2282 | SLIST_REMOVE( &flowsethash[HASH(b->fs_nr)], b, dn_flow_set, next); |
2283 | |
2284 | if (b->pipe != NULL) { |
2285 | /* Update total weight on parent pipe and cleanup parent heaps */ |
2286 | b->pipe->sum -= b->weight * b->backlogged ; |
2287 | fs_remove_from_heap(&(b->pipe->not_eligible_heap), b); |
2288 | fs_remove_from_heap(&(b->pipe->scheduler_heap), b); |
2289 | #if 1 /* XXX should i remove from idle_heap as well ? */ |
2290 | fs_remove_from_heap(&(b->pipe->idle_heap), b); |
2291 | #endif |
2292 | } |
2293 | purge_flow_set(b, 1); |
2294 | lck_mtx_unlock(dn_mutex); |
2295 | } |
2296 | return 0 ; |
2297 | } |
2298 | |
2299 | /* |
2300 | * helper function used to copy data from kernel in DUMMYNET_GET |
2301 | */ |
2302 | static |
2303 | char* dn_copy_set_32(struct dn_flow_set *set, char *bp) |
2304 | { |
2305 | int i, copied = 0 ; |
2306 | struct dn_flow_queue *q; |
2307 | struct dn_flow_queue_32 *qp = (struct dn_flow_queue_32 *)bp; |
2308 | |
2309 | LCK_MTX_ASSERT(dn_mutex, LCK_MTX_ASSERT_OWNED); |
2310 | |
2311 | for (i = 0 ; i <= set->rq_size ; i++) |
2312 | for (q = set->rq[i] ; q ; q = q->next, qp++ ) { |
2313 | if (q->hash_slot != i) |
2314 | printf("dummynet: ++ at %d: wrong slot (have %d, " |
2315 | "should be %d)\n" , copied, q->hash_slot, i); |
2316 | if (q->fs != set) |
2317 | printf("dummynet: ++ at %d: wrong fs ptr " |
2318 | "(have 0x%llx, should be 0x%llx)\n" , i, |
2319 | (uint64_t)VM_KERNEL_ADDRPERM(q->fs), |
2320 | (uint64_t)VM_KERNEL_ADDRPERM(set)); |
2321 | copied++ ; |
2322 | cp_queue_to_32_user( q, qp ); |
2323 | /* cleanup pointers */ |
2324 | qp->next = (user32_addr_t)0 ; |
2325 | qp->head = qp->tail = (user32_addr_t)0 ; |
2326 | qp->fs = (user32_addr_t)0 ; |
2327 | } |
2328 | if (copied != set->rq_elements) |
2329 | printf("dummynet: ++ wrong count, have %d should be %d\n" , |
2330 | copied, set->rq_elements); |
2331 | return (char *)qp ; |
2332 | } |
2333 | |
static char *
dn_copy_set_64(struct dn_flow_set *set, char *bp)
2336 | { |
2337 | int i, copied = 0 ; |
2338 | struct dn_flow_queue *q; |
2339 | struct dn_flow_queue_64 *qp = (struct dn_flow_queue_64 *)bp; |
2340 | |
2341 | LCK_MTX_ASSERT(dn_mutex, LCK_MTX_ASSERT_OWNED); |
2342 | |
2343 | for (i = 0 ; i <= set->rq_size ; i++) |
2344 | for (q = set->rq[i] ; q ; q = q->next, qp++ ) { |
2345 | if (q->hash_slot != i) |
2346 | printf("dummynet: ++ at %d: wrong slot (have %d, " |
2347 | "should be %d)\n" , copied, q->hash_slot, i); |
2348 | if (q->fs != set) |
2349 | printf("dummynet: ++ at %d: wrong fs ptr " |
2350 | "(have 0x%llx, should be 0x%llx)\n" , i, |
2351 | (uint64_t)VM_KERNEL_ADDRPERM(q->fs), |
2352 | (uint64_t)VM_KERNEL_ADDRPERM(set)); |
2353 | copied++ ; |
2355 | cp_queue_to_64_user( q, qp ); |
2356 | /* cleanup pointers */ |
2357 | qp->next = USER_ADDR_NULL ; |
2358 | qp->head = qp->tail = USER_ADDR_NULL ; |
2359 | qp->fs = USER_ADDR_NULL ; |
2360 | } |
2361 | if (copied != set->rq_elements) |
2362 | printf("dummynet: ++ wrong count, have %d should be %d\n" , |
2363 | copied, set->rq_elements); |
2364 | return (char *)qp ; |
2365 | } |
2366 | |
2367 | static size_t |
2368 | dn_calc_size(int is64user) |
2369 | { |
2370 | struct dn_flow_set *set ; |
2371 | struct dn_pipe *p ; |
2372 | size_t size = 0 ; |
2373 | size_t pipesize; |
2374 | size_t queuesize; |
2375 | size_t setsize; |
2376 | int i; |
2377 | |
2378 | LCK_MTX_ASSERT(dn_mutex, LCK_MTX_ASSERT_OWNED); |
	if (is64user) {
		pipesize = sizeof(struct dn_pipe_64);
		queuesize = sizeof(struct dn_flow_queue_64);
		setsize = sizeof(struct dn_flow_set_64);
	} else {
		pipesize = sizeof(struct dn_pipe_32);
		queuesize = sizeof(struct dn_flow_queue_32);
		setsize = sizeof(struct dn_flow_set_32);
	}
	/*
	 * Compute the size of the data to be copied out: list of pipes
	 * and flow_sets, using the userland structure sizes computed
	 * above (the buffer is filled with the 32- or 64-bit layouts,
	 * not with the kernel structs).
	 */
	for (i = 0; i < HASHSIZE; i++) {
		SLIST_FOREACH(p, &pipehash[i], next)
			size += pipesize +
			    p->fs.rq_elements * queuesize;
		SLIST_FOREACH(set, &flowsethash[i], next)
			size += setsize +
			    set->rq_elements * queuesize;
	}
	return size;
2401 | } |
2402 | |
2403 | static int |
2404 | dummynet_get(struct sockopt *sopt) |
2405 | { |
2406 | char *buf = NULL, *bp = NULL; /* bp is the "copy-pointer" */ |
2407 | size_t size = 0; |
2408 | struct dn_flow_set *set; |
2409 | struct dn_pipe *p; |
2410 | int error = 0, i; |
2411 | int is64user = 0; |
2412 | |
2413 | /* XXX lock held too long */ |
2414 | lck_mtx_lock(dn_mutex); |
2415 | /* |
2416 | * XXX: Ugly, but we need to allocate memory with M_WAITOK flag |
2417 | * and we cannot use this flag while holding a mutex. |
2418 | */ |
2419 | if (proc_is64bit(sopt->sopt_p)) |
2420 | is64user = 1; |
2421 | for (i = 0; i < 10; i++) { |
2422 | size = dn_calc_size(is64user); |
2423 | lck_mtx_unlock(dn_mutex); |
2424 | buf = _MALLOC(size, M_TEMP, M_WAITOK | M_ZERO); |
2425 | if (buf == NULL) |
2426 | return(ENOBUFS); |
2427 | lck_mtx_lock(dn_mutex); |
2428 | if (size == dn_calc_size(is64user)) |
2429 | break; |
2430 | FREE(buf, M_TEMP); |
2431 | buf = NULL; |
2432 | } |
2433 | if (buf == NULL) { |
2434 | lck_mtx_unlock(dn_mutex); |
2435 | return(ENOBUFS); |
2436 | } |
2437 | |
2438 | bp = buf; |
2439 | for (i = 0; i < HASHSIZE; i++) { |
2440 | SLIST_FOREACH(p, &pipehash[i], next) { |
2441 | /* |
2442 | * copy pipe descriptor into *bp, convert delay |
2443 | * back to ms, then copy the flow_set descriptor(s) |
2444 | * one at a time. After each flow_set, copy the |
2445 | * queue descriptor it owns. |
2446 | */ |
2447 | if ( is64user ) { |
2448 | bp = cp_pipe_to_64_user(p, |
2449 | (struct dn_pipe_64 *)bp); |
2450 | } else { |
2451 | bp = cp_pipe_to_32_user(p, |
2452 | (struct dn_pipe_32 *)bp); |
2453 | } |
2454 | } |
2455 | } |
	for (i = 0; i < HASHSIZE; i++) {
		SLIST_FOREACH(set, &flowsethash[i], next) {
			if (is64user) {
				struct dn_flow_set_64 *fs_bp =
				    (struct dn_flow_set_64 *)bp ;
				cp_flow_set_to_64_user(set, fs_bp);
				/* XXX same hack as above */
				fs_bp->next = CAST_DOWN(user64_addr_t,
				    DN_IS_QUEUE);
				fs_bp->pipe = USER_ADDR_NULL;
				fs_bp->rq = USER_ADDR_NULL ;
				bp += sizeof(struct dn_flow_set_64);
				bp = dn_copy_set_64( set, bp );
			} else {
				struct dn_flow_set_32 *fs_bp =
				    (struct dn_flow_set_32 *)bp ;
				cp_flow_set_to_32_user(set, fs_bp);
				/* XXX same hack as above */
				fs_bp->next = CAST_DOWN_EXPLICIT(user32_addr_t,
				    DN_IS_QUEUE);
				fs_bp->pipe = (user32_addr_t)0;
				fs_bp->rq = (user32_addr_t)0 ;
				bp += sizeof(struct dn_flow_set_32);
				bp = dn_copy_set_32( set, bp );
			}
		}
	}
2470 | lck_mtx_unlock(dn_mutex); |
2471 | error = sooptcopyout(sopt, buf, size); |
2472 | FREE(buf, M_TEMP); |
2473 | return(error); |
2474 | } |
2475 | |
2476 | /* |
2477 | * Handler for the various dummynet socket options (get, flush, config, del) |
2478 | */ |
2479 | static int |
2480 | ip_dn_ctl(struct sockopt *sopt) |
2481 | { |
2482 | int error = 0 ; |
2483 | struct dn_pipe *p, tmp_pipe; |
2484 | |
2485 | /* Disallow sets in really-really secure mode. */ |
2486 | if (sopt->sopt_dir == SOPT_SET && securelevel >= 3) |
2487 | return (EPERM); |
2488 | |
2489 | switch (sopt->sopt_name) { |
2490 | default : |
2491 | printf("dummynet: -- unknown option %d" , sopt->sopt_name); |
2492 | return EINVAL ; |
2493 | |
2494 | case IP_DUMMYNET_GET : |
2495 | error = dummynet_get(sopt); |
2496 | break ; |
2497 | |
2498 | case IP_DUMMYNET_FLUSH : |
2499 | dummynet_flush() ; |
2500 | break ; |
2501 | |
2502 | case IP_DUMMYNET_CONFIGURE : |
2503 | p = &tmp_pipe ; |
2504 | if (proc_is64bit(sopt->sopt_p)) |
2505 | error = cp_pipe_from_user_64( sopt, p ); |
2506 | else |
2507 | error = cp_pipe_from_user_32( sopt, p ); |
2508 | |
2509 | if (error) |
2510 | break ; |
2511 | error = config_pipe(p); |
2512 | break ; |
2513 | |
2514 | case IP_DUMMYNET_DEL : /* remove a pipe or queue */ |
2515 | p = &tmp_pipe ; |
2516 | if (proc_is64bit(sopt->sopt_p)) |
2517 | error = cp_pipe_from_user_64( sopt, p ); |
2518 | else |
2519 | error = cp_pipe_from_user_32( sopt, p ); |
2520 | if (error) |
2521 | break ; |
2522 | |
2523 | error = delete_pipe(p); |
2524 | break ; |
2525 | } |
2526 | return error ; |
2527 | } |
2528 | |
2529 | void |
2530 | dummynet_init(void) |
2531 | { |
2532 | eventhandler_lists_ctxt_init(&dummynet_evhdlr_ctxt); |
2533 | } |
2534 | |
2535 | void |
2536 | ip_dn_init(void) |
2537 | { |
2538 | /* setup locks */ |
2539 | dn_mutex_grp_attr = lck_grp_attr_alloc_init(); |
2540 | dn_mutex_grp = lck_grp_alloc_init("dn" , dn_mutex_grp_attr); |
2541 | dn_mutex_attr = lck_attr_alloc_init(); |
2542 | lck_mtx_init(dn_mutex, dn_mutex_grp, dn_mutex_attr); |
2543 | |
2544 | ready_heap.size = ready_heap.elements = 0 ; |
2545 | ready_heap.offset = 0 ; |
2546 | |
2547 | wfq_ready_heap.size = wfq_ready_heap.elements = 0 ; |
2548 | wfq_ready_heap.offset = 0 ; |
2549 | |
2550 | extract_heap.size = extract_heap.elements = 0 ; |
2551 | extract_heap.offset = 0 ; |
2552 | ip_dn_ctl_ptr = ip_dn_ctl; |
2553 | ip_dn_io_ptr = dummynet_io; |
2554 | |
2555 | bzero(&default_rule, sizeof default_rule); |
2556 | #if IPFIREWALL |
2557 | default_rule.act_ofs = 0; |
2558 | default_rule.rulenum = IPFW_DEFAULT_RULE; |
2559 | default_rule.cmd_len = 1; |
2560 | default_rule.set = RESVD_SET; |
2561 | |
2562 | default_rule.cmd[0].len = 1; |
2563 | default_rule.cmd[0].opcode = |
2564 | #ifdef IPFIREWALL_DEFAULT_TO_ACCEPT |
2565 | (1) ? O_ACCEPT : |
2566 | #endif |
2567 | O_DENY; |
2568 | #endif |
2569 | } |
2570 | |
2571 | struct dn_event_nwk_wq_entry |
2572 | { |
2573 | struct nwk_wq_entry nwk_wqe; |
2574 | struct dummynet_event dn_ev_arg; |
2575 | }; |
2576 | |
2577 | static void |
2578 | dummynet_event_callback(void *arg) |
2579 | { |
2580 | struct dummynet_event *p_dn_ev = (struct dummynet_event *)arg; |
2581 | |
2582 | EVENTHANDLER_INVOKE(&dummynet_evhdlr_ctxt, dummynet_event, p_dn_ev); |
2583 | return; |
2584 | } |
2585 | |
2586 | void |
2587 | dummynet_event_enqueue_nwk_wq_entry(struct dummynet_event *p_dn_event) |
2588 | { |
2589 | struct dn_event_nwk_wq_entry *p_dn_ev = NULL; |
2590 | |
2591 | MALLOC(p_dn_ev, struct dn_event_nwk_wq_entry *, |
2592 | sizeof(struct dn_event_nwk_wq_entry), |
2593 | M_NWKWQ, M_WAITOK | M_ZERO); |
2594 | |
2595 | p_dn_ev->nwk_wqe.func = dummynet_event_callback; |
2596 | p_dn_ev->nwk_wqe.is_arg_managed = TRUE; |
2597 | p_dn_ev->nwk_wqe.arg = &p_dn_ev->dn_ev_arg; |
2598 | |
2599 | bcopy(p_dn_event, &(p_dn_ev->dn_ev_arg), |
2600 | sizeof(struct dummynet_event)); |
2601 | nwk_wq_enqueue((struct nwk_wq_entry*)p_dn_ev); |
2602 | } |
2603 | |