/*
 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/flowswitch/nx_flowswitch.h>
#include <skywalk/nexus/flowswitch/fsw_var.h>

static void fsw_flow_route_ctor(void *, struct flow_route *);
static int fsw_flow_route_resolve(void *, struct flow_route *,
    struct __kern_packet *);

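/*
 * Add a flow on behalf of the process described by the request.  On
 * success, the updated request is copied back to the caller and the
 * associated flow owner is returned; otherwise NULL is returned with
 * *error set.
 */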
struct flow_owner *
fsw_flow_add(struct nx_flowswitch *fsw, struct nx_flow_req *req0, int *error)
{
	struct kern_nexus *nx = fsw->fsw_nx;
	struct flow_mgr *fm = fsw->fsw_flow_mgr;
	nexus_port_t nx_port = req0->nfr_nx_port;
	struct flow_owner_bucket *fob;
	struct flow_owner *fo = NULL;
	void *fo_context = req0->nfr_context;
	boolean_t nx_bound = FALSE;
	boolean_t new_mapping = FALSE;
	struct nx_flow_req req;
	uuid_t uuid_key;
	bool nx_port_pid_bound;
	uint32_t max_flowadv = nx->nx_prov->nxprov_params->nxp_flowadv_max;
	struct proc *p;
	int pid = req0->nfr_pid;
	bool low_latency = ((req0->nfr_flags & NXFLOWREQF_LOW_LATENCY) != 0);
#if SK_LOG
	uuid_string_t uuidstr;
#endif /* SK_LOG */

	*error = 0;

	/*
	 * Make a local copy of the original request; we'll modify the
	 * local copy and write it back to the original upon success.
	 */
	bcopy(req0, &req, sizeof(*req0));
	ASSERT(!uuid_is_null(req.nfr_flow_uuid));

	/*
	 * Interface attach and detach involve holding the flowswitch
	 * lock as writer.  Given that we might block in msleep() below,
	 * holding the flowswitch RW lock is not an option.  Instead, we
	 * utilize the detach barrier to prevent things from going away
	 * while we are here.
	 */
	if (!fsw_detach_barrier_add(fsw)) {
		SK_ERR("netagent detached");
		*error = ENXIO;
		return NULL;
	}

	/*
	 * We insist that the PID resolves to a process for flow add, but
	 * not for delete.  That's because those events may be posted (to
	 * us) after the corresponding process has exited, and so we still
	 * need to be able to clean up.
	 */
	p = proc_find(pid);
	if (p == PROC_NULL) {
		SK_ERR("process for pid %d doesn't exist", pid);
		*error = EINVAL;
		fsw_detach_barrier_remove(fsw);
		return NULL;
	}
	req.nfr_proc = p;

	/*
	 * If interface is currently attached, indicate that a bind is in
	 * progress, so that upon releasing the lock any threads attempting
	 * to detach the interface will wait until we're done.
	 */
	fob = flow_mgr_get_fob_by_pid(fm, pid);
	FOB_LOCK_SPIN(fob);
	while (fob->fob_busy_flags & (FOBF_OPEN_BUSY | FOBF_CLOSE_BUSY)) {
		if (++(fob->fob_open_waiters) == 0) {	/* wraparound */
			fob->fob_open_waiters++;
		}
		if ((*error = msleep(&fob->fob_open_waiters, &fob->fob_lock,
		    (PZERO + 1) | PSPIN, __FUNCTION__, NULL)) == EINTR) {
			SK_ERR("%s(%d) binding for uuid %s was interrupted",
			    sk_proc_name_address(p), pid,
			    sk_uuid_unparse(req.nfr_flow_uuid, uuidstr));
			ASSERT(fob->fob_open_waiters > 0);
			fob->fob_open_waiters--;
			FOB_UNLOCK(fob);
			ASSERT(fo == NULL);
			goto unbusy;
		}
	}
	if (__improbable((fob->fob_busy_flags & FOBF_DEAD) != 0)) {
		SK_ERR("%s(%d) binding for flow_uuid %s aborted due to "
		    "dead owner", sk_proc_name_address(p), pid,
		    sk_uuid_unparse(req.nfr_flow_uuid, uuidstr));
		*error = ENXIO;
		goto done;
	}
	ASSERT(!(fob->fob_busy_flags & FOBF_OPEN_BUSY));
	fob->fob_busy_flags |= FOBF_OPEN_BUSY;

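	/*
	 * Three cases follow: a brand new flow owner that needs an
	 * ephemeral nexus port bound for it, a new flow owner with a
	 * caller-specified nexus port, and an existing flow owner whose
	 * nexus port is simply reused.
	 */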
	do {
		fo = flow_owner_find_by_pid(fob, pid, fo_context, low_latency);
		if (fo == NULL && nx_port == NEXUS_PORT_ANY) {
			struct nxbind nxb;

			/*
			 * Release lock to maintain ordering with the
			 * flowswitch lock; busy flag is set above.
			 * Also read_random() may block.
			 */
			FOB_UNLOCK(fob);

			uuid_generate_random(uuid_key);

			bzero(&nxb, sizeof(nxb));
			nxb.nxb_flags |= NXBF_MATCH_UNIQUEID;
			nxb.nxb_uniqueid = proc_uniqueid(p);
			nxb.nxb_pid = pid;
			nxb.nxb_flags |= NXBF_MATCH_KEY;
			nxb.nxb_key_len = sizeof(uuid_key);
			nxb.nxb_key = sk_alloc_data(nxb.nxb_key_len,
			    Z_WAITOK | Z_NOFAIL, skmem_tag_nx_key);
			bcopy(uuid_key, nxb.nxb_key, nxb.nxb_key_len);

			/*
			 * Bind a new nexus port.  Directly invoke the
			 * nxdom_bind_port() callback of the nexus since
			 * the nexus instance is already known.  Free
			 * the UUID key upon failure; otherwise the callee
			 * will attach it to the nexus port and clean
			 * it up during nxdom_unbind_port().
			 */
			if ((*error = NX_DOM(nx)->nxdom_bind_port(nx,
			    &nx_port, &nxb, NULL)) != 0) {
				sk_free_data(nxb.nxb_key, nxb.nxb_key_len);
				SK_ERR("%s(%d) failed to bind flow_uuid %s to a "
				    "nx_port (err %d)", sk_proc_name_address(p),
				    pid, sk_uuid_unparse(req.nfr_flow_uuid,
				    uuidstr), *error);
				nx_port = NEXUS_PORT_ANY;
				FOB_LOCK_SPIN(fob);
				break;
			}
			ASSERT(nx_port != NEXUS_PORT_ANY);
			nx_bound = TRUE;

			SK_DF(SK_VERB_FLOW, "%s(%d) flow_uuid %s associated with "
			    "ephemeral nx_port %d", sk_proc_name_address(p),
			    pid, sk_uuid_unparse(req.nfr_flow_uuid, uuidstr),
			    (int)nx_port);

			FOB_LOCK_SPIN(fob);
			/*
			 * If there's no interface associated with this
			 * flowswitch, then bail.
			 */
			if (__improbable((fob->fob_busy_flags & FOBF_DEAD) !=
			    0 || fsw->fsw_ifp == NULL ||
			    fsw->fsw_agent_session == NULL)) {
				SK_ERR("%s(%d) binding for flow_uuid %s aborted "
				    "(lost race)", sk_proc_name_address(p),
				    pid, sk_uuid_unparse(req.nfr_flow_uuid,
				    uuidstr));
				*error = ENXIO;
				break;
			}
			nx_port_pid_bound = true;
			uuid_copy(req.nfr_bind_key, uuid_key);
		} else if (fo == NULL) {
			/* make sure the request has a valid nx_port */
			ASSERT(nx_port != NEXUS_PORT_ANY);
			/*
			 * XXX
			 * Why is this path supported?  Normal flows are not
			 * added with a specified port, and this check does
			 * nothing to verify whether the port is in use.
			 *
			 * Using nx_port_is_valid() is wrong because that
			 * assumes the array already has non-zero ports.
			 */
			if (__improbable(nx_port >= NX_PORT_CHUNK)) {
				*error = EINVAL;
				break;
			}
			/* read_random() may block */
			FOB_LOCK_CONVERT(fob);

			nx_port_pid_bound = false;
			uuid_generate_random(uuid_key);

			SK_DF(SK_VERB_FLOW, "%s(%d) flow_uuid %s associated "
			    "with nx_port %d", sk_proc_name_address(p),
			    pid, sk_uuid_unparse(req.nfr_flow_uuid, uuidstr),
			    (int)nx_port);
		} else {
			/* a subsequent request should reuse the existing port */
			ASSERT(fo->fo_nx_port != NEXUS_PORT_ANY);
			if (nx_port != NEXUS_PORT_ANY &&
			    nx_port != fo->fo_nx_port) {
				*error = EINVAL;
				break;
			}
			/* fill out info for the nexus port */
			nx_port = fo->fo_nx_port;
			uuid_copy(uuid_key, fo->fo_key);
			break;
		}

		FOB_LOCK_CONVERT(fob);

		ASSERT(nx_port != NEXUS_PORT_ANY);
		ASSERT(fo == NULL);
		fo = flow_owner_alloc(fob, p, nx_port, nx_port_pid_bound,
		    (max_flowadv != 0), fsw, NULL, fo_context, low_latency);
		if (fo == NULL) {
			*error = ENOMEM;
			break;
		}
		ASSERT(!uuid_is_null(uuid_key));
		uuid_copy(fo->fo_key, uuid_key);
		new_mapping = TRUE;
	} while (0);

	if (*error != 0) {
		goto done;
	}

	/* make sure rule ID isn't already being used */
	struct flow_entry *fe;
	if ((fe = flow_entry_find_by_uuid(fo, req.nfr_flow_uuid)) != NULL) {
#if SK_LOG
		char dbgbuf[FLOWENTRY_DBGBUF_SIZE];
		SK_DSC(p, "flow uuid collision: \"%s\" already exists at "
		    "fe 0x%llx flags 0x%b %s(%d)",
		    fe_as_string(fe, dbgbuf, sizeof(dbgbuf)), SK_KVA(fe),
		    fe->fe_flags, FLOWENTF_BITS, fe->fe_proc_name, fe->fe_pid);
#endif /* SK_LOG */
		*error = EEXIST;
		flow_entry_release(&fe);
		goto done;
	}

	/* return assigned nexus port to caller */
	req.nfr_nx_port = nx_port;
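	/* also indicate to the caller whether QoS marking is permitted */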
	if (__probable(!fsw_qos_default_restricted())) {
		req.nfr_flags |= NXFLOWREQF_QOS_MARKING;
	} else {
		req.nfr_flags &= ~NXFLOWREQF_QOS_MARKING;
	}

	FOB_LOCK_CONVERT(fob);

	*error = flow_mgr_flow_add(nx, fm, fo, fsw->fsw_ifp, &req,
	    fsw_flow_route_ctor, fsw_flow_route_resolve, fsw);

	if (*error == 0) {
		/* replace original request with our (modified) local copy */
		bcopy(&req, req0, sizeof(*req0));

		SK_DF(SK_VERB_FLOW, "%s(%d) flow_uuid %s is now on "
		    "nx_port %d", sk_proc_name_address(p), pid,
		    sk_uuid_unparse(req.nfr_flow_uuid, uuidstr),
		    (int)nx_port);
	}

done:
	if (__improbable(*error != 0)) {
		SK_ERR("%s(%d) failed to add flow_uuid %s (err %d)",
		    sk_proc_name_address(p), pid,
		    sk_uuid_unparse(req.nfr_flow_uuid, uuidstr), *error);
		if (fo != NULL) {
			if (new_mapping) {
				FOB_LOCK_CONVERT(fob);
				flow_owner_free(fob, fo);
			}
			fo = NULL;
		}
		if (nx_bound) {
			ASSERT(nx_port != NEXUS_PORT_ANY);
			FOB_LOCK_ASSERT_HELD(fob);
			/*
			 * Release lock to maintain ordering with the
			 * flowswitch lock; busy flag is set above.
			 */
			FOB_UNLOCK(fob);
			(void) NX_DOM(nx)->nxdom_unbind_port(nx, nx_port);
			nx_port = NEXUS_PORT_ANY;
			FOB_LOCK_SPIN(fob);
		}
	}
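	/* clear the busy bit and wake up any pending open/close waiters */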
	fob->fob_busy_flags &= ~FOBF_OPEN_BUSY;
	if (__improbable(fob->fob_open_waiters > 0)) {
		fob->fob_open_waiters = 0;
		wakeup(&fob->fob_open_waiters);
	}
	if (__improbable(fob->fob_close_waiters > 0)) {
		fob->fob_close_waiters = 0;
		wakeup(&fob->fob_close_waiters);
	}
	FOB_UNLOCK(fob);

unbusy:
	proc_rele(p);
	p = PROC_NULL;
	/* allow any pending detach to proceed */
	fsw_detach_barrier_remove(fsw);

	return fo;
}

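/*
 * Remove a flow.  Unlike fsw_flow_add(), the PID here is not required
 * to resolve to a live process, since deletion may be requested after
 * the owning process has exited.
 */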
int
fsw_flow_del(struct nx_flowswitch *fsw, struct nx_flow_req *req, bool nolinger,
    void *params)
{
	struct flow_mgr *fm = fsw->fsw_flow_mgr;
	struct kern_nexus *nx = fsw->fsw_nx;
	struct flow_owner_bucket *fob;
	struct flow_owner *fo;
	void *fo_context = req->nfr_context;
	pid_t pid = req->nfr_pid;
	bool low_latency = ((req->nfr_flags & NXFLOWREQF_LOW_LATENCY) != 0);
	int error;

	ASSERT(!uuid_is_null(req->nfr_flow_uuid));

	/*
	 * We use the detach barrier to prevent the flowswitch instance
	 * from going away while we are here.
	 */
	if (!fsw_detach_barrier_add(fsw)) {
		SK_ERR("netagent detached");
		return ENXIO;
	}

	/* find mapping */
	fob = flow_mgr_get_fob_by_pid(fm, pid);
	FOB_LOCK_SPIN(fob);
	while (fob->fob_busy_flags & (FOBF_OPEN_BUSY | FOBF_CLOSE_BUSY)) {
		if (++(fob->fob_close_waiters) == 0) {	/* wraparound */
			fob->fob_close_waiters++;
		}
		(void) msleep(&fob->fob_close_waiters, &fob->fob_lock,
		    (PZERO - 1) | PSPIN, __FUNCTION__, NULL);
	}
	fob->fob_busy_flags |= FOBF_CLOSE_BUSY;

	fo = flow_owner_find_by_pid(fob, pid, fo_context, low_latency);
	if (fo == NULL) {
		error = ENOENT;
		goto done;
	}

	FOB_LOCK_CONVERT(fob);

	/*
	 * Unbind the flow.  Note that if "auto close" is enabled, the flows
	 * associated with this fo would have been removed when the channel
	 * opened to the nexus port was closed.  If we get ENOENT, just
	 * treat it as non-fatal and proceed further down.
	 */
	error = flow_owner_destroy_entry(fo, req->nfr_flow_uuid, nolinger,
	    params);
	if (error != 0 && error != ENOENT) {
		goto done;
	}

	/*
	 * If the channel that was connected to the nexus port is no longer
	 * around, i.e. fsw_port_dtor() has been called, and there are no
	 * more flows on the owner, and the owner was bound to the PID on
	 * the nexus port in fsw_flow_add(), remove the nexus binding now
	 * to make this port available.
	 */
	if (RB_EMPTY(&fo->fo_flow_entry_id_head) &&
	    fo->fo_nx_port_destroyed && fo->fo_nx_port_pid_bound) {
		nexus_port_t nx_port = fo->fo_nx_port;
		ASSERT(nx_port != NEXUS_PORT_ANY);
		/*
		 * Release lock to maintain ordering with the
		 * flowswitch lock; busy flag is set above.
		 */
		FOB_UNLOCK(fob);
		(void) NX_DOM(nx)->nxdom_unbind_port(nx, nx_port);
		FOB_LOCK(fob);
		flow_owner_free(fob, fo);
		fo = NULL;
	}
	error = 0;

done:
#if SK_LOG
	if (__improbable((sk_verbose & SK_VERB_FLOW) != 0)) {
		uuid_string_t uuidstr;
		if (fo != NULL) {
			SK_DF(SK_VERB_FLOW, "%s(%d) flow_uuid %s (err %d)",
			    fo->fo_name, fo->fo_pid,
			    sk_uuid_unparse(req->nfr_flow_uuid, uuidstr), error);
		} else {
			SK_DF(SK_VERB_FLOW, "pid %d flow_uuid %s (err %d)", pid,
			    sk_uuid_unparse(req->nfr_flow_uuid, uuidstr), error);
		}
	}
#endif /* SK_LOG */

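	/* clear the busy bit and wake up any pending open/close waiters */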
	fob->fob_busy_flags &= ~FOBF_CLOSE_BUSY;
	if (__improbable(fob->fob_open_waiters > 0)) {
		fob->fob_open_waiters = 0;
		wakeup(&fob->fob_open_waiters);
	}
	if (__improbable(fob->fob_close_waiters > 0)) {
		fob->fob_close_waiters = 0;
		wakeup(&fob->fob_close_waiters);
	}
	FOB_UNLOCK(fob);

	/* allow any pending detach to proceed */
	fsw_detach_barrier_remove(fsw);

	return error;
}

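/*
 * Reconfigure an existing flow; at present this only updates the
 * NXFLOWREQF_NOWAKEFROMSLEEP setting on the flow entry and its
 * namespace reservation.
 */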
int
fsw_flow_config(struct nx_flowswitch *fsw, struct nx_flow_req *req)
{
	struct flow_mgr *fm = fsw->fsw_flow_mgr;
	struct flow_entry *fe = NULL;
	struct ns_token *nt = NULL;
	int error = 0;

	FSW_RLOCK(fsw);
	fe = flow_mgr_get_fe_by_uuid_rlock(fm, req->nfr_flow_uuid);
	if (fe == NULL) {
		SK_ERR("can't find flow");
		error = ENOENT;
		goto done;
	}

	if (fe->fe_pid != req->nfr_pid) {
		SK_ERR("flow ownership error");
		error = EPERM;
		goto done;
	}

	/* right now we only support the NXFLOWREQF_NOWAKEFROMSLEEP config */
	nt = fe->fe_port_reservation;
	if (req->nfr_flags & NXFLOWREQF_NOWAKEFROMSLEEP) {
		os_atomic_or(&fe->fe_flags, FLOWENTF_NOWAKEFROMSLEEP, relaxed);
		netns_change_flags(&nt, NETNS_NOWAKEFROMSLEEP, 0);
	} else {
		os_atomic_andnot(&fe->fe_flags, FLOWENTF_NOWAKEFROMSLEEP, relaxed);
		netns_change_flags(&nt, 0, NETNS_NOWAKEFROMSLEEP);
	}
#if SK_LOG
	char dbgbuf[FLOWENTRY_DBGBUF_SIZE];
	SK_DF(SK_VERB_FLOW, "%s: NOWAKEFROMSLEEP %d",
	    fe_as_string(fe, dbgbuf, sizeof(dbgbuf)),
	    req->nfr_flags & NXFLOWREQF_NOWAKEFROMSLEEP ? 1 : 0);
#endif /* SK_LOG */

done:
	if (fe != NULL) {
		flow_entry_release(&fe);
	}
	FSW_RUNLOCK(fsw);
	return error;
}

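/*
 * Flow route constructor callback passed to flow_mgr_flow_add();
 * forwards to the flowswitch constructor, if one is configured.
 */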
static void
fsw_flow_route_ctor(void *arg, struct flow_route *fr)
{
	struct nx_flowswitch *fsw = arg;
	if (fsw->fsw_ctor != NULL) {
		fsw->fsw_ctor(fsw, fr);
	}
}

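/*
 * Flow route resolver callback passed to flow_mgr_flow_add(); forwards
 * to the flowswitch resolver if one is configured, and otherwise treats
 * the route as resolved.
 */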
static int
fsw_flow_route_resolve(void *arg, struct flow_route *fr,
    struct __kern_packet *pkt)
{
	struct nx_flowswitch *fsw = arg;
	return (fsw->fsw_resolve != NULL) ? fsw->fsw_resolve(fsw, fr, pkt) : 0;
}