/*
 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/flowswitch/nx_flowswitch.h>
#include <skywalk/nexus/flowswitch/fsw_var.h>

static void fsw_flow_route_ctor(void *, struct flow_route *);
static int fsw_flow_route_resolve(void *, struct flow_route *,
    struct __kern_packet *);

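/*
 * fsw_flow_add: create and register a flow on behalf of a process.
 * Resolves the requesting PID, binds the process to a nexus port
 * (allocating an ephemeral port if the request does not name one),
 * finds or creates the flow owner for that port, and asks the flow
 * manager to add the flow entry.  Returns the flow owner on success,
 * or NULL with *error set on failure.
 */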
struct flow_owner *
fsw_flow_add(struct nx_flowswitch *fsw, struct nx_flow_req *req0, int *error)
{
    struct kern_nexus *nx = fsw->fsw_nx;
    struct flow_mgr *fm = fsw->fsw_flow_mgr;
    nexus_port_t nx_port = req0->nfr_nx_port;
    struct flow_owner_bucket *fob;
    struct flow_owner *fo = NULL;
    void *fo_context = req0->nfr_context;
    boolean_t nx_bound = FALSE;
    boolean_t new_mapping = FALSE;
    struct nx_flow_req req;
    uuid_t uuid_key;
    bool nx_port_pid_bound;
    uint32_t max_flowadv = nx->nx_prov->nxprov_params->nxp_flowadv_max;
    struct proc *p;
    pid_t pid = req0->nfr_pid;
    bool low_latency = ((req0->nfr_flags & NXFLOWREQF_LOW_LATENCY) != 0);
#if SK_LOG
    uuid_string_t uuidstr;
#endif /* SK_LOG */

    *error = 0;

    /*
     * Make a local copy of the original request; we'll modify the
     * local copy and write it back to the original upon success.
     */
    bcopy(req0, &req, sizeof(*req0));
    ASSERT(!uuid_is_null(req.nfr_flow_uuid));

    /*
     * Interface attach and detach hold the flowswitch lock as writer.
     * Given that we might block in msleep() below, holding the
     * flowswitch RW lock is not an option. Instead, we use the detach
     * barrier to prevent things from going away while we are here.
     */
    if (!fsw_detach_barrier_add(fsw)) {
        SK_ERR("netagent detached");
        *error = ENXIO;
        return NULL;
    }

    /*
     * We insist that the PID resolves to a process for flow add, but
     * not for delete. That's because delete events may be posted (to
     * us) after the corresponding process has exited, so we still need
     * to be able to clean up.
     */
    p = proc_find(pid);
    if (p == PROC_NULL) {
        SK_ERR("process for pid %d doesn't exist", pid);
        *error = EINVAL;
        fsw_detach_barrier_remove(fsw);
        return NULL;
    }
    req.nfr_proc = p;

    /*
     * If the interface is currently attached, indicate that a bind is
     * in progress, so that upon releasing the lock any threads
     * attempting to detach the interface will wait until we're done.
     */
    fob = flow_mgr_get_fob_by_pid(fm, pid);
    FOB_LOCK_SPIN(fob);
    while (fob->fob_busy_flags & (FOBF_OPEN_BUSY | FOBF_CLOSE_BUSY)) {
        if (++(fob->fob_open_waiters) == 0) { /* wraparound */
            fob->fob_open_waiters++;
        }
        if ((*error = msleep(&fob->fob_open_waiters, &fob->fob_lock,
            (PZERO + 1) | PSPIN, __FUNCTION__, NULL)) == EINTR) {
            SK_ERR("%s(%d) binding for uuid %s was interrupted",
                sk_proc_name_address(p), pid,
                sk_uuid_unparse(req.nfr_flow_uuid, uuidstr));
            ASSERT(fob->fob_open_waiters > 0);
            fob->fob_open_waiters--;
            FOB_UNLOCK(fob);
            ASSERT(fo == NULL);
            goto unbusy;
        }
    }
    if (__improbable((fob->fob_busy_flags & FOBF_DEAD) != 0)) {
        SK_ERR("%s(%d) binding for flow_uuid %s aborted due to "
            "dead owner", sk_proc_name_address(p), pid,
            sk_uuid_unparse(req.nfr_flow_uuid, uuidstr));
        *error = ENXIO;
        goto done;
    }
    ASSERT(!(fob->fob_busy_flags & FOBF_OPEN_BUSY));
    fob->fob_busy_flags |= FOBF_OPEN_BUSY;

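    /*
     * Three cases below: a new flow owner that needs an ephemeral
     * nexus port bound to it, a new flow owner for a caller-specified
     * nexus port, or an existing flow owner whose port is reused.
     */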
    do {
        fo = flow_owner_find_by_pid(fob, pid, fo_context, low_latency);
        if (fo == NULL && nx_port == NEXUS_PORT_ANY) {
            struct nxbind nxb;

            /*
             * Release lock to maintain ordering with the
             * flowswitch lock; busy flag is set above.
             * Also read_random() may block.
             */
            FOB_UNLOCK(fob);

            uuid_generate_random(uuid_key);

            bzero(&nxb, sizeof(nxb));
            nxb.nxb_flags |= NXBF_MATCH_UNIQUEID;
            nxb.nxb_uniqueid = proc_uniqueid(p);
            nxb.nxb_pid = pid;
            nxb.nxb_flags |= NXBF_MATCH_KEY;
            nxb.nxb_key_len = sizeof(uuid_key);
            nxb.nxb_key = sk_alloc_data(nxb.nxb_key_len,
                Z_WAITOK | Z_NOFAIL, skmem_tag_nx_key);
            bcopy(uuid_key, nxb.nxb_key, nxb.nxb_key_len);

            /*
             * Bind a new nexus port. Directly invoke the
             * nxdom_bind_port() callback of the nexus since
             * the nexus instance is already known. Free
             * the UUID key upon failure; otherwise the callee
             * will attach it to the nexus port and clean
             * it up during nxdom_unbind_port().
             */
            if ((*error = NX_DOM(nx)->nxdom_bind_port(nx,
                &nx_port, &nxb, NULL)) != 0) {
                sk_free_data(nxb.nxb_key, nxb.nxb_key_len);
                SK_ERR("%s(%d) failed to bind flow_uuid %s to a "
                    "nx_port (err %d)", sk_proc_name_address(p),
                    pid, sk_uuid_unparse(req.nfr_flow_uuid,
                    uuidstr), *error);
                nx_port = NEXUS_PORT_ANY;
                FOB_LOCK_SPIN(fob);
                break;
            }
            ASSERT(nx_port != NEXUS_PORT_ANY);
            nx_bound = TRUE;

            SK_DF(SK_VERB_FLOW, "%s(%d) flow_uuid %s associated with "
                "ephemeral nx_port %d", sk_proc_name_address(p),
                pid, sk_uuid_unparse(req.nfr_flow_uuid, uuidstr),
                (int)nx_port);

            FOB_LOCK_SPIN(fob);
            /*
             * If there's no interface associated with this
             * flowswitch, then bail.
             */
            if (__improbable((fob->fob_busy_flags & FOBF_DEAD) !=
                0 || fsw->fsw_ifp == NULL ||
                fsw->fsw_agent_session == NULL)) {
                SK_ERR("%s(%d) binding for flow_uuid %s aborted "
                    "(lost race)", sk_proc_name_address(p),
                    pid, sk_uuid_unparse(req.nfr_flow_uuid,
                    uuidstr));
                *error = ENXIO;
                break;
            }
            nx_port_pid_bound = true;
            uuid_copy(req.nfr_bind_key, uuid_key);
        } else if (fo == NULL) {
            /* make sure the request has a valid nx_port */
            ASSERT(nx_port != NEXUS_PORT_ANY);
            /*
             * XXX
             * Why is this path supported? Normal flows are not
             * added with a specified port and this check does
             * nothing to verify if the port is used.
             *
             * Using nx_port_is_valid() is wrong because that
             * assumes the array already has non-zero ports.
             */
            if (__improbable(nx_port >= NX_PORT_CHUNK)) {
                *error = EINVAL;
                break;
            }
            /* read_random() may block */
            FOB_LOCK_CONVERT(fob);

            nx_port_pid_bound = false;
            uuid_generate_random(uuid_key);

            SK_DF(SK_VERB_FLOW, "%s(%d) flow_uuid %s associated "
                "with nx_port %d", sk_proc_name_address(p),
                pid, sk_uuid_unparse(req.nfr_flow_uuid, uuidstr),
                (int)nx_port);
        } else {
            /* subsequent requests should reuse the existing port */
            ASSERT(fo->fo_nx_port != NEXUS_PORT_ANY);
            if (nx_port != NEXUS_PORT_ANY &&
                nx_port != fo->fo_nx_port) {
                *error = EINVAL;
                break;
            }
            /* fill out info for the nexus port */
            nx_port = fo->fo_nx_port;
            uuid_copy(uuid_key, fo->fo_key);
            break;
        }

        FOB_LOCK_CONVERT(fob);

        ASSERT(nx_port != NEXUS_PORT_ANY);
        ASSERT(fo == NULL);
        fo = flow_owner_alloc(fob, p, nx_port, nx_port_pid_bound,
            (max_flowadv != 0), fsw, NULL, fo_context, low_latency);
        if (fo == NULL) {
            *error = ENOMEM;
            break;
        }
        ASSERT(!uuid_is_null(uuid_key));
        uuid_copy(fo->fo_key, uuid_key);
        new_mapping = TRUE;
    } while (0);

    if (*error != 0) {
        goto done;
    }

    /* make sure the flow UUID isn't already in use */
    struct flow_entry *fe;
    if ((fe = flow_entry_find_by_uuid(fo, req.nfr_flow_uuid)) != NULL) {
#if SK_LOG
        char dbgbuf[FLOWENTRY_DBGBUF_SIZE];
        SK_DSC(p, "flow uuid collision: \"%s\" already exists at "
            "fe 0x%llx flags 0x%b %s(%d)",
            fe_as_string(fe, dbgbuf, sizeof(dbgbuf)), SK_KVA(fe),
            fe->fe_flags, FLOWENTF_BITS, fe->fe_proc_name, fe->fe_pid);
#endif /* SK_LOG */
        *error = EEXIST;
        flow_entry_release(&fe);
        goto done;
    }

    /* return the assigned nexus port to the caller */
    req.nfr_nx_port = nx_port;
    if (__probable(!fsw_qos_default_restricted())) {
        req.nfr_flags |= NXFLOWREQF_QOS_MARKING;
    } else {
        req.nfr_flags &= ~NXFLOWREQF_QOS_MARKING;
    }

    FOB_LOCK_CONVERT(fob);

    *error = flow_mgr_flow_add(nx, fm, fo, fsw->fsw_ifp, &req,
        fsw_flow_route_ctor, fsw_flow_route_resolve, fsw);

    if (*error == 0) {
        /* replace the original request with our (modified) local copy */
        bcopy(&req, req0, sizeof(*req0));

        SK_DF(SK_VERB_FLOW, "%s(%d) flow_uuid %s is now on "
            "nx_port %d", sk_proc_name_address(p), pid,
            sk_uuid_unparse(req.nfr_flow_uuid, uuidstr),
            (int)nx_port);
    }

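/*
 * Common exit path: on failure, undo whatever was set up above by
 * freeing a newly created flow owner and unbinding an ephemeral nexus
 * port, then clear the busy flag and wake up any waiters.
 */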
done:
    if (__improbable(*error != 0)) {
        SK_ERR("%s(%d) failed to add flow_uuid %s (err %d)",
            sk_proc_name_address(p), pid,
            sk_uuid_unparse(req.nfr_flow_uuid, uuidstr), *error);
        if (fo != NULL) {
            if (new_mapping) {
                FOB_LOCK_CONVERT(fob);
                flow_owner_free(fob, fo);
            }
            fo = NULL;
        }
        if (nx_bound) {
            ASSERT(nx_port != NEXUS_PORT_ANY);
            FOB_LOCK_ASSERT_HELD(fob);
            /*
             * Release lock to maintain ordering with the
             * flowswitch lock; busy flag is set above.
             */
            FOB_UNLOCK(fob);
            (void) NX_DOM(nx)->nxdom_unbind_port(nx, nx_port);
            nx_port = NEXUS_PORT_ANY;
            FOB_LOCK_SPIN(fob);
        }
    }
    fob->fob_busy_flags &= ~FOBF_OPEN_BUSY;
    if (__improbable(fob->fob_open_waiters > 0)) {
        fob->fob_open_waiters = 0;
        wakeup(&fob->fob_open_waiters);
    }
    if (__improbable(fob->fob_close_waiters > 0)) {
        fob->fob_close_waiters = 0;
        wakeup(&fob->fob_close_waiters);
    }
    FOB_UNLOCK(fob);

unbusy:
    proc_rele(p);
    p = PROC_NULL;
    /* allow any pending detach to proceed */
    fsw_detach_barrier_remove(fsw);

    return fo;
}

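/*
 * fsw_flow_del: tear down the flow identified by nfr_flow_uuid for the
 * requesting PID.  If this was the last flow on the owner, the owner's
 * channel is already gone, and the nexus port was PID-bound here, the
 * port binding and the flow owner are released as well.
 */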
int
fsw_flow_del(struct nx_flowswitch *fsw, struct nx_flow_req *req, bool nolinger,
    void *params)
{
    struct flow_mgr *fm = fsw->fsw_flow_mgr;
    struct kern_nexus *nx = fsw->fsw_nx;
    struct flow_owner_bucket *fob;
    struct flow_owner *fo;
    void *fo_context = req->nfr_context;
    pid_t pid = req->nfr_pid;
    bool low_latency = ((req->nfr_flags & NXFLOWREQF_LOW_LATENCY) != 0);
    int error;

    ASSERT(!uuid_is_null(req->nfr_flow_uuid));

    /*
     * We use the detach barrier to prevent the flowswitch instance
     * from going away while we are here.
     */
    if (!fsw_detach_barrier_add(fsw)) {
        SK_ERR("netagent detached");
        return ENXIO;
    }

    /* find mapping */
    fob = flow_mgr_get_fob_by_pid(fm, pid);
    FOB_LOCK_SPIN(fob);
    while (fob->fob_busy_flags & (FOBF_OPEN_BUSY | FOBF_CLOSE_BUSY)) {
        if (++(fob->fob_close_waiters) == 0) { /* wraparound */
            fob->fob_close_waiters++;
        }
        (void) msleep(&fob->fob_close_waiters, &fob->fob_lock,
            (PZERO - 1) | PSPIN, __FUNCTION__, NULL);
    }
    fob->fob_busy_flags |= FOBF_CLOSE_BUSY;

    fo = flow_owner_find_by_pid(fob, pid, fo_context, low_latency);
    if (fo == NULL) {
        error = ENOENT;
        goto done;
    }

    FOB_LOCK_CONVERT(fob);

    /*
     * Unbind flow. Note that if "auto close" is enabled, the flows
     * associated with this fo would have been removed when the channel
     * opened to the nexus port gets closed. If we get ENOENT, just
     * treat it as non-fatal and proceed further down.
     */
    error = flow_owner_destroy_entry(fo, req->nfr_flow_uuid, nolinger,
        params);
    if (error != 0 && error != ENOENT) {
        goto done;
    }

    /*
     * If the channel that was connected to the nexus port is no longer
     * around, i.e. fsw_port_dtor() has been called, and there are no
     * more flows on the owner, and the owner was bound to the PID on
     * the nexus port in fsw_flow_add(), remove the nexus binding now
     * to make this port available again.
     */
    if (RB_EMPTY(&fo->fo_flow_entry_id_head) &&
        fo->fo_nx_port_destroyed && fo->fo_nx_port_pid_bound) {
        nexus_port_t nx_port = fo->fo_nx_port;
        ASSERT(nx_port != NEXUS_PORT_ANY);
        /*
         * Release lock to maintain ordering with the
         * flowswitch lock; busy flag is set above.
         */
        FOB_UNLOCK(fob);
        (void) NX_DOM(nx)->nxdom_unbind_port(nx, nx_port);
        FOB_LOCK(fob);
        flow_owner_free(fob, fo);
        fo = NULL;
    }
    error = 0;

done:
#if SK_LOG
    if (__improbable((sk_verbose & SK_VERB_FLOW) != 0)) {
        uuid_string_t uuidstr;
        if (fo != NULL) {
            SK_DF(SK_VERB_FLOW, "%s(%d) flow_uuid %s (err %d)",
                fo->fo_name, fo->fo_pid,
                sk_uuid_unparse(req->nfr_flow_uuid, uuidstr), error);
        } else {
            SK_DF(SK_VERB_FLOW, "pid %d flow_uuid %s (err %d)", pid,
                sk_uuid_unparse(req->nfr_flow_uuid, uuidstr), error);
        }
    }
#endif /* SK_LOG */

    fob->fob_busy_flags &= ~FOBF_CLOSE_BUSY;
    if (__improbable(fob->fob_open_waiters > 0)) {
        fob->fob_open_waiters = 0;
        wakeup(&fob->fob_open_waiters);
    }
    if (__improbable(fob->fob_close_waiters > 0)) {
        fob->fob_close_waiters = 0;
        wakeup(&fob->fob_close_waiters);
    }
    FOB_UNLOCK(fob);

    /* allow any pending detach to proceed */
    fsw_detach_barrier_remove(fsw);

    return error;
}

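/*
 * fsw_flow_config: apply per-flow configuration from the request to an
 * existing flow entry owned by the requesting PID; currently this only
 * toggles the NXFLOWREQF_NOWAKEFROMSLEEP setting on the flow and its
 * port reservation.
 */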
int
fsw_flow_config(struct nx_flowswitch *fsw, struct nx_flow_req *req)
{
    struct flow_mgr *fm = fsw->fsw_flow_mgr;
    struct flow_entry *fe = NULL;
    struct ns_token *nt = NULL;
    int error = 0;

    FSW_RLOCK(fsw);
    fe = flow_mgr_get_fe_by_uuid_rlock(fm, req->nfr_flow_uuid);
    if (fe == NULL) {
        SK_ERR("can't find flow");
        error = ENOENT;
        goto done;
    }

    if (fe->fe_pid != req->nfr_pid) {
        SK_ERR("flow ownership error");
        error = EPERM;
        goto done;
    }

    /* only the NXFLOWREQF_NOWAKEFROMSLEEP config is supported for now */
    nt = fe->fe_port_reservation;
    if (req->nfr_flags & NXFLOWREQF_NOWAKEFROMSLEEP) {
        os_atomic_or(&fe->fe_flags, FLOWENTF_NOWAKEFROMSLEEP, relaxed);
        netns_change_flags(&nt, NETNS_NOWAKEFROMSLEEP, 0);
    } else {
        os_atomic_andnot(&fe->fe_flags, FLOWENTF_NOWAKEFROMSLEEP, relaxed);
        netns_change_flags(&nt, 0, NETNS_NOWAKEFROMSLEEP);
    }
#if SK_LOG
    char dbgbuf[FLOWENTRY_DBGBUF_SIZE];
    SK_DF(SK_VERB_FLOW, "%s: NOWAKEFROMSLEEP %d",
        fe_as_string(fe, dbgbuf, sizeof(dbgbuf)),
        req->nfr_flags & NXFLOWREQF_NOWAKEFROMSLEEP ? 1 : 0);
#endif /* SK_LOG */

done:
    if (fe != NULL) {
        flow_entry_release(&fe);
    }
    FSW_RUNLOCK(fsw);
    return error;
}

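/*
 * Flow route constructor callback passed to flow_mgr_flow_add();
 * forwards to the flowswitch's fsw_ctor routine if one is set.
 */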
static void
fsw_flow_route_ctor(void *arg, struct flow_route *fr)
{
    struct nx_flowswitch *fsw = arg;
    if (fsw->fsw_ctor != NULL) {
        fsw->fsw_ctor(fsw, fr);
    }
}

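/*
 * Flow route resolver callback passed to flow_mgr_flow_add(); defers
 * to the flowswitch's fsw_resolve routine, or reports success if none
 * is configured.
 */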
static int
fsw_flow_route_resolve(void *arg, struct flow_route *fr,
    struct __kern_packet *pkt)
{
    struct nx_flowswitch *fsw = arg;
    return (fsw->fsw_resolve != NULL) ? fsw->fsw_resolve(fsw, fr, pkt) : 0;
}