1/*
2 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * Rick Macklem at The University of Guelph.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)nfs_srvcache.c 8.3 (Berkeley) 3/30/95
65 * FreeBSD-Id: nfs_srvcache.c,v 1.15 1997/10/12 20:25:46 phk Exp $
66 */
67
68#include <nfs/nfs_conf.h>
69#if CONFIG_NFS_SERVER
70
71/*
72 * Reference: Chet Juszczak, "Improving the Performance and Correctness
73 * of an NFS Server", in Proc. Winter 1989 USENIX Conference,
74 * pages 53-63. San Diego, February 1989.
75 */
76#include <sys/param.h>
77#include <sys/vnode.h>
78#include <sys/mount_internal.h>
79#include <sys/kernel.h>
80#include <sys/systm.h>
81#include <sys/proc.h>
82#include <sys/kpi_mbuf.h>
83#include <sys/malloc.h>
84#include <sys/socket.h>
85#include <libkern/OSAtomic.h>
86
87#include <netinet/in.h>
88#include <nfs/rpcv2.h>
89#include <nfs/nfsproto.h>
90#include <nfs/nfs.h>
91#include <nfs/nfsrvcache.h>
92
93extern int nfsv2_procid[NFS_NPROCS];
94static int nfsrv_reqcache_count;
95int nfsrv_reqcache_size = NFSRVCACHESIZ;
96
97#define NFSRCHASH(xid) \
98 (&nfsrv_reqcache_hashtbl[((xid) + ((xid) >> 24)) & nfsrv_reqcache_hash])
99static LIST_HEAD(nfsrv_reqcache_hash, nfsrvcache) * nfsrv_reqcache_hashtbl;
100static TAILQ_HEAD(nfsrv_reqcache_lru, nfsrvcache) nfsrv_reqcache_lruhead;
101static u_long nfsrv_reqcache_hash;
102
103static LCK_GRP_DECLARE(nfsrv_reqcache_lck_grp, "nfsrv_reqcache");
104static LCK_MTX_DECLARE(nfsrv_reqcache_mutex, &nfsrv_reqcache_lck_grp);
105
106/*
107 * Static array that defines which nfs rpc's are nonidempotent
108 */
109static int nonidempotent[NFS_NPROCS] = {
110 FALSE,
111 FALSE,
112 TRUE,
113 FALSE,
114 FALSE,
115 FALSE,
116 FALSE,
117 TRUE,
118 TRUE,
119 TRUE,
120 TRUE,
121 TRUE,
122 TRUE,
123 TRUE,
124 TRUE,
125 TRUE,
126 FALSE,
127 FALSE,
128 FALSE,
129 FALSE,
130 FALSE,
131 FALSE,
132 FALSE,
133};
134
135/* True iff the rpc reply is an nfs status ONLY! */
136static int nfsv2_repstat[NFS_NPROCS] = {
137 FALSE,
138 FALSE,
139 FALSE,
140 FALSE,
141 FALSE,
142 FALSE,
143 FALSE,
144 FALSE,
145 FALSE,
146 FALSE,
147 TRUE,
148 TRUE,
149 TRUE,
150 TRUE,
151 FALSE,
152 TRUE,
153 FALSE,
154 FALSE,
155};
156
157/*
158 * Initialize the server request cache list
159 */
160void
161nfsrv_initcache(void)
162{
163 if (nfsrv_reqcache_size <= 0) {
164 return;
165 }
166
167 lck_mtx_lock(lck: &nfsrv_reqcache_mutex);
168 /* init nfs server request cache hash table */
169 nfsrv_reqcache_hashtbl = hashinit(count: nfsrv_reqcache_size, M_NFSD, hashmask: &nfsrv_reqcache_hash);
170 TAILQ_INIT(&nfsrv_reqcache_lruhead);
171 lck_mtx_unlock(lck: &nfsrv_reqcache_mutex);
172}
173
174/*
175 * This function compares two net addresses by family and returns TRUE
176 * if they are the same host.
177 * If there is any doubt, return FALSE.
178 * The AF_INET family is handled as a special case so that address mbufs
179 * don't need to be saved to store "struct in_addr", which is only 4 bytes.
180 * Ditto for AF_INET6 which is only 16 bytes.
181 */
182static int
183netaddr_match(
184 int family,
185 union nethostaddr *haddr,
186 mbuf_t nam)
187{
188 struct sockaddr_in *inetaddr;
189 struct sockaddr_in6 *inet6addr;
190
191 switch (family) {
192 case AF_INET:
193 inetaddr = mbuf_data(mbuf: nam);
194 if ((inetaddr->sin_family == AF_INET) &&
195 (inetaddr->sin_addr.s_addr == haddr->had_inetaddr)) {
196 return 1;
197 }
198 break;
199 case AF_INET6:
200 inet6addr = mbuf_data(mbuf: nam);
201 if ((inet6addr->sin6_family == AF_INET6) &&
202 !bcmp(s1: &inet6addr->sin6_addr, s2: &haddr->had_inet6addr, n: sizeof(inet6addr->sin6_addr))) {
203 return 1;
204 }
205 break;
206 }
207 return 0;
208}
209
210/*
211 * Look for the request in the cache
212 * If found then
213 * return action and optionally reply
214 * else
215 * insert it in the cache
216 *
217 * The rules are as follows:
218 * - if in progress, return DROP request
219 * - if completed within DELAY of the current time, return DROP it
220 * - if completed a longer time ago return REPLY if the reply was cached or
221 * return DOIT
222 * Update/add new request at end of lru list
223 */
224int
225nfsrv_getcache(
226 struct nfsrv_descript *nd,
227 struct nfsrv_sock *slp,
228 mbuf_t *mrepp)
229{
230 struct nfsrvcache *rp;
231 struct nfsm_chain nmrep;
232 struct sockaddr *saddr;
233 int ret, error;
234
235 /*
236 * Don't cache recent requests for reliable transport protocols.
237 * (Maybe we should for the case of a reconnect, but..)
238 */
239 if (!nd->nd_nam2) {
240 return RC_DOIT;
241 }
242 lck_mtx_lock(lck: &nfsrv_reqcache_mutex);
243loop:
244 for (rp = NFSRCHASH(nd->nd_retxid)->lh_first; rp != 0;
245 rp = rp->rc_hash.le_next) {
246 if (nd->nd_retxid == rp->rc_xid && nd->nd_procnum == rp->rc_proc &&
247 netaddr_match(family: rp->rc_family, haddr: &rp->rc_haddr, nam: nd->nd_nam)) {
248 if ((rp->rc_flag & RC_LOCKED) != 0) {
249 rp->rc_flag |= RC_WANTED;
250 msleep(chan: rp, mtx: &nfsrv_reqcache_mutex, PZERO - 1, wmesg: "nfsrc", NULL);
251 goto loop;
252 }
253 rp->rc_flag |= RC_LOCKED;
254 /* If not at end of LRU chain, move it there */
255 if (rp->rc_lru.tqe_next) {
256 TAILQ_REMOVE(&nfsrv_reqcache_lruhead, rp, rc_lru);
257 TAILQ_INSERT_TAIL(&nfsrv_reqcache_lruhead, rp, rc_lru);
258 }
259 if (rp->rc_state == RC_UNUSED) {
260 panic("nfsrv cache");
261 }
262 if (rp->rc_state == RC_INPROG) {
263 OSAddAtomic64(1, &nfsrvstats.srvcache_inproghits);
264 ret = RC_DROPIT;
265 } else if (rp->rc_flag & RC_REPSTATUS) {
266 OSAddAtomic64(1, &nfsrvstats.srvcache_nonidemdonehits);
267 nd->nd_repstat = rp->rc_status;
268 error = nfsrv_rephead(nd, slp, &nmrep, 0);
269 if (error) {
270 printf("nfsrv cache: reply alloc failed for nonidem request hit\n");
271 ret = RC_DROPIT;
272 *mrepp = NULL;
273 } else {
274 ret = RC_REPLY;
275 *mrepp = nmrep.nmc_mhead;
276 }
277 } else if (rp->rc_flag & RC_REPMBUF) {
278 OSAddAtomic64(1, &nfsrvstats.srvcache_nonidemdonehits);
279 error = mbuf_copym(src: rp->rc_reply, offset: 0, MBUF_COPYALL, how: MBUF_WAITOK, new_mbuf: mrepp);
280 if (error) {
281 printf("nfsrv cache: reply copym failed for nonidem request hit\n");
282 ret = RC_DROPIT;
283 } else {
284 ret = RC_REPLY;
285 }
286 } else {
287 OSAddAtomic64(1, &nfsrvstats.srvcache_idemdonehits);
288 rp->rc_state = RC_INPROG;
289 ret = RC_DOIT;
290 }
291 rp->rc_flag &= ~RC_LOCKED;
292 if (rp->rc_flag & RC_WANTED) {
293 rp->rc_flag &= ~RC_WANTED;
294 wakeup(chan: rp);
295 }
296 lck_mtx_unlock(lck: &nfsrv_reqcache_mutex);
297 return ret;
298 }
299 }
300 OSAddAtomic64(1, &nfsrvstats.srvcache_misses);
301 if (nfsrv_reqcache_count < nfsrv_reqcache_size) {
302 /* try to allocate a new entry */
303 rp = kalloc_type(struct nfsrvcache, Z_WAITOK | Z_ZERO | Z_NOFAIL);
304 rp->rc_flag = RC_LOCKED;
305 nfsrv_reqcache_count++;
306 } else {
307 rp = NULL;
308 }
309 if (!rp) {
310 /* try to reuse the least recently used entry */
311 rp = nfsrv_reqcache_lruhead.tqh_first;
312 if (!rp) {
313 /* no entry to reuse? */
314 /* OK, we just won't be able to cache this request */
315 lck_mtx_unlock(lck: &nfsrv_reqcache_mutex);
316 return RC_DOIT;
317 }
318 while ((rp->rc_flag & RC_LOCKED) != 0) {
319 rp->rc_flag |= RC_WANTED;
320 msleep(chan: rp, mtx: &nfsrv_reqcache_mutex, PZERO - 1, wmesg: "nfsrc", NULL);
321 rp = nfsrv_reqcache_lruhead.tqh_first;
322 }
323 rp->rc_flag |= RC_LOCKED;
324 LIST_REMOVE(rp, rc_hash);
325 TAILQ_REMOVE(&nfsrv_reqcache_lruhead, rp, rc_lru);
326 if (rp->rc_flag & RC_REPMBUF) {
327 mbuf_freem(mbuf: rp->rc_reply);
328 }
329 if (rp->rc_flag & RC_NAM) {
330 mbuf_freem(mbuf: rp->rc_nam);
331 }
332 rp->rc_flag &= (RC_LOCKED | RC_WANTED);
333 }
334 TAILQ_INSERT_TAIL(&nfsrv_reqcache_lruhead, rp, rc_lru);
335 rp->rc_state = RC_INPROG;
336 rp->rc_xid = nd->nd_retxid;
337 saddr = mbuf_data(mbuf: nd->nd_nam);
338 rp->rc_family = saddr->sa_family;
339 switch (saddr->sa_family) {
340 case AF_INET:
341 rp->rc_flag |= RC_INETADDR;
342 rp->rc_inetaddr = ((struct sockaddr_in*)saddr)->sin_addr.s_addr;
343 break;
344 case AF_INET6:
345 rp->rc_flag |= RC_INETADDR;
346 rp->rc_inet6addr = ((struct sockaddr_in6*)saddr)->sin6_addr;
347 break;
348 default:
349 error = mbuf_copym(src: nd->nd_nam, offset: 0, MBUF_COPYALL, how: MBUF_WAITOK, new_mbuf: &rp->rc_nam);
350 if (error) {
351 printf("nfsrv cache: nam copym failed\n");
352 } else {
353 rp->rc_flag |= RC_NAM;
354 }
355 break;
356 }
357 ;
358 rp->rc_proc = nd->nd_procnum;
359 LIST_INSERT_HEAD(NFSRCHASH(nd->nd_retxid), rp, rc_hash);
360 rp->rc_flag &= ~RC_LOCKED;
361 if (rp->rc_flag & RC_WANTED) {
362 rp->rc_flag &= ~RC_WANTED;
363 wakeup(chan: rp);
364 }
365 lck_mtx_unlock(lck: &nfsrv_reqcache_mutex);
366 return RC_DOIT;
367}
368
369/*
370 * Update a request cache entry after the rpc has been done
371 */
372void
373nfsrv_updatecache(
374 struct nfsrv_descript *nd,
375 int repvalid,
376 mbuf_t repmbuf)
377{
378 struct nfsrvcache *rp;
379 int error;
380
381 if (!nd->nd_nam2) {
382 return;
383 }
384 lck_mtx_lock(lck: &nfsrv_reqcache_mutex);
385loop:
386 for (rp = NFSRCHASH(nd->nd_retxid)->lh_first; rp != 0;
387 rp = rp->rc_hash.le_next) {
388 if (nd->nd_retxid == rp->rc_xid && nd->nd_procnum == rp->rc_proc &&
389 netaddr_match(family: rp->rc_family, haddr: &rp->rc_haddr, nam: nd->nd_nam)) {
390 if ((rp->rc_flag & RC_LOCKED) != 0) {
391 rp->rc_flag |= RC_WANTED;
392 msleep(chan: rp, mtx: &nfsrv_reqcache_mutex, PZERO - 1, wmesg: "nfsrc", NULL);
393 goto loop;
394 }
395 rp->rc_flag |= RC_LOCKED;
396 if (rp->rc_state == RC_DONE) {
397 /*
398 * This can occur if the cache is too small.
399 * Retransmits of the same request aren't
400 * dropped so we may see the operation
401 * complete more then once.
402 */
403 if (rp->rc_flag & RC_REPMBUF) {
404 mbuf_freem(mbuf: rp->rc_reply);
405 rp->rc_flag &= ~RC_REPMBUF;
406 }
407 }
408 rp->rc_state = RC_DONE;
409 /*
410 * If we have a valid reply update status and save
411 * the reply for non-idempotent rpc's.
412 */
413 if (repvalid && nonidempotent[nd->nd_procnum]) {
414 if ((nd->nd_vers == NFS_VER2) &&
415 nfsv2_repstat[nfsv2_procid[nd->nd_procnum]]) {
416 rp->rc_status = nd->nd_repstat;
417 rp->rc_flag |= RC_REPSTATUS;
418 } else {
419 error = mbuf_copym(src: repmbuf, offset: 0, MBUF_COPYALL, how: MBUF_WAITOK, new_mbuf: &rp->rc_reply);
420 if (!error) {
421 rp->rc_flag |= RC_REPMBUF;
422 }
423 }
424 }
425 rp->rc_flag &= ~RC_LOCKED;
426 if (rp->rc_flag & RC_WANTED) {
427 rp->rc_flag &= ~RC_WANTED;
428 wakeup(chan: rp);
429 }
430 lck_mtx_unlock(lck: &nfsrv_reqcache_mutex);
431 return;
432 }
433 }
434 lck_mtx_unlock(lck: &nfsrv_reqcache_mutex);
435}
436
437/*
438 * Clean out the cache. Called when the last nfsd terminates.
439 */
440void
441nfsrv_cleancache(void)
442{
443 struct nfsrvcache *rp, *nextrp;
444
445 lck_mtx_lock(lck: &nfsrv_reqcache_mutex);
446 TAILQ_FOREACH_SAFE(rp, &nfsrv_reqcache_lruhead, rc_lru, nextrp) {
447 kfree_type(struct nfsrvcache, rp);
448 }
449 hashdestroy(nfsrv_reqcache_hashtbl, M_NFSD, hashmask: nfsrv_reqcache_hash);
450 nfsrv_reqcache_hash = 0;
451 nfsrv_reqcache_count = 0;
452 TAILQ_INIT(&nfsrv_reqcache_lruhead);
453 lck_mtx_unlock(lck: &nfsrv_reqcache_mutex);
454}
455
456#endif /* CONFIG_NFS_SERVER */
457