/*
 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_phantom_cache.h>
#include <vm/vm_compressor.h>


uint32_t phantom_cache_eval_period_in_msecs = 250;
uint32_t phantom_cache_thrashing_threshold_ssd = 1000;
#if CONFIG_EMBEDDED
uint32_t phantom_cache_thrashing_threshold = 500;
#else
uint32_t phantom_cache_thrashing_threshold = 50;
#endif
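/*
 * The thresholds above are counts of ghost pages per evaluation period
 * (phantom_cache_eval_period_in_msecs); is_thrashing() compares each
 * period's added/found counts against them.  A separate, higher threshold
 * is used for SSD-backed objects since the number of re-reads that
 * constitutes thrashing depends on the read latency of the backing media.
 */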

/*
 * Number of consecutive thrashing periods required before
 * vm_phantom_cache_check_pressure() returns true.
 */
#if CONFIG_EMBEDDED
unsigned phantom_cache_contiguous_periods = 4;
#else
unsigned phantom_cache_contiguous_periods = 2;
#endif

clock_sec_t pc_start_of_eval_period_sec = 0;
clock_nsec_t pc_start_of_eval_period_nsec = 0;
boolean_t pc_need_eval_reset = FALSE;

/* One bit per recent sampling period. Bit 0 = current period. */
uint32_t pc_history = 0;

uint32_t sample_period_ghost_added_count = 0;
uint32_t sample_period_ghost_added_count_ssd = 0;
uint32_t sample_period_ghost_found_count = 0;
uint32_t sample_period_ghost_found_count_ssd = 0;

uint32_t vm_phantom_object_id = 1;
#define VM_PHANTOM_OBJECT_ID_AFTER_WRAP 1000000
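/*
 * A phantom_object_id of 0 means "no ghost entries for this object", so when
 * the 32-bit id counter wraps we restart it at a non-zero value rather than 0.
 */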

vm_ghost_t vm_phantom_cache;
uint32_t vm_phantom_cache_nindx = 1;
uint32_t vm_phantom_cache_num_entries = 0;
uint32_t vm_phantom_cache_size;

typedef uint32_t vm_phantom_hash_entry_t;
vm_phantom_hash_entry_t *vm_phantom_cache_hash;
uint32_t vm_phantom_cache_hash_size;
uint32_t vm_ghost_hash_mask;	/* Mask for hash function */
uint32_t vm_ghost_bucket_hash;	/* Basic bucket hash */


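/*
 * Each vm_ghost_t covers VM_GHOST_PAGES_PER_ENTRY consecutive pages of an
 * object (four, given the masks below); g_pages_held keeps one bit per page.
 * pg_masks maps a page's position within its entry, i.e.
 * (vmp_offset >> PAGE_SHIFT) & VM_GHOST_PAGE_MASK, to the corresponding bit.
 */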
int pg_masks[4] = {
	0x1, 0x2, 0x4, 0x8
};


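/*
 * Hash an (object id, ghost offset) pair into the phantom hash table:
 * multiply the object id by vm_ghost_bucket_hash (an odd multiplier derived
 * from the table size in vm_phantom_cache_init()), add the offset XORed with
 * the same multiplier, and mask the result down to the table size.
 */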
#define vm_phantom_hash(obj_id, offset) (\
	( (natural_t)((uintptr_t)obj_id * vm_ghost_bucket_hash) + (offset ^ vm_ghost_bucket_hash)) & vm_ghost_hash_mask)


struct phantom_cache_stats {
	uint32_t pcs_wrapped;
	uint32_t pcs_added_page_to_entry;
	uint32_t pcs_added_new_entry;
	uint32_t pcs_replaced_entry;

	uint32_t pcs_lookup_found_page_in_cache;
	uint32_t pcs_lookup_entry_not_in_cache;
	uint32_t pcs_lookup_page_not_in_entry;

	uint32_t pcs_updated_phantom_state;
} phantom_cache_stats;
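/*
 * These counters are only ever incremented in this file; they are kept so the
 * cache's behavior can be inspected (e.g. from a debugger) rather than being
 * consulted by the pressure logic.
 */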



void
vm_phantom_cache_init()
{
	unsigned int num_entries;
	unsigned int log1;
	unsigned int size;

	if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE)
		return;
#if CONFIG_EMBEDDED
	num_entries = (uint32_t)(((max_mem / PAGE_SIZE) / 10) / VM_GHOST_PAGES_PER_ENTRY);
#else
	num_entries = (uint32_t)(((max_mem / PAGE_SIZE) / 4) / VM_GHOST_PAGES_PER_ENTRY);
#endif
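	/*
	 * Round the table size up to a power of two so that
	 * (vm_phantom_cache_num_entries - 1) can serve as vm_ghost_hash_mask.
	 */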
	vm_phantom_cache_num_entries = 1;

	while (vm_phantom_cache_num_entries < num_entries)
		vm_phantom_cache_num_entries <<= 1;

	vm_phantom_cache_size = sizeof(struct vm_ghost) * vm_phantom_cache_num_entries;
	vm_phantom_cache_hash_size = sizeof(vm_phantom_hash_entry_t) * vm_phantom_cache_num_entries;

	if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&vm_phantom_cache), vm_phantom_cache_size, 0, KMA_KOBJECT | KMA_PERMANENT, VM_KERN_MEMORY_PHANTOM_CACHE) != KERN_SUCCESS)
		panic("vm_phantom_cache_init: kernel_memory_allocate failed\n");
	bzero(vm_phantom_cache, vm_phantom_cache_size);

	if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&vm_phantom_cache_hash), vm_phantom_cache_hash_size, 0, KMA_KOBJECT | KMA_PERMANENT, VM_KERN_MEMORY_PHANTOM_CACHE) != KERN_SUCCESS)
		panic("vm_phantom_cache_init: kernel_memory_allocate failed\n");
	bzero(vm_phantom_cache_hash, vm_phantom_cache_hash_size);


	vm_ghost_hash_mask = vm_phantom_cache_num_entries - 1;

	/*
	 * Derive the hash multiplier (vm_ghost_bucket_hash) from log2 of the
	 * table size: combine a bit near the square root and a bit near the
	 * fourth root of the table size, and always set the low bit so the
	 * multiplier is odd.
	 */
	size = vm_phantom_cache_num_entries;
	for (log1 = 0; size > 1; log1++)
		size /= 2;

	vm_ghost_bucket_hash = 1 << ((log1 + 1) >> 1);	/* Get (ceiling of sqrt of table size) */
	vm_ghost_bucket_hash |= 1 << ((log1 + 1) >> 2);	/* Get (ceiling of quadroot of table size) */
	vm_ghost_bucket_hash |= 1;			/* Set low bit - must always be 1 to ensure a unique series */

	if (vm_ghost_hash_mask & vm_phantom_cache_num_entries)
		printf("vm_phantom_cache_init: WARNING -- strange page hash\n");
}


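/*
 * Record a ghost entry for page 'm' as it is evicted from the file cache.
 * Called with the page queues lock and the page's object lock held.  The
 * object is lazily assigned a phantom_object_id (and its SSD-ness noted)
 * the first time one of its pages is added here.
 */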
void
vm_phantom_cache_add_ghost(vm_page_t m)
{
	vm_ghost_t	vpce;
	vm_object_t	object;
	int		ghost_index;
	int		pg_mask;
	boolean_t	isSSD = FALSE;
	vm_phantom_hash_entry_t ghost_hash_index;

	object = VM_PAGE_OBJECT(m);

	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	vm_object_lock_assert_exclusive(object);

	if (vm_phantom_cache_num_entries == 0)
		return;

	pg_mask = pg_masks[(m->vmp_offset >> PAGE_SHIFT) & VM_GHOST_PAGE_MASK];

	if (object->phantom_object_id == 0) {

		vnode_pager_get_isSSD(object->pager, &isSSD);

		if (isSSD == TRUE)
			object->phantom_isssd = TRUE;

		object->phantom_object_id = vm_phantom_object_id++;

		if (vm_phantom_object_id == 0)
			vm_phantom_object_id = VM_PHANTOM_OBJECT_ID_AFTER_WRAP;
	} else {
		if ( (vpce = vm_phantom_cache_lookup_ghost(m, 0)) ) {
			vpce->g_pages_held |= pg_mask;

			phantom_cache_stats.pcs_added_page_to_entry++;
			goto done;
		}
	}
	/*
	 * If we're here, the vm_ghost_t for this vm_page_t is not present in
	 * the phantom cache.  Claim the next available entry in the ring,
	 * first evicting the existing entry if we've wrapped around.
	 */
	ghost_index = vm_phantom_cache_nindx++;

	if (vm_phantom_cache_nindx == vm_phantom_cache_num_entries) {
		vm_phantom_cache_nindx = 1;

		phantom_cache_stats.pcs_wrapped++;
	}
	vpce = &vm_phantom_cache[ghost_index];

	if (vpce->g_obj_id) {
		/*
		 * we're going to replace an existing entry
		 * so first remove it from the hash
		 */
		vm_ghost_t nvpce;

		ghost_hash_index = vm_phantom_hash(vpce->g_obj_id, vpce->g_obj_offset);

		nvpce = &vm_phantom_cache[vm_phantom_cache_hash[ghost_hash_index]];

		if (nvpce == vpce) {
			vm_phantom_cache_hash[ghost_hash_index] = vpce->g_next_index;
		} else {
			for (;;) {
				if (nvpce->g_next_index == 0)
					panic("didn't find ghost in hash\n");

				if (&vm_phantom_cache[nvpce->g_next_index] == vpce) {
					nvpce->g_next_index = vpce->g_next_index;
					break;
				}
				nvpce = &vm_phantom_cache[nvpce->g_next_index];
			}
		}
		phantom_cache_stats.pcs_replaced_entry++;
	} else
		phantom_cache_stats.pcs_added_new_entry++;

	vpce->g_pages_held = pg_mask;
	vpce->g_obj_offset = (m->vmp_offset >> (PAGE_SHIFT + VM_GHOST_PAGE_SHIFT)) & VM_GHOST_OFFSET_MASK;
	vpce->g_obj_id = object->phantom_object_id;

	ghost_hash_index = vm_phantom_hash(vpce->g_obj_id, vpce->g_obj_offset);
	vpce->g_next_index = vm_phantom_cache_hash[ghost_hash_index];
	vm_phantom_cache_hash[ghost_hash_index] = ghost_index;

done:
	vm_pageout_vminfo.vm_phantom_cache_added_ghost++;

	if (object->phantom_isssd)
		OSAddAtomic(1, &sample_period_ghost_added_count_ssd);
	else
		OSAddAtomic(1, &sample_period_ghost_added_count);
}


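/*
 * Look up the ghost entry covering page 'm'.  With pg_mask == 0 this returns
 * any entry matching the page's object and ghost offset; with a non-zero
 * pg_mask it also requires that the specific page's bit still be held in the
 * entry.  Returns NULL if no matching entry (or bit) is found.
 */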
vm_ghost_t
vm_phantom_cache_lookup_ghost(vm_page_t m, uint32_t pg_mask)
{
	uint64_t	g_obj_offset;
	uint32_t	g_obj_id;
	uint32_t	ghost_index;
	vm_object_t	object;

	object = VM_PAGE_OBJECT(m);

	if ((g_obj_id = object->phantom_object_id) == 0) {
		/*
		 * no entries in phantom cache for this object
		 */
		return (NULL);
	}
	g_obj_offset = (m->vmp_offset >> (PAGE_SHIFT + VM_GHOST_PAGE_SHIFT)) & VM_GHOST_OFFSET_MASK;

	ghost_index = vm_phantom_cache_hash[vm_phantom_hash(g_obj_id, g_obj_offset)];

	while (ghost_index) {
		vm_ghost_t	vpce;

		vpce = &vm_phantom_cache[ghost_index];

		if (vpce->g_obj_id == g_obj_id && vpce->g_obj_offset == g_obj_offset) {

			if (pg_mask == 0 || (vpce->g_pages_held & pg_mask)) {
				phantom_cache_stats.pcs_lookup_found_page_in_cache++;

				return (vpce);
			}
			phantom_cache_stats.pcs_lookup_page_not_in_entry++;

			return (NULL);
		}
		ghost_index = vpce->g_next_index;
	}
	phantom_cache_stats.pcs_lookup_entry_not_in_cache++;

	return (NULL);
}



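/*
 * Called when page 'm' is brought (back) into the file cache.  If the page is
 * still represented in the phantom cache, clear its bit and count the refault
 * in the current sample period; these counts feed
 * vm_phantom_cache_check_pressure().
 */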
void
vm_phantom_cache_update(vm_page_t m)
{
	int		pg_mask;
	vm_ghost_t	vpce;
	vm_object_t	object;

	object = VM_PAGE_OBJECT(m);

	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	vm_object_lock_assert_exclusive(object);

	if (vm_phantom_cache_num_entries == 0)
		return;

	pg_mask = pg_masks[(m->vmp_offset >> PAGE_SHIFT) & VM_GHOST_PAGE_MASK];

	if ( (vpce = vm_phantom_cache_lookup_ghost(m, pg_mask)) ) {

		vpce->g_pages_held &= ~pg_mask;

		phantom_cache_stats.pcs_updated_phantom_state++;
		vm_pageout_vminfo.vm_phantom_cache_found_ghost++;

		if (object->phantom_isssd)
			OSAddAtomic(1, &sample_period_ghost_found_count_ssd);
		else
			OSAddAtomic(1, &sample_period_ghost_found_count);
	}
}


#define PHANTOM_CACHE_DEBUG 1

#if PHANTOM_CACHE_DEBUG

int sample_period_ghost_counts_indx = 0;

struct {
	uint32_t added;
	uint32_t found;
	uint32_t added_ssd;
	uint32_t found_ssd;
	uint32_t elapsed_ms;
	boolean_t pressure_detected;
} sample_period_ghost_counts[256];

#endif

/*
 * Determine if the file cache is thrashing from sampling interval statistics.
 *
 * Pages added to the phantom cache = pages evicted from the file cache.
 * Pages found in the phantom cache = reads of pages that were recently evicted.
 * Threshold is the latency-dependent number of reads we consider thrashing.
 */
static boolean_t
is_thrashing(uint32_t added, uint32_t found, uint32_t threshold)
{
	/* Ignore normal activity below the threshold. */
	if (added < threshold || found < threshold)
		return FALSE;

	/*
	 * When thrashing in a way that we can mitigate, most of the pages read
	 * into the file cache were recently evicted, and 'found' will be close
	 * to 'added'.
	 *
	 * When replacing the current working set because a new app is
	 * launched, we see very high read traffic with sporadic phantom cache
	 * hits.
	 *
	 * This is not thrashing, or freeing up memory wouldn't help much
	 * anyway.
	 */
	if (found < added / 2)
		return FALSE;

	return TRUE;
}

/*
 * This function is never called from multiple threads simultaneously: a
 * condition variable at the compressor level serializes the callers, so no
 * locking is needed for the sample processing.
 */
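/*
 * Returns TRUE when the last phantom_cache_contiguous_periods sample periods
 * all showed thrashing (see pc_history) and the file cache currently holds no
 * more than half of available memory.
 */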
boolean_t
vm_phantom_cache_check_pressure()
{
	clock_sec_t	cur_ts_sec;
	clock_nsec_t	cur_ts_nsec;
	uint64_t	elapsed_msecs_in_eval;
	boolean_t	pressure_detected = FALSE;

	clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec);

	elapsed_msecs_in_eval = vm_compressor_compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, pc_start_of_eval_period_sec, pc_start_of_eval_period_nsec);

	/*
	 * Reset evaluation period after phantom_cache_eval_period_in_msecs or
	 * whenever vm_phantom_cache_restart_sample has been called.
	 */
	if (elapsed_msecs_in_eval >= phantom_cache_eval_period_in_msecs) {
		pc_need_eval_reset = TRUE;
	}

	if (pc_need_eval_reset == TRUE) {

#if PHANTOM_CACHE_DEBUG
		/*
		 * maintain some info about the last 256 sample periods
		 */
		sample_period_ghost_counts[sample_period_ghost_counts_indx].added = sample_period_ghost_added_count;
		sample_period_ghost_counts[sample_period_ghost_counts_indx].found = sample_period_ghost_found_count;
		sample_period_ghost_counts[sample_period_ghost_counts_indx].added_ssd = sample_period_ghost_added_count_ssd;
		sample_period_ghost_counts[sample_period_ghost_counts_indx].found_ssd = sample_period_ghost_found_count_ssd;
		sample_period_ghost_counts[sample_period_ghost_counts_indx].elapsed_ms = (uint32_t)elapsed_msecs_in_eval;

		sample_period_ghost_counts_indx++;

		if (sample_period_ghost_counts_indx >= 256)
			sample_period_ghost_counts_indx = 0;
#endif
		sample_period_ghost_added_count = 0;
		sample_period_ghost_found_count = 0;
		sample_period_ghost_added_count_ssd = 0;
		sample_period_ghost_found_count_ssd = 0;

		pc_start_of_eval_period_sec = cur_ts_sec;
		pc_start_of_eval_period_nsec = cur_ts_nsec;
		pc_history <<= 1;
		pc_need_eval_reset = FALSE;
	} else {
		/*
		 * Since the thrashing rate is really a function of the read
		 * latency of the disk, we have to consider both the SSD and
		 * spinning-disk cases: the file cache could be backed by
		 * either, or even both, flavors.  When an object is first
		 * assigned a phantom_object_id, we query the pager to
		 * determine whether the backing media is an SSD and remember
		 * that answer in the vm_object.  We use that info to maintain
		 * separate counts for the SSD and spinning-disk cases.
		 */
		if (is_thrashing(sample_period_ghost_added_count,
				 sample_period_ghost_found_count,
				 phantom_cache_thrashing_threshold) ||
		    is_thrashing(sample_period_ghost_added_count_ssd,
				 sample_period_ghost_found_count_ssd,
				 phantom_cache_thrashing_threshold_ssd)) {
			/* Thrashing in the current period: Set bit 0. */
			pc_history |= 1;
		}
	}

	/*
	 * Declare pressure_detected after phantom_cache_contiguous_periods.
	 *
	 * Create a bitmask with the N low bits set. These bits must all be set
	 * in pc_history. The high bits of pc_history are ignored.
	 */
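	/*
	 * For example, with phantom_cache_contiguous_periods == 2 the bitmask
	 * is 0x3, so two consecutive thrashing periods are required.
	 */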
	uint32_t bitmask = (1u << phantom_cache_contiguous_periods) - 1;
	if ((pc_history & bitmask) == bitmask)
		pressure_detected = TRUE;

	if (vm_page_external_count > ((AVAILABLE_MEMORY) * 50) / 100)
		pressure_detected = FALSE;

#if PHANTOM_CACHE_DEBUG
	sample_period_ghost_counts[sample_period_ghost_counts_indx].pressure_detected = pressure_detected;
#endif
	return (pressure_detected);
}

/*
 * Restart the current sampling because conditions have changed significantly,
 * and we don't want to react to old data.
 *
 * This function can be called from any thread.
 */
void
vm_phantom_cache_restart_sample(void)
{
	pc_need_eval_reset = TRUE;
}