|
|
1.1 root 1: /*
2: * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3: *
4: * @APPLE_LICENSE_HEADER_START@
5: *
6: * The contents of this file constitute Original Code as defined in and
7: * are subject to the Apple Public Source License Version 1.1 (the
8: * "License"). You may not use this file except in compliance with the
9: * License. Please obtain a copy of the License at
10: * http://www.apple.com/publicsource and read it before using this file.
11: *
12: * This Original Code and all software distributed under the License are
13: * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14: * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15: * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16: * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17: * License for the specific language governing rights and limitations
18: * under the License.
19: *
20: * @APPLE_LICENSE_HEADER_END@
21: */
22: /*
23: * @OSF_COPYRIGHT@
24: */
25: /*
26: * Mach Operating System
27: * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
28: * All Rights Reserved.
29: *
30: * Permission to use, copy, modify and distribute this software and its
31: * documentation is hereby granted, provided that both the copyright
32: * notice and this permission notice appear in all copies of the
33: * software, derivative works or modified versions, and any portions
34: * thereof, and that both notices appear in supporting documentation.
35: *
36: * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37: * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38: * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39: *
40: * Carnegie Mellon requests users of this software to return to
41: *
42: * Software Distribution Coordinator or [email protected]
43: * School of Computer Science
44: * Carnegie Mellon University
45: * Pittsburgh PA 15213-3890
46: *
47: * any improvements or extensions that they make and grant Carnegie Mellon
48: * the rights to redistribute these changes.
49: */
50: /*
51: */
52: /*
53: * File: vm_fault.c
54: * Author: Avadis Tevanian, Jr., Michael Wayne Young
55: *
56: * Page fault handling module.
57: */
58: #ifdef MACH_BSD
59: /* remove after component interface available */
60: extern int vnode_pager_workaround;
61: #endif
62:
63: #include <mach_cluster_stats.h>
64: #include <mach_pagemap.h>
65: #include <mach_kdb.h>
66:
67: #include <vm/vm_fault.h>
68: #include <mach/kern_return.h>
69: #include <mach/message.h> /* for error codes */
70: #include <kern/host_statistics.h>
71: #include <kern/counters.h>
72: #include <kern/task.h>
73: #include <kern/thread.h>
74: #include <kern/sched_prim.h>
75: #include <kern/host.h>
76: #include <kern/xpr.h>
77: #include <vm/vm_map.h>
78: #include <vm/vm_object.h>
79: #include <vm/vm_page.h>
80: #include <vm/pmap.h>
81: #include <vm/vm_pageout.h>
82: #include <mach/vm_param.h>
83: #include <mach/vm_behavior.h>
84: #include <mach/memory_object.h>
85: /* For memory_object_data_{request,unlock} */
86: #include <kern/mach_param.h>
87: #include <kern/macro_help.h>
88: #include <kern/zalloc.h>
89: #include <kern/misc_protos.h>
90:
91: #include <sys/kdebug.h>
92:
93: #define VM_FAULT_CLASSIFY 0	/* nonzero compiles in the vm_fault_classify() declarations below */
94: #define VM_FAULT_STATIC_CONFIG 1	/* nonzero compiles out the run-time tunables guarded by !VM_FAULT_STATIC_CONFIG */
95:
96: #define TRACEFAULTPAGE 0 /* (TEST/DEBUG) nonzero compiles in the dbgTrace() calls in vm_fault_page */
97:
98: int vm_object_absent_max = 50;	/* vm_fault_page waits instead of issuing a new pager request while a non-internal object's absent_count exceeds this */
99:
100: int vm_fault_debug = 0;	/* nonzero enables the diagnostic printf when memory_object_data_unlock fails */
101: boolean_t vm_page_deactivate_behind = TRUE;	/* NOTE(review): not referenced in the visible part of this file; meaning inferred from the name only -- confirm */
102:
103: vm_machine_attribute_val_t mv_cache_sync = MATTR_VAL_CACHE_SYNC;	/* NOTE(review): not referenced in the visible part of this file -- confirm its users */
104:
105: #if !VM_FAULT_STATIC_CONFIG	/* run-time tunables; absent when the configuration is static */
106: boolean_t vm_fault_dirty_handling = FALSE;	/* when TRUE, vm_fault_page strips VM_PROT_WRITE from *protection on non-write faults */
107: boolean_t vm_fault_interruptible = FALSE;	/* when FALSE, vm_fault_page forces interruptible to THREAD_UNINT */
108: boolean_t software_reference_bits = TRUE;	/* when FALSE, vm_fault_page removes a found page from the pageout queues */
109: #endif
110:
111: #if MACH_KDB
112: extern struct db_watchpoint *db_watchpoint_list;
113: #endif /* MACH_KDB */
114:
115: /* Forward declarations of internal routines. */
116: extern kern_return_t vm_fault_wire_fast(
117: vm_map_t map,
118: vm_offset_t va,
119: vm_map_entry_t entry);
120:
121: extern void vm_fault_continue(void);
122:
123: extern void vm_fault_copy_cleanup(
124: vm_page_t page,
125: vm_page_t top_page);
126:
127: extern void vm_fault_copy_dst_cleanup(
128: vm_page_t page);
129:
130: #if VM_FAULT_CLASSIFY
131: extern void vm_fault_classify(vm_object_t object,
132: vm_offset_t offset,
133: vm_prot_t fault_type);
134:
135: extern void vm_fault_classify_init(void);
136: #endif
137:
138: /*
139: * Routine: vm_fault_init
140: * Purpose:
141: * Initialize our private data structures.
142: */
143: void
144: vm_fault_init(void)
145: {	/* Intentionally empty: this routine currently performs no initialization work. */
146: }
147:
148: /*
149: * Routine: vm_fault_cleanup
150: * Purpose:
151: * Clean up the result of vm_fault_page.
152: * Results:
153: * The paging reference for "object" is released.
154: * "object" is unlocked.
155: * If "top_page" is not null, "top_page" is
156: * freed and the paging reference for the object
157: * containing it is released.
158: *
159: * In/out conditions:
160: * "object" must be locked.
161: */
162: void
163: vm_fault_cleanup(
164: register vm_object_t object,	/* object (locked on entry) whose paging reference is released */
165: register vm_page_t top_page)	/* page to free afterwards, or VM_PAGE_NULL for none */
166: {
167: vm_object_paging_end(object);	/* drop the paging reference while "object" is still locked */
168: vm_object_unlock(object);	/* "object" is left unlocked from here on */
169:
170: if (top_page != VM_PAGE_NULL) {	/* optional second step: dispose of the placeholder page */
171: object = top_page->object;	/* "object" is reused to name the object containing top_page */
172: vm_object_lock(object);	/* that object is locked around the free and the reference drop */
173: VM_PAGE_FREE(top_page);
174: vm_object_paging_end(object);	/* release the paging reference held on top_page's object */
175: vm_object_unlock(object);
176: }
177: }
178:
179: #if MACH_CLUSTER_STATS	/* cluster statistics are compiled in only when MACH_CLUSTER_STATS is nonzero */
180: #define MAXCLUSTERPAGES 16	/* number of entries in cluster_stats_in[] */
181: struct {	/* one set of counters per array slot; the slot is the argument given to the CLUSTER_STAT_* macros */
182: unsigned long pages_in_cluster;	/* incremented by CLUSTER_STAT_CLUSTER(x) */
183: unsigned long pages_at_higher_offsets;	/* incremented by CLUSTER_STAT_HIGHER(x) */
184: unsigned long pages_at_lower_offsets;	/* incremented by CLUSTER_STAT_LOWER(x) */
185: } cluster_stats_in[MAXCLUSTERPAGES];
186: #define CLUSTER_STAT(clause) clause	/* statistics enabled: the clause is emitted as written */
187: #define CLUSTER_STAT_HIGHER(x) \
188: ((cluster_stats_in[(x)].pages_at_higher_offsets)++)
189: #define CLUSTER_STAT_LOWER(x) \
190: ((cluster_stats_in[(x)].pages_at_lower_offsets)++)
191: #define CLUSTER_STAT_CLUSTER(x) \
192: ((cluster_stats_in[(x)].pages_in_cluster)++)
193: #else /* MACH_CLUSTER_STATS */
194: #define CLUSTER_STAT(clause)	/* statistics disabled: the clause expands to nothing */
195: #endif /* MACH_CLUSTER_STATS */
196:
197: /* XXX - temporary */
198: boolean_t vm_allow_clustered_pagein = FALSE;	/* when FALSE, vm_fault_page skips clustered pagein entirely */
199: int vm_pagein_cluster_used = 0;	/* incremented when vm_fault_page finds a resident page whose "clustered" flag is set */
200:
201: /*
202: * Prepage default sizes given VM_BEHAVIOR_DEFAULT reference behavior
203: */
204: int vm_default_ahead = 1; /* Number of pages to prepage ahead */
205: int vm_default_behind = 0; /* Number of pages to prepage behind; nonzero also enables the backward scan for VM_BEHAVIOR_DEFAULT */
206:
207: #define ALIGNED(x) (((x) & (PAGE_SIZE - 1)) == 0)	/* true when the bits of x selected by (PAGE_SIZE - 1) are all zero; assumes PAGE_SIZE is a power of two */
208:
209: /*
210: * Routine: vm_fault_page
211: * Purpose:
212: * Find the resident page for the virtual memory
213: * specified by the given virtual memory object
214: * and offset.
215: * Additional arguments:
216: * The required permissions for the page are given
217: * in "fault_type". Desired permissions are included
218: * in "protection". The minimum and maximum valid offsets
219: * within the object for the relevant map entry are
220: * passed in "lo_offset" and "hi_offset" respectively and
221: * the expected page reference pattern is passed in "behavior".
222: * These three parameters are used to determine pagein cluster
223: * limits.
224: *
225: * If the desired page is known to be resident (for
226: * example, because it was previously wired down), asserting
227: * the "unwiring" parameter will speed the search.
228: *
229: * If the operation can be interrupted (by thread_abort
230: * or thread_terminate), then the "interruptible"
231: * parameter should be asserted.
232: *
233: * Results:
234: * The page containing the proper data is returned
235: * in "result_page".
236: *
237: * In/out conditions:
238: * The source object must be locked and referenced,
239: * and must donate one paging reference. The reference
240: * is not affected. The paging reference and lock are
241: * consumed.
242: *
243: * If the call succeeds, the object in which "result_page"
244: * resides is left locked and holding a paging reference.
245: * If this is not the original object, a busy page in the
246: * original object is returned in "top_page", to prevent other
247: * callers from pursuing this same data, along with a paging
248: * reference for the original object. The "top_page" should
249: * be destroyed when this guarantee is no longer required.
250: * The "result_page" is also left busy. It is not removed
251: * from the pageout queues.
252: */
253:
254: vm_fault_return_t
255: vm_fault_page(
256: /* Arguments: */
257: vm_object_t first_object, /* Object to begin search */
258: vm_offset_t first_offset, /* Offset into object */
259: vm_prot_t fault_type, /* What access is requested */
260: boolean_t must_be_resident,/* Must page be resident? */
261: int interruptible, /* how may fault be interrupted? */
262: vm_offset_t lo_offset, /* Map entry start */
263: vm_offset_t hi_offset, /* Map entry end */
264: vm_behavior_t behavior, /* Page reference behavior */
265: /* Modifies in place: */
266: vm_prot_t *protection, /* Protection for mapping */
267: /* Returns: */
268: vm_page_t *result_page, /* Page found, if successful */
269: vm_page_t *top_page, /* Page in top object, if
270: * not result_page. */
271: int *type_of_fault, /* if non-null, fill in with type of fault
272: * COW, zero-fill, etc... returned in trace point */
273: /* More arguments: */
274: kern_return_t *error_code, /* code if page is in error */
275: boolean_t no_zero_fill, /* don't zero fill absent pages */
276: boolean_t data_supply) /* treat as data_supply if
277: * it is a write fault and a full
278: * page is provided */
279: {
280: register
281: vm_page_t m;
282: register
283: vm_object_t object;
284: register
285: vm_offset_t offset;
286: vm_page_t first_m;
287: vm_object_t next_object;
288: vm_object_t copy_object;
289: boolean_t look_for_page;
290: vm_prot_t access_required = fault_type;
291: vm_prot_t wants_copy_flag;
292: thread_t thread = current_thread();
293: vm_size_t cluster_size, length;
294: vm_offset_t cluster_offset;
295: vm_offset_t cluster_start, cluster_end, paging_offset;
296: vm_offset_t align_offset;
297: CLUSTER_STAT(int pages_at_higher_offsets;)
298: CLUSTER_STAT(int pages_at_lower_offsets;)
299: #ifdef MACH_BSD
300: kern_return_t vnode_pager_data_request(
301: ipc_port_t, ipc_port_t, vm_offset_t, vm_size_t, vm_prot_t);
302: #endif
303:
304: #if MACH_PAGEMAP
305: /*
306: * MACH page map - an optional optimization where a bit map is maintained
307: * by the VM subsystem for internal objects to indicate which pages of
308: * the object currently reside on backing store. This existence map
309: * duplicates information maintained by the vnode pager. It is
310: * created at the time of the first pageout against the object, i.e.
311: * at the same time pager for the object is created. The optimization
312: * is designed to eliminate pager interaction overhead, if it is
313: * 'known' that the page does not exist on backing store.
314: *
315: * LOOK_FOR() evaluates to TRUE if the page specified by object/offset is
316: * either marked as paged out in the existence map for the object or no
317: * existence map exists for the object. LOOK_FOR() is one of the
318: * criteria in the decision to invoke the pager. It is also used as one
319: * of the criteria to terminate the scan for adjacent pages in a clustered
320: * pagein operation. Note that LOOK_FOR() always evaluates to TRUE for
321: * permanent objects. Note also that if the pager for an internal object
322: * has not been created, the pager is not invoked regardless of the value
323: * of LOOK_FOR() and that clustered pagein scans are only done on an object
324: * for which a pager has been created.
325: *
326: * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset
327: * is marked as paged out in the existence map for the object.
328: * PAGED_OUT() is used to determine if a page has already been pushed
329: * into a copy object in order to avoid a redundant page out operation.
330: */
331: #define LOOK_FOR(o, f) (vm_external_state_get((o)->existence_map, (f)) \
332: != VM_EXTERNAL_STATE_ABSENT)
333: #define PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \
334: == VM_EXTERNAL_STATE_EXISTS)
335: #else /* MACH_PAGEMAP */
336: /*
337: * If the MACH page map optimization is not enabled,
338: * LOOK_FOR() always evaluates to TRUE. The pager will always be
339: * invoked to resolve missing pages in an object, assuming the pager
340: * has been created for the object. In a clustered page operation, the
341: * absence of a page on backing store cannot be used to terminate
342: * a scan for adjacent pages since that information is available only in
343: * the pager. Hence pages that may not be paged out are potentially
344: * included in a clustered request. The vnode pager is coded to deal
345: * with any combination of absent/present pages in a clustered
346: * pagein request. PAGED_OUT() always evaluates to FALSE, i.e. the pager
347: * will always be invoked to push a dirty page into a copy object assuming
348: * a pager has been created. If the page has already been pushed, the
349: * pager will ignore the new request.
350: */
351: #define LOOK_FOR(o, f) TRUE
352: #define PAGED_OUT(o, f) FALSE
353: #endif /* MACH_PAGEMAP */
354:
355: /*
356: * Recovery actions
357: */
358: #define PREPARE_RELEASE_PAGE(m) \
359: MACRO_BEGIN \
360: vm_page_lock_queues(); \
361: MACRO_END
362:
363: #define DO_RELEASE_PAGE(m) \
364: MACRO_BEGIN \
365: PAGE_WAKEUP_DONE(m); \
366: if (!m->active && !m->inactive) \
367: vm_page_activate(m); \
368: vm_page_unlock_queues(); \
369: MACRO_END
370:
371: #define RELEASE_PAGE(m) \
372: MACRO_BEGIN \
373: PREPARE_RELEASE_PAGE(m); \
374: DO_RELEASE_PAGE(m); \
375: MACRO_END
376:
377: #if TRACEFAULTPAGE
378: dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset); /* (TEST/DEBUG) */
379: #endif
380:
381:
382:
383: #if !VM_FAULT_STATIC_CONFIG
384: if (vm_fault_dirty_handling
385: #if MACH_KDB
386: /*
387: * If there are watchpoints set, then
388: * we don't want to give away write permission
389: * on a read fault. Make the task write fault,
390: * so that the watchpoint code notices the access.
391: */
392: || db_watchpoint_list
393: #endif /* MACH_KDB */
394: ) {
395: /*
396: * If we aren't asking for write permission,
397: * then don't give it away. We're using write
398: * faults to set the dirty bit.
399: */
400: if (!(fault_type & VM_PROT_WRITE))
401: *protection &= ~VM_PROT_WRITE;
402: }
403:
404: if (!vm_fault_interruptible)
405: interruptible = THREAD_UNINT;
406: #else /* STATIC_CONFIG */
407: #if MACH_KDB
408: /*
409: * If there are watchpoints set, then
410: * we don't want to give away write permission
411: * on a read fault. Make the task write fault,
412: * so that the watchpoint code notices the access.
413: */
414: if (db_watchpoint_list) {
415: /*
416: * If we aren't asking for write permission,
417: * then don't give it away. We're using write
418: * faults to set the dirty bit.
419: */
420: if (!(fault_type & VM_PROT_WRITE))
421: *protection &= ~VM_PROT_WRITE;
422: }
423:
424: interruptible = THREAD_UNINT; /* vm_fault_interruptible */
425: #endif /* MACH_KDB */
426: #endif /* STATIC_CONFIG */
427:
428: /*
429: * INVARIANTS (through entire routine):
430: *
431: * 1) At all times, we must either have the object
432: * lock or a busy page in some object to prevent
433: * some other thread from trying to bring in
434: * the same page.
435: *
436: * Note that we cannot hold any locks during the
437: * pager access or when waiting for memory, so
438: * we use a busy page then.
439: *
440: * Note also that we aren't as concerned about more than
441: * one thread attempting to memory_object_data_unlock
442: * the same page at once, so we don't hold the page
443: * as busy then, but do record the highest unlock
444: * value so far. [Unlock requests may also be delivered
445: * out of order.]
446: *
447: * 2) To prevent another thread from racing us down the
448: * shadow chain and entering a new page in the top
449: * object before we do, we must keep a busy page in
450: * the top object while following the shadow chain.
451: *
452: * 3) We must increment paging_in_progress on any object
453: * for which we have a busy page, to prevent
454: * vm_object_collapse from removing the busy page
455: * without our noticing.
456: *
457: * 4) We leave busy pages on the pageout queues.
458: * If the pageout daemon comes across a busy page,
459: * it will remove the page from the pageout queues.
460: */
461:
462: /*
463: * Search for the page at object/offset.
464: */
465:
466: object = first_object;
467: offset = first_offset;
468: first_m = VM_PAGE_NULL;
469: access_required = fault_type;
470:
471: XPR(XPR_VM_FAULT,
472: "vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n",
473: (integer_t)object, offset, fault_type, *protection, 0);
474:
475: /*
476: * See whether this page is resident
477: */
478:
479: while (TRUE) {
480: #if TRACEFAULTPAGE
481: dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
482: #endif
483: if (!object->alive) {
484: vm_fault_cleanup(object, first_m);
485: return(VM_FAULT_MEMORY_ERROR);
486: }
487: m = vm_page_lookup(object, offset);
488: #if TRACEFAULTPAGE
489: dbgTrace(0xBEEF0004, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */
490: #endif
491: if (m != VM_PAGE_NULL) {
492: /*
493: * If the page was pre-paged as part of a
494: * cluster, record the fact.
495: */
496: if (m->clustered) {
497: vm_pagein_cluster_used++;
498: m->clustered = FALSE;
499: }
500:
501: /*
502: * If the page is being brought in,
503: * wait for it and then retry.
504: *
505: * A possible optimization: if the page
506: * is known to be resident, we can ignore
507: * pages that are absent (regardless of
508: * whether they're busy).
509: */
510:
511: if (m->busy) {
512: kern_return_t wait_result;
513:
514: #if TRACEFAULTPAGE
515: dbgTrace(0xBEEF0005, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
516: #endif
517: PAGE_ASSERT_WAIT(m, interruptible);
518: vm_object_unlock(object);
519: XPR(XPR_VM_FAULT,
520: "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
521: (integer_t)object, offset,
522: (integer_t)m, 0, 0);
523: counter(c_vm_fault_page_block_busy_kernel++);
524: thread_block((void (*)(void))0);
525:
526: wait_result = thread->wait_result;
527: vm_object_lock(object);
528: if (wait_result != THREAD_AWAKENED) {
529: vm_fault_cleanup(object, first_m);
530: if (wait_result == THREAD_RESTART)
531: {
532: return(VM_FAULT_RETRY);
533: }
534: else
535: {
536: return(VM_FAULT_INTERRUPTED);
537: }
538: }
539: continue;
540: }
541:
542: /*
543: * If the page is in error, give up now.
544: */
545:
546: if (m->error) {
547: #if TRACEFAULTPAGE
548: dbgTrace(0xBEEF0006, (unsigned int) m, (unsigned int) error_code); /* (TEST/DEBUG) */
549: #endif
550: if (error_code)
551: *error_code = m->page_error;
552: VM_PAGE_FREE(m);
553: vm_fault_cleanup(object, first_m);
554:
555: return(VM_FAULT_MEMORY_ERROR);
556: }
557:
558: /*
559: * If the pager wants us to restart
560: * at the top of the chain,
561: * typically because it has moved the
562: * page to another pager, then do so.
563: */
564:
565: if (m->restart) {
566: #if TRACEFAULTPAGE
567: dbgTrace(0xBEEF0007, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
568: #endif
569: VM_PAGE_FREE(m);
570: vm_fault_cleanup(object, first_m);
571:
572: return(VM_FAULT_RETRY);
573: }
574:
575: /*
576: * If the page isn't busy, but is absent,
577: * then it was deemed "unavailable".
578: */
579:
580: if (m->absent) {
581: /*
582: * Remove the non-existent page (unless it's
583: * in the top object) and move on down to the
584: * next object (if there is one).
585: */
586: #if TRACEFAULTPAGE
587: dbgTrace(0xBEEF0008, (unsigned int) m, (unsigned int) object->shadow); /* (TEST/DEBUG) */
588: #endif
589:
590: next_object = object->shadow;
591: if (next_object == VM_OBJECT_NULL) {
592: vm_page_t real_m;
593:
594: assert(!must_be_resident);
595:
596: /*
597: * Absent page at bottom of shadow
598: * chain; zero fill the page we left
599: * busy in the first object, and flush
600: * the absent page. But first we
601: * need to allocate a real page.
602: */
603: if ((vm_page_free_target -
604: ((vm_page_free_target
605: -vm_page_free_min)>>2))
606: > vm_page_free_count) {
607: assert(object->ref_count > 0);
608: vm_fault_cleanup(
609: object, first_m);
610: /* kick off pageout daemon */
611: vm_page_wait();
612: if ((m = vm_page_grab())
613: != VM_PAGE_NULL) {
614: /* need to kick off */
615: /* other parties */
616: /* waiting on free */
617: /* pages */
618: VM_PAGE_FREE(m);
619: }
620: return VM_FAULT_RETRY;
621: }
622:
623: real_m = vm_page_grab();
624: if (real_m == VM_PAGE_NULL) {
625: vm_fault_cleanup(object, first_m);
626: return(VM_FAULT_MEMORY_SHORTAGE);
627: }
628:
629: XPR(XPR_VM_FAULT,
630: "vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n",
631: (integer_t)object, offset,
632: (integer_t)m,
633: (integer_t)first_object, 0);
634: if (object != first_object) {
635: VM_PAGE_FREE(m);
636: vm_object_paging_end(object);
637: vm_object_unlock(object);
638: object = first_object;
639: offset = first_offset;
640: m = first_m;
641: first_m = VM_PAGE_NULL;
642: vm_object_lock(object);
643: }
644:
645: VM_PAGE_FREE(m);
646: assert(real_m->busy);
647: vm_page_insert(real_m, object, offset);
648: m = real_m;
649:
650: /*
651: * Drop the lock while zero filling
652: * page. Then break because this
653: * is the page we wanted. Checking
654: * the page lock is a waste of time;
655: * this page was either absent or
656: * newly allocated -- in both cases
657: * it can't be page locked by a pager.
658: */
659: if (!no_zero_fill) {
660: vm_object_unlock(object);
661: vm_page_zero_fill(m);
662: if (type_of_fault)
663: *type_of_fault = DBG_ZERO_FILL_FAULT;
664: VM_STAT(zero_fill_count++);
665: vm_object_lock(object);
666: }
667: pmap_clear_modify(m->phys_addr);
668: vm_page_lock_queues();
669: VM_PAGE_QUEUES_REMOVE(m);
670: queue_enter(&vm_page_queue_inactive,
671: m, vm_page_t, pageq);
672: m->inactive = TRUE;
673: vm_page_inactive_count++;
674: vm_page_unlock_queues();
675: break;
676: } else {
677: if (must_be_resident) {
678: vm_object_paging_end(object);
679: } else if (object != first_object) {
680: vm_object_paging_end(object);
681: VM_PAGE_FREE(m);
682: } else {
683: first_m = m;
684: m->absent = FALSE;
685: m->unusual = FALSE;
686: vm_object_absent_release(object);
687: m->busy = TRUE;
688:
689: vm_page_lock_queues();
690: VM_PAGE_QUEUES_REMOVE(m);
691: vm_page_unlock_queues();
692: }
693: XPR(XPR_VM_FAULT,
694: "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n",
695: (integer_t)object, offset,
696: (integer_t)next_object,
697: offset+object->shadow_offset,0);
698: offset += object->shadow_offset;
699: hi_offset += object->shadow_offset;
700: lo_offset += object->shadow_offset;
701: access_required = VM_PROT_READ;
702: vm_object_lock(next_object);
703: vm_object_unlock(object);
704: object = next_object;
705: vm_object_paging_begin(object);
706: continue;
707: }
708: }
709:
710: if ((m->cleaning)
711: && ((object != first_object) ||
712: (object->copy != VM_OBJECT_NULL))
713: && (fault_type & VM_PROT_WRITE)) {
714: /*
715: * This is a copy-on-write fault that will
716: * cause us to revoke access to this page, but
717: * this page is in the process of being cleaned
718: * in a clustered pageout. We must wait until
719: * the cleaning operation completes before
720: * revoking access to the original page,
721: * otherwise we might attempt to remove a
722: * wired mapping.
723: */
724: #if TRACEFAULTPAGE
725: dbgTrace(0xBEEF0009, (unsigned int) m, (unsigned int) offset); /* (TEST/DEBUG) */
726: #endif
727: XPR(XPR_VM_FAULT,
728: "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n",
729: (integer_t)object, offset,
730: (integer_t)m, 0, 0);
731: /* take an extra ref so that object won't die */
732: assert(object->ref_count > 0);
733: object->ref_count++;
734: vm_object_res_reference(object);
735: vm_fault_cleanup(object, first_m);
736: counter(c_vm_fault_page_block_backoff_kernel++);
737: vm_object_lock(object);
738: assert(object->ref_count > 0);
739: m = vm_page_lookup(object, offset);
740: if (m != VM_PAGE_NULL && m->cleaning) {
741: PAGE_ASSERT_WAIT(m, interruptible);
742: vm_object_unlock(object);
743: thread_block((void (*)(void)) 0);
744: vm_object_deallocate(object);
745: goto backoff;
746: } else {
747: vm_object_unlock(object);
748: vm_object_deallocate(object);
749: return VM_FAULT_RETRY;
750: }
751: }
752:
753: /*
754: * If the desired access to this page has
755: * been locked out, request that it be unlocked.
756: */
757:
758: if (access_required & m->page_lock) {
759: if ((access_required & m->unlock_request) != access_required) {
760: vm_prot_t new_unlock_request;
761: kern_return_t rc;
762:
763: #if TRACEFAULTPAGE
764: dbgTrace(0xBEEF000A, (unsigned int) m, (unsigned int) object->pager_ready); /* (TEST/DEBUG) */
765: #endif
766: if (!object->pager_ready) {
767: XPR(XPR_VM_FAULT,
768: "vm_f_page: ready wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
769: access_required,
770: (integer_t)object, offset,
771: (integer_t)m, 0);
772: /* take an extra ref */
773: assert(object->ref_count > 0);
774: object->ref_count++;
775: vm_object_res_reference(object);
776: vm_fault_cleanup(object,
777: first_m);
778: counter(c_vm_fault_page_block_backoff_kernel++);
779: vm_object_lock(object);
780: assert(object->ref_count > 0);
781: if (!object->pager_ready) {
782: vm_object_assert_wait(
783: object,
784: VM_OBJECT_EVENT_PAGER_READY,
785: interruptible);
786: vm_object_unlock(object);
787: thread_block((void (*)(void))0);
788: vm_object_deallocate(object);
789: goto backoff;
790: } else {
791: vm_object_unlock(object);
792: vm_object_deallocate(object);
793: return VM_FAULT_RETRY;
794: }
795: }
796:
797: new_unlock_request = m->unlock_request =
798: (access_required | m->unlock_request);
799: vm_object_unlock(object);
800: XPR(XPR_VM_FAULT,
801: "vm_f_page: unlock obj 0x%X, offset 0x%X, page 0x%X, unl_req %d\n",
802: (integer_t)object, offset,
803: (integer_t)m, new_unlock_request, 0);
804: if ((rc = memory_object_data_unlock(
805: object->pager,
806: object->pager_request,
807: offset + object->paging_offset,
808: PAGE_SIZE,
809: new_unlock_request))
810: != KERN_SUCCESS) {
811: if (vm_fault_debug)
812: printf("vm_fault: memory_object_data_unlock failed\n");
813: vm_object_lock(object);
814: vm_fault_cleanup(object, first_m);
815:
816: return((rc == MACH_SEND_INTERRUPTED) ?
817: VM_FAULT_INTERRUPTED :
818: VM_FAULT_MEMORY_ERROR);
819: }
820: vm_object_lock(object);
821: continue;
822: }
823:
824: XPR(XPR_VM_FAULT,
825: "vm_f_page: access wait acc_req %d, obj 0x%X, offset 0x%X, page 0x%X\n",
826: access_required, (integer_t)object,
827: offset, (integer_t)m, 0);
828: /* take an extra ref so object won't die */
829: assert(object->ref_count > 0);
830: object->ref_count++;
831: vm_object_res_reference(object);
832: vm_fault_cleanup(object, first_m);
833: counter(c_vm_fault_page_block_backoff_kernel++);
834: vm_object_lock(object);
835: assert(object->ref_count > 0);
836: m = vm_page_lookup(object, offset);
837: if (m != VM_PAGE_NULL &&
838: (access_required & m->page_lock) &&
839: !((access_required & m->unlock_request) != access_required)) {
840: PAGE_ASSERT_WAIT(m, interruptible);
841: vm_object_unlock(object);
842: thread_block((void (*)(void)) 0);
843: vm_object_deallocate(object);
844: goto backoff;
845: } else {
846: vm_object_unlock(object);
847: vm_object_deallocate(object);
848: return VM_FAULT_RETRY;
849: }
850: }
851: /*
852: * We mark the page busy and leave it on
853: * the pageout queues. If the pageout
854: * daemon comes across it, then it will
855: * remove the page.
856: */
857:
858: #if TRACEFAULTPAGE
859: dbgTrace(0xBEEF000B, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
860: #endif
861:
862: #if !VM_FAULT_STATIC_CONFIG
863: if (!software_reference_bits) {
864: vm_page_lock_queues();
865: if (m->inactive)
866: vm_stat.reactivations++;
867:
868: VM_PAGE_QUEUES_REMOVE(m);
869: vm_page_unlock_queues();
870: }
871: #endif
872: XPR(XPR_VM_FAULT,
873: "vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n",
874: (integer_t)object, offset, (integer_t)m, 0, 0);
875: assert(!m->busy);
876: m->busy = TRUE;
877: assert(!m->absent);
878: break;
879: }
880:
881: look_for_page =
882: (object->pager_created) &&
883: LOOK_FOR(object, offset) &&
884: (!data_supply);
885:
886: #if TRACEFAULTPAGE
887: dbgTrace(0xBEEF000C, (unsigned int) look_for_page, (unsigned int) object); /* (TEST/DEBUG) */
888: #endif
889: if ((look_for_page || (object == first_object))
890: && !must_be_resident) {
891: /*
892: * Allocate a new page for this object/offset
893: * pair.
894: */
895:
896: m = vm_page_grab_fictitious();
897: #if TRACEFAULTPAGE
898: dbgTrace(0xBEEF000D, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */
899: #endif
900: if (m == VM_PAGE_NULL) {
901: vm_fault_cleanup(object, first_m);
902: return(VM_FAULT_FICTITIOUS_SHORTAGE);
903: }
904: vm_page_insert(m, object, offset);
905: }
906:
907: if (look_for_page && !must_be_resident) {
908: kern_return_t rc;
909:
910: /*
911: * If the memory manager is not ready, we
912: * cannot make requests.
913: */
914: if (!object->pager_ready) {
915: #if TRACEFAULTPAGE
916: dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
917: #endif
918: VM_PAGE_FREE(m);
919: XPR(XPR_VM_FAULT,
920: "vm_f_page: ready wait obj 0x%X, offset 0x%X\n",
921: (integer_t)object, offset, 0, 0, 0);
922: /* take an extra ref so object won't die */
923: assert(object->ref_count > 0);
924: object->ref_count++;
925: vm_object_res_reference(object);
926: vm_fault_cleanup(object, first_m);
927: counter(c_vm_fault_page_block_backoff_kernel++);
928: vm_object_lock(object);
929: assert(object->ref_count > 0);
930: if (!object->pager_ready) {
931: vm_object_assert_wait(object,
932: VM_OBJECT_EVENT_PAGER_READY,
933: interruptible);
934: vm_object_unlock(object);
935: thread_block((void (*)(void))0);
936: vm_object_deallocate(object);
937: goto backoff;
938: } else {
939: vm_object_unlock(object);
940: vm_object_deallocate(object);
941: return VM_FAULT_RETRY;
942: }
943: }
944:
945: if (object->internal) {
946: /*
947: * Requests to the default pager
948: * must reserve a real page in advance,
949: * because the pager's data-provided
950: * won't block for pages. IMPORTANT:
951: * this acts as a throttling mechanism
952: * for data_requests to the default
953: * pager.
954: */
955:
956: #if TRACEFAULTPAGE
957: dbgTrace(0xBEEF000F, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
958: #endif
959: if (m->fictitious && !vm_page_convert(m)) {
960: VM_PAGE_FREE(m);
961: vm_fault_cleanup(object, first_m);
962: return(VM_FAULT_MEMORY_SHORTAGE);
963: }
964: } else if (object->absent_count >
965: vm_object_absent_max) {
966: /*
967: * If there are too many outstanding page
968: * requests pending on this object, we
969: * wait for them to be resolved now.
970: */
971:
972: #if TRACEFAULTPAGE
973: dbgTrace(0xBEEF0010, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
974: #endif
975: VM_PAGE_FREE(m);
976: /* take an extra ref so object won't die */
977: assert(object->ref_count > 0);
978: object->ref_count++;
979: vm_object_res_reference(object);
980: vm_fault_cleanup(object, first_m);
981: counter(c_vm_fault_page_block_backoff_kernel++);
982: vm_object_lock(object);
983: assert(object->ref_count > 0);
984: if (object->absent_count > vm_object_absent_max) {
985: vm_object_absent_assert_wait(object,
986: interruptible);
987: vm_object_unlock(object);
988: thread_block((void (*)(void))0);
989: vm_object_deallocate(object);
990: goto backoff;
991: } else {
992: vm_object_unlock(object);
993: vm_object_deallocate(object);
994: return VM_FAULT_RETRY;
995: }
996: }
997:
998: /*
999: * Indicate that the page is waiting for data
1000: * from the memory manager.
1001: */
1002:
1003: m->list_req_pending = TRUE;
1004: m->absent = TRUE;
1005: m->unusual = TRUE;
1006: object->absent_count++;
1007:
1008: cluster_start = offset;
1009: length = PAGE_SIZE;
1010: cluster_size = object->cluster_size;
1011:
1012: /*
1013: * Skip clustered pagein if it is globally disabled
1014: * or random page reference behavior is expected
1015: * for the address range containing the faulting
1016: * address or the object paging block size is
1017: * equal to the page size.
1018: */
1019: if (!vm_allow_clustered_pagein ||
1020: behavior == VM_BEHAVIOR_RANDOM ||
1021: cluster_size == PAGE_SIZE)
1022: goto no_clustering;
1023:
1024: assert(offset >= lo_offset);
1025: assert(offset < hi_offset);
1026: assert(ALIGNED(object->paging_offset));
1027: assert(cluster_size >= PAGE_SIZE);
1028:
1029: #if TRACEFAULTPAGE
1030: dbgTrace(0xBEEF0011, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */
1031: #endif
1032: /*
1033: * Decide whether to scan ahead or behind for
1034: * additional pages contiguous to the faulted
1035: * page in the same paging block. The decision
1036: * is based on system wide globals and the
1037: * expected page reference behavior of the
1038: * address range containing the faulting address.
1039: * First calculate some constants.
1040: */
1041: paging_offset = offset + object->paging_offset;
1042: cluster_offset = paging_offset & (cluster_size - 1);
1043: align_offset = paging_offset&(PAGE_SIZE-1);
1044: if (align_offset != 0) {
1045: cluster_offset = trunc_page(cluster_offset);
1046: }
1047:
1048: #define SPANS_CLUSTER(x) ((((x) - align_offset) & (cluster_size - 1)) == 0)
1049:
1050: /*
1051: * Backward scan for reverse sequential behavior, or for
1052: * default behavior when vm_default_behind is nonzero.
1053: */
1054: CLUSTER_STAT(pages_at_lower_offsets = 0;)
1055: if (((vm_default_behind != 0 &&
1056: behavior == VM_BEHAVIOR_DEFAULT) ||
1057: behavior == VM_BEHAVIOR_RSEQNTL) && offset) {
1058: vm_offset_t cluster_bot;
1059:
1060: /*
1061: * Calculate lower search boundary.
1062: * Exclude pages that span a cluster boundary.
1063: * Clip to start of map entry.
1064: * For default page reference behavior, scan
1065: * default pages behind.
1066: */
1067: cluster_bot = (offset > cluster_offset) ?
1068: offset - cluster_offset : offset;
1069: if (align_offset != 0) {
1070: if ((cluster_bot < offset) &&
1071: SPANS_CLUSTER(cluster_bot)) {
1072: cluster_bot += PAGE_SIZE;
1073: }
1074: }
1075: if (behavior == VM_BEHAVIOR_DEFAULT) {
1076: vm_offset_t bot = vm_default_behind*PAGE_SIZE;
1077:
1078: if (cluster_bot < (offset - bot))
1079: cluster_bot = offset - bot;
1080: }
1081: if (lo_offset > cluster_bot)
1082: cluster_bot = lo_offset;
1083:
1084: for ( cluster_start = offset - PAGE_SIZE;
1085: (cluster_start >= cluster_bot) &&
1086: (cluster_start != (align_offset - PAGE_SIZE));
1087: cluster_start -= PAGE_SIZE) {
1088: assert(cluster_size > PAGE_SIZE);
1089: retry_cluster_backw:
1090: if (!LOOK_FOR(object, cluster_start) ||
1091: vm_page_lookup(object, cluster_start)
1092: != VM_PAGE_NULL) {
1093: break;
1094: }
1095: if (object->internal) {
1096: /*
1097: * need to acquire a real page in
1098: * advance because this acts as
1099: * a throttling mechanism for
1100: * data_requests to the default
1101: * pager. If this fails, give up
1102: * trying to find any more pages
1103: * in the cluster and send off the
1104: * request for what we already have.
1105: */
1106: if ((m = vm_page_grab())
1107: == VM_PAGE_NULL) {
1108: cluster_start += PAGE_SIZE;
1109: cluster_end = offset + PAGE_SIZE;
1110: goto give_up;
1111: }
1112: } else if ((m = vm_page_grab_fictitious())
1113: == VM_PAGE_NULL) {
1114: vm_object_unlock(object);
1115: vm_page_more_fictitious();
1116: vm_object_lock(object);
1117: goto retry_cluster_backw;
1118: }
1119: m->absent = TRUE;
1120: m->unusual = TRUE;
1121: m->clustered = TRUE;
1122: m->list_req_pending = TRUE;
1123:
1124: vm_page_insert(m, object, cluster_start);
1125: CLUSTER_STAT(pages_at_lower_offsets++;)
1126: object->absent_count++;
1127: }
1128: cluster_start += PAGE_SIZE;
1129: assert(cluster_start >= cluster_bot);
1130: }
1131: assert(cluster_start <= offset);
1132:
1133: /*
1134: * Forward scan if default or sequential behavior
1135: * specified
1136: */
1137: CLUSTER_STAT(pages_at_higher_offsets = 0;)
1138: if ((behavior == VM_BEHAVIOR_DEFAULT &&
1139: vm_default_ahead != 0) ||
1140: behavior == VM_BEHAVIOR_SEQUENTIAL) {
1141: vm_offset_t cluster_top;
1142:
1143: /*
1144: * Calculate upper search boundary.
1145: * Exclude pages that span a cluster boundary.
1146: * Clip to end of map entry.
1147: * For default page reference behavior, scan
1148: * default pages ahead.
1149: */
1150: cluster_top = (offset + cluster_size) -
1151: cluster_offset;
1152: if (align_offset != 0) {
1153: if ((cluster_top > (offset + PAGE_SIZE)) &&
1154: SPANS_CLUSTER(cluster_top)) {
1155: cluster_top -= PAGE_SIZE;
1156: }
1157: }
1158: if (behavior == VM_BEHAVIOR_DEFAULT) {
1159: vm_offset_t top = (vm_default_ahead*PAGE_SIZE)+
1160: PAGE_SIZE;
1161:
1162: if (cluster_top > (offset + top))
1163: cluster_top = offset + top;
1164: }
1165: if (cluster_top > hi_offset)
1166: cluster_top = hi_offset;
1167:
1168: for (cluster_end = offset + PAGE_SIZE;
1169: cluster_end < cluster_top;
1170: cluster_end += PAGE_SIZE) {
1171: assert(cluster_size > PAGE_SIZE);
1172: retry_cluster_forw:
1173: if (!LOOK_FOR(object, cluster_end) ||
1174: vm_page_lookup(object, cluster_end)
1175: != VM_PAGE_NULL) {
1176: break;
1177: }
1178: if (object->internal) {
1179: /*
1180: * need to acquire a real page in
1181: * advance because this acts as
1182: * a throttling mechanism for
1183: * data_requests to the default
1184: * pager. If this fails, give up
1185: * trying to find any more pages
1186: * in the cluster and send off the
1187: * request for what we already have.
1188: */
1189: if ((m = vm_page_grab())
1190: == VM_PAGE_NULL) {
1191: break;
1192: }
1193: } else if ((m = vm_page_grab_fictitious())
1194: == VM_PAGE_NULL) {
1195: vm_object_unlock(object);
1196: vm_page_more_fictitious();
1197: vm_object_lock(object);
1198: goto retry_cluster_forw;
1199: }
1200: m->absent = TRUE;
1201: m->unusual = TRUE;
1202: m->clustered = TRUE;
1203: m->list_req_pending = TRUE;
1204:
1205: vm_page_insert(m, object, cluster_end);
1206: CLUSTER_STAT(pages_at_higher_offsets++;)
1207: object->absent_count++;
1208: }
1209: assert(cluster_end <= cluster_top);
1210: }
1211: else {
1212: cluster_end = offset + PAGE_SIZE;
1213: }
1214: give_up:
1215: assert(cluster_end >= offset + PAGE_SIZE);
1216: length = cluster_end - cluster_start;
1217:
1218: #if MACH_CLUSTER_STATS
1219: CLUSTER_STAT_HIGHER(pages_at_higher_offsets);
1220: CLUSTER_STAT_LOWER(pages_at_lower_offsets);
1221: CLUSTER_STAT_CLUSTER(length/PAGE_SIZE);
1222: #endif /* MACH_CLUSTER_STATS */
1223:
1224: no_clustering:
1225: #if TRACEFAULTPAGE
1226: dbgTrace(0xBEEF0012, (unsigned int) object, (unsigned int) 0); /* (TEST/DEBUG) */
1227: #endif
1228: /*
1229: * We have a busy page, so we can
1230: * release the object lock.
1231: */
1232: vm_object_unlock(object);
1233:
1234: /*
1235: * Call the memory manager to retrieve the data.
1236: */
1237:
1238: if (type_of_fault)
1239: *type_of_fault = DBG_PAGEIN_FAULT;
1240: VM_STAT(pageins++);
1241: current_task()->pageins++;
1242:
1243: /*
1244: * If this object uses a copy_call strategy,
1245: * and we are interested in a copy of this object
1246: * (having gotten here only by following a
1247: * shadow chain), then tell the memory manager
1248: * via a flag added to the desired_access
1249: * parameter, so that it can detect a race
1250: * between our walking down the shadow chain
1251: * and its pushing pages up into a copy of
1252: * the object that it manages.
1253: */
1254:
1255: if (object->copy_strategy == MEMORY_OBJECT_COPY_CALL &&
1256: object != first_object) {
1257: wants_copy_flag = VM_PROT_WANTS_COPY;
1258: } else {
1259: wants_copy_flag = VM_PROT_NONE;
1260: }
1261:
1262: XPR(XPR_VM_FAULT,
1263: "vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n",
1264: (integer_t)object, offset, (integer_t)m,
1265: access_required | wants_copy_flag, 0);
1266:
1267: #ifdef MACH_BSD
1268: if (((rpc_subsystem_t)pager_mux_hash_lookup(object->pager)) ==
1269: ((rpc_subsystem_t) &vnode_pager_workaround)) {
1270: rc = vnode_pager_data_request(object->pager,
1271: object->pager_request,
1272: cluster_start + object->paging_offset,
1273: length,
1274: access_required | wants_copy_flag);
1275: } else {
1276: rc = memory_object_data_request(object->pager,
1277: object->pager_request,
1278: cluster_start + object->paging_offset,
1279: length,
1280: access_required | wants_copy_flag);
1281: }
1282: #else
1283: rc = memory_object_data_request(object->pager,
1284: object->pager_request,
1285: cluster_start + object->paging_offset,
1286: length,
1287: access_required | wants_copy_flag);
1288:
1289: #endif
1290:
1291: #if TRACEFAULTPAGE
1292: dbgTrace(0xBEEF0013, (unsigned int) object, (unsigned int) rc); /* (TEST/DEBUG) */
1293: #endif
1294: if (rc != KERN_SUCCESS) {
1295: if (rc != MACH_SEND_INTERRUPTED
1296: && vm_fault_debug)
1297: printf("%s(0x%x, 0x%x, 0x%x, 0x%x, 0x%x) failed, rc=%d, object=0x%x\n",
1298: "memory_object_data_request",
1299: object->pager,
1300: object->pager_request,
1301: cluster_start + object->paging_offset,
1302: length, access_required,
1303: rc, object);
1304: /*
1305: * Don't want to leave a busy page around,
1306: * but the data request may have blocked,
1307: * so check if it's still there and busy.
1308: */
1309: vm_object_lock(object);
1310: for (; length;
1311: length -= PAGE_SIZE,
1312: cluster_start += PAGE_SIZE) {
1313: vm_page_t p;
1314: if ((p = vm_page_lookup(object,
1315: cluster_start))
1316: && p->absent && p->busy
1317: && p != first_m) {
1318: VM_PAGE_FREE(m);
1319: }
1320: }
1321: vm_fault_cleanup(object, first_m);
1322:
1323: return((rc == MACH_SEND_INTERRUPTED) ?
1324: VM_FAULT_INTERRUPTED :
1325: VM_FAULT_MEMORY_ERROR);
1326: }
1327:
1328: /*
1329: * Retry with same object/offset, since new data may
1330: * be in a different page (i.e., m is meaningless at
1331: * this point).
1332: */
1333: vm_object_lock(object);
1334: continue;
1335: }
1336:
1337: /*
1338: * The only case in which we get here is if
1339: * object has no pager (or unwiring). If the pager doesn't
1340: * have the page this is handled in the m->absent case above
1341: * (and if you change things here you should look above).
1342: */
1343: #if TRACEFAULTPAGE
1344: dbgTrace(0xBEEF0014, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1345: #endif
1346: if (object == first_object)
1347: first_m = m;
1348: else
1349: assert(m == VM_PAGE_NULL);
1350:
1351: XPR(XPR_VM_FAULT,
1352: "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n",
1353: (integer_t)object, offset, (integer_t)m,
1354: (integer_t)object->shadow, 0);
1355: /*
1356: * Move on to the next object. Lock the next
1357: * object before unlocking the current one.
1358: */
1359: next_object = object->shadow;
1360: if (next_object == VM_OBJECT_NULL) {
1361: assert(!must_be_resident);
1362:
1363: /*
1364: * If there's no object left, fill the page
1365: * in the top object with zeros. But first we
1366: * need to allocate a real page.
1367: */
1368:
1369: if (object != first_object) {
1370: vm_object_paging_end(object);
1371: vm_object_unlock(object);
1372:
1373: object = first_object;
1374: offset = first_offset;
1375: vm_object_lock(object);
1376: }
1377:
1378: m = first_m;
1379: assert(m->object == object);
1380: first_m = VM_PAGE_NULL;
1381:
1382:
1383: if ((vm_page_free_target -
1384: ((vm_page_free_target-vm_page_free_min)>>2))
1385: > vm_page_free_count) {
1386: VM_PAGE_FREE(m);
1387: /* take an extra ref so object won't die */
1388: assert(object->ref_count > 0);
1389: vm_fault_cleanup(object, first_m);
1390: vm_page_wait(); /* kick off pageout daemon */
1391: if ((m = vm_page_grab()) != VM_PAGE_NULL) {
1392: /* need to kick off other parties */
1393: /* waiting on free pages */
1394: VM_PAGE_FREE(m);
1395: }
1396: return VM_FAULT_RETRY;
1397: }
1398:
1399: if (m->fictitious && !vm_page_convert(m)) {
1400: VM_PAGE_FREE(m);
1401: vm_fault_cleanup(object, VM_PAGE_NULL);
1402: return(VM_FAULT_MEMORY_SHORTAGE);
1403: }
1404:
1405: if (!no_zero_fill) {
1406: vm_object_unlock(object);
1407: vm_page_zero_fill(m);
1408: if (type_of_fault)
1409: *type_of_fault = DBG_ZERO_FILL_FAULT;
1410: VM_STAT(zero_fill_count++);
1411: vm_object_lock(object);
1412: }
1413: vm_page_lock_queues();
1414: VM_PAGE_QUEUES_REMOVE(m);
1415: queue_enter(&vm_page_queue_inactive,
1416: m, vm_page_t, pageq);
1417: m->inactive = TRUE;
1418: vm_page_inactive_count++;
1419: vm_page_unlock_queues();
1420: pmap_clear_modify(m->phys_addr);
1421: break;
1422: }
1423: else {
1424: if ((object != first_object) || must_be_resident)
1425: vm_object_paging_end(object);
1426: offset += object->shadow_offset;
1427: hi_offset += object->shadow_offset;
1428: lo_offset += object->shadow_offset;
1429: access_required = VM_PROT_READ;
1430: vm_object_lock(next_object);
1431: vm_object_unlock(object);
1432: object = next_object;
1433: vm_object_paging_begin(object);
1434: }
1435: }
1436:
1437: /*
1438: * PAGE HAS BEEN FOUND.
1439: *
1440: * This page (m) is:
1441: * busy, so that we can play with it;
1442: * not absent, so that nobody else will fill it;
1443: * possibly eligible for pageout;
1444: *
1445: * The top-level page (first_m) is:
1446: * VM_PAGE_NULL if the page was found in the
1447: * top-level object;
1448: * busy, not absent, and ineligible for pageout.
1449: *
1450: * The current object (object) is locked. A paging
1451: * reference is held for the current and top-level
1452: * objects.
1453: */
1454:
1455: #if TRACEFAULTPAGE
1456: dbgTrace(0xBEEF0015, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1457: #endif
1458: #if EXTRA_ASSERTIONS
1459: assert(m->busy && !m->absent);
1460: assert((first_m == VM_PAGE_NULL) ||
1461: (first_m->busy && !first_m->absent &&
1462: !first_m->active && !first_m->inactive));
1463: #endif /* EXTRA_ASSERTIONS */
1464:
1465: XPR(XPR_VM_FAULT,
1466: "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n",
1467: (integer_t)object, offset, (integer_t)m,
1468: (integer_t)first_object, (integer_t)first_m);
1469: /*
1470: * If the page is being written, but isn't
1471: * already owned by the top-level object,
1472: * we have to copy it into a new page owned
1473: * by the top-level object.
1474: */
1475:
1476: if (object != first_object) {
1477: /*
1478: * We only really need to copy if we
1479: * want to write it.
1480: */
1481:
1482: #if TRACEFAULTPAGE
1483: dbgTrace(0xBEEF0016, (unsigned int) object, (unsigned int) fault_type); /* (TEST/DEBUG) */
1484: #endif
1485: if (fault_type & VM_PROT_WRITE) {
1486: vm_page_t copy_m;
1487:
1488: assert(!must_be_resident);
1489:
1490: /*
1491: * If we try to collapse first_object at this
1492: * point, we may deadlock when we try to get
1493: * the lock on an intermediate object (since we
1494: * have the bottom object locked). We can't
1495: * unlock the bottom object, because the page
1496: * we found may move (by collapse) if we do.
1497: *
1498: * Instead, we first copy the page. Then, when
1499: * we have no more use for the bottom object,
1500: * we unlock it and try to collapse.
1501: *
1502: * Note that we copy the page even if we didn't
1503: * need to... that's the breaks.
1504: */
1505:
1506: /*
1507: * Allocate a page for the copy
1508: */
1509: copy_m = vm_page_grab();
1510: if (copy_m == VM_PAGE_NULL) {
1511: RELEASE_PAGE(m);
1512: vm_fault_cleanup(object, first_m);
1513: return(VM_FAULT_MEMORY_SHORTAGE);
1514: }
1515:
1516: vm_object_unlock(object);
1517:
1518: XPR(XPR_VM_FAULT,
1519: "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n",
1520: (integer_t)object, offset,
1521: (integer_t)m, (integer_t)copy_m, 0);
1522: vm_page_copy(m, copy_m);
1523: vm_object_lock(object);
1524:
1525: /*
1526: * If another map is truly sharing this
1527: * page with us, we have to flush all
1528: * uses of the original page, since we
1529: * can't distinguish those which want the
1530: * original from those which need the
1531: * new copy.
1532: *
1533: * XXXO If we know that only one map has
1534: * access to this page, then we could
1535: * avoid the pmap_page_protect() call.
1536: */
1537:
1538: vm_page_lock_queues();
1539: assert(!m->cleaning);
1540: pmap_page_protect(m->phys_addr, VM_PROT_NONE);
1541: vm_page_deactivate(m);
1542: copy_m->dirty = TRUE;
1543: /*
1544: * Setting reference here prevents this fault from
1545: * being counted as a (per-thread) reactivate as well
1546: * as a copy-on-write.
1547: */
1548: first_m->reference = TRUE;
1549: vm_page_unlock_queues();
1550:
1551: /*
1552: * We no longer need the old page or object.
1553: */
1554:
1555: PAGE_WAKEUP_DONE(m);
1556: vm_object_paging_end(object);
1557: vm_object_unlock(object);
1558:
1559: if (type_of_fault)
1560: *type_of_fault = DBG_COW_FAULT;
1561: VM_STAT(cow_faults++);
1562: current_task()->cow_faults++;
1563: object = first_object;
1564: offset = first_offset;
1565:
1566: vm_object_lock(object);
1567: VM_PAGE_FREE(first_m);
1568: first_m = VM_PAGE_NULL;
1569: assert(copy_m->busy);
1570: vm_page_insert(copy_m, object, offset);
1571: m = copy_m;
1572:
1573: /*
1574: * Now that we've gotten the copy out of the
1575: * way, let's try to collapse the top object.
1576: * But we have to play ugly games with
1577: * paging_in_progress to do that...
1578: */
1579:
1580: vm_object_paging_end(object);
1581: vm_object_collapse(object);
1582: vm_object_paging_begin(object);
1583: }
1584: else {
1585: *protection &= (~VM_PROT_WRITE);
1586: }
1587: }
1588:
1589: /*
1590: * Now check whether the page needs to be pushed into the
1591: * copy object. The use of asymmetric copy on write for
1592: * shared temporary objects means that we may do two copies to
1593: * satisfy the fault; one above to get the page from a
1594: * shadowed object, and one here to push it into the copy.
1595: */
1596:
1597: while (first_object->copy_strategy == MEMORY_OBJECT_COPY_DELAY &&
1598: (copy_object = first_object->copy) != VM_OBJECT_NULL) {
1599: vm_offset_t copy_offset;
1600: vm_page_t copy_m;
1601:
1602: #if TRACEFAULTPAGE
1603: dbgTrace(0xBEEF0017, (unsigned int) copy_object, (unsigned int) fault_type); /* (TEST/DEBUG) */
1604: #endif
1605: /*
1606: * If the page is being written, but hasn't been
1607: * copied to the copy-object, we have to copy it there.
1608: */
1609:
1610: if ((fault_type & VM_PROT_WRITE) == 0) {
1611: *protection &= ~VM_PROT_WRITE;
1612: break;
1613: }
1614:
1615: /*
1616: * If the page was guaranteed to be resident,
1617: * we must have already performed the copy.
1618: */
1619:
1620: if (must_be_resident)
1621: break;
1622:
1623: /*
1624: * Try to get the lock on the copy_object.
1625: */
1626: if (!vm_object_lock_try(copy_object)) {
1627: vm_object_unlock(object);
1628:
1629: mutex_pause(); /* wait a bit */
1630:
1631: vm_object_lock(object);
1632: continue;
1633: }
1634:
1635: /*
1636: * Make another reference to the copy-object,
1637: * to keep it from disappearing during the
1638: * copy.
1639: */
1640: assert(copy_object->ref_count > 0);
1641: copy_object->ref_count++;
1642: VM_OBJ_RES_INCR(copy_object);
1643:
1644: /*
1645: * Does the page exist in the copy?
1646: */
1647: copy_offset = first_offset - copy_object->shadow_offset;
1648: if (copy_object->size <= copy_offset)
1649: /*
1650: * Copy object doesn't cover this page -- do nothing.
1651: */
1652: ;
1653: else if ((copy_m =
1654: vm_page_lookup(copy_object, copy_offset)) != VM_PAGE_NULL) {
1655: /* Page currently exists in the copy object */
1656: if (copy_m->busy) {
1657: /*
1658: * If the page is being brought
1659: * in, wait for it and then retry.
1660: */
1661: RELEASE_PAGE(m);
1662: /* take an extra ref so object won't die */
1663: assert(copy_object->ref_count > 0);
1664: copy_object->ref_count++;
1665: vm_object_res_reference(copy_object);
1666: vm_object_unlock(copy_object);
1667: vm_fault_cleanup(object, first_m);
1668: counter(c_vm_fault_page_block_backoff_kernel++);
1669: vm_object_lock(copy_object);
1670: assert(copy_object->ref_count > 0);
1671: VM_OBJ_RES_DECR(copy_object);
1672: copy_object->ref_count--;
1673: assert(copy_object->ref_count > 0);
1674: copy_m = vm_page_lookup(copy_object, copy_offset);
1675: if (copy_m != VM_PAGE_NULL &&
1676: copy_m->busy) {
1677: PAGE_ASSERT_WAIT(copy_m, interruptible);
1678: vm_object_unlock(copy_object);
1679: thread_block((void (*)(void))0);
1680: vm_object_deallocate(copy_object);
1681: goto backoff;
1682: } else {
1683: vm_object_unlock(copy_object);
1684: vm_object_deallocate(copy_object);
1685: return VM_FAULT_RETRY;
1686: }
1687: }
1688: }
1689: else if (!PAGED_OUT(copy_object, copy_offset)) {
1690: /*
1691: * If PAGED_OUT is TRUE, then the page used to exist
1692: * in the copy-object, and has already been paged out.
1693: * We don't need to repeat this. If PAGED_OUT is
1694: * FALSE, then either we don't know (!pager_created,
1695: * for example) or it hasn't been paged out.
1696: * (VM_EXTERNAL_STATE_UNKNOWN||VM_EXTERNAL_STATE_ABSENT)
1697: * We must copy the page to the copy object.
1698: */
1699:
1700: /*
1701: * Allocate a page for the copy
1702: */
1703: copy_m = vm_page_alloc(copy_object, copy_offset);
1704: if (copy_m == VM_PAGE_NULL) {
1705: RELEASE_PAGE(m);
1706: VM_OBJ_RES_DECR(copy_object);
1707: copy_object->ref_count--;
1708: assert(copy_object->ref_count > 0);
1709: vm_object_unlock(copy_object);
1710: vm_fault_cleanup(object, first_m);
1711: return(VM_FAULT_MEMORY_SHORTAGE);
1712: }
1713:
1714: /*
1715: * Must copy page into copy-object.
1716: */
1717:
1718: vm_page_copy(m, copy_m);
1719:
1720: /*
1721: * If the old page was in use by any users
1722: * of the copy-object, it must be removed
1723: * from all pmaps. (We can't know which
1724: * pmaps use it.)
1725: */
1726:
1727: vm_page_lock_queues();
1728: assert(!m->cleaning);
1729: pmap_page_protect(m->phys_addr, VM_PROT_NONE);
1730: copy_m->dirty = TRUE;
1731: vm_page_unlock_queues();
1732:
1733: /*
1734: * If there's a pager, then immediately
1735: * page out this page, using the "initialize"
1736: * option. Else, we use the copy.
1737: */
1738:
1739: if
1740: #if MACH_PAGEMAP
1741: ((!copy_object->pager_created) ||
1742: vm_external_state_get(
1743: copy_object->existence_map, copy_offset)
1744: == VM_EXTERNAL_STATE_ABSENT)
1745: #else
1746: (!copy_object->pager_created)
1747: #endif
1748: {
1749: vm_page_lock_queues();
1750: vm_page_activate(copy_m);
1751: vm_page_unlock_queues();
1752: PAGE_WAKEUP_DONE(copy_m);
1753: }
1754: else {
1755: assert(copy_m->busy == TRUE);
1756:
1757: /*
1758: * The page is already ready for pageout:
1759: * not on pageout queues and busy.
1760: * Unlock everything except the
1761: * copy_object itself.
1762: */
1763:
1764: vm_object_unlock(object);
1765:
1766: /*
1767: * Write the page to the copy-object,
1768: * flushing it from the kernel.
1769: */
1770:
1771: vm_pageout_initialize_page(copy_m);
1772:
1773: /*
1774: * Since the pageout may have
1775: * temporarily dropped the
1776: * copy_object's lock, we
1777: * check whether we'll have
1778: * to deallocate the hard way.
1779: */
1780:
1781: if ((copy_object->shadow != object) ||
1782: (copy_object->ref_count == 1)) {
1783: vm_object_unlock(copy_object);
1784: vm_object_deallocate(copy_object);
1785: vm_object_lock(object);
1786: continue;
1787: }
1788:
1789: /*
1790: * Pick back up the old object's
1791: * lock. [It is safe to do so,
1792: * since it must be deeper in the
1793: * object tree.]
1794: */
1795:
1796: vm_object_lock(object);
1797: }
1798:
1799: /*
1800: * Because we're pushing a page upward
1801: * in the object tree, we must restart
1802: * any faults that are waiting here.
1803: * [Note that this is an expansion of
1804: * PAGE_WAKEUP that uses the THREAD_RESTART
1805: * wait result]. Can't turn off the page's
1806: * busy bit because we're not done with it.
1807: */
1808:
1809: if (m->wanted) {
1810: m->wanted = FALSE;
1811: thread_wakeup_with_result((event_t) m,
1812: THREAD_RESTART);
1813: }
1814: }
1815:
1816: /*
1817: * The reference count on copy_object must be
1818: * at least 2: one for our extra reference,
1819: * and at least one from the outside world
1820: * (we checked that when we last locked
1821: * copy_object).
1822: */
1823: copy_object->ref_count--;
1824: assert(copy_object->ref_count > 0);
1825: VM_OBJ_RES_DECR(copy_object);
1826: vm_object_unlock(copy_object);
1827:
1828: break;
1829: }
1830:
1831: *result_page = m;
1832: *top_page = first_m;
1833:
1834: XPR(XPR_VM_FAULT,
1835: "vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n",
1836: (integer_t)object, offset, (integer_t)m, (integer_t)first_m, 0);
1837: /*
1838: * If the page can be written, assume that it will be.
1839: * [Earlier, we restrict the permission to allow write
1840: * access only if the fault so required, so we don't
1841: * mark read-only data as dirty.]
1842: */
1843:
1844: #if !VM_FAULT_STATIC_CONFIG
1845: if (vm_fault_dirty_handling && (*protection & VM_PROT_WRITE))
1846: m->dirty = TRUE;
1847: #endif
1848: #if TRACEFAULTPAGE
1849: dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_page_deactivate_behind); /* (TEST/DEBUG) */
1850: #endif
1851: if (vm_page_deactivate_behind) {
1852: if (offset && /* don't underflow */
1853: (object->last_alloc == (offset - PAGE_SIZE))) {
1854: m = vm_page_lookup(object, object->last_alloc);
1855: if ((m != VM_PAGE_NULL) && !m->busy) {
1856: vm_page_lock_queues();
1857: vm_page_deactivate(m);
1858: vm_page_unlock_queues();
1859: }
1860: #if TRACEFAULTPAGE
1861: dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
1862: #endif
1863: }
1864: object->last_alloc = offset;
1865: }
1866: #if TRACEFAULTPAGE
1867: dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS, 0); /* (TEST/DEBUG) */
1868: #endif
1869: return(VM_FAULT_SUCCESS);
1870:
1871: #if 0
1872: block_and_backoff:
1873: vm_fault_cleanup(object, first_m);
1874:
1875: counter(c_vm_fault_page_block_backoff_kernel++);
1876: thread_block((void (*)(void))0);
1877: #endif
1878:
1879: backoff:
1880: if (thread->wait_result == THREAD_AWAKENED)
1881: {
1882: return VM_FAULT_RETRY;
1883: }
1884: else
1885: {
1886: return VM_FAULT_INTERRUPTED;
1887: }
1888:
1889: #undef RELEASE_PAGE
1890: }
1891:
1892: /*
1893: * Routine: vm_fault
1894: * Purpose:
1895: * Handle page faults, including pseudo-faults
1896: * used to change the wiring status of pages.
1897: * Returns:
1898: * Explicit continuations have been removed.
1899: * Implementation:
1900: * vm_fault and vm_fault_page save mucho state
1901: * in the moral equivalent of a closure. The state
1902: * structure is allocated when first entering vm_fault
1903: * and deallocated when leaving vm_fault.
1904: */
1905:
1906: kern_return_t
1907: vm_fault(
1908: vm_map_t map,
1909: vm_offset_t vaddr,
1910: vm_prot_t fault_type,
1911: boolean_t change_wiring)
1912: {
1913: vm_map_version_t version; /* Map version for verification */
1914: boolean_t wired; /* Should mapping be wired down? */
1915: vm_object_t object; /* Top-level object */
1916: vm_offset_t offset; /* Top-level offset */
1917: vm_prot_t prot; /* Protection for mapping */
1918: vm_behavior_t behavior; /* Expected paging behavior */
1919: vm_offset_t lo_offset, hi_offset;
1920: vm_object_t old_copy_object; /* Saved copy object */
1921: vm_page_t result_page; /* Result of vm_fault_page */
1922: vm_page_t top_page; /* Placeholder page */
1923: kern_return_t kr;
1924:
1925: register
1926: vm_page_t m; /* Fast access to result_page */
1927: kern_return_t error_code; /* page error reasons */
1928: register
1929: vm_object_t cur_object;
1930: register
1931: vm_offset_t cur_offset;
1932: vm_page_t cur_m;
1933: vm_object_t new_object;
1934: int type_of_fault;
1935:
1936:
1937: KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_START,
1938: vaddr,
1939: 0,
1940: 0,
1941: 0,
1942: 0);
1943: /*
1944: * assume we will hit a page in the cache
1945: * otherwise, explicitly override with
1946: * the real fault type once we determine it
1947: */
1948: type_of_fault = DBG_CACHE_HIT_FAULT;
1949:
1950: VM_STAT(faults++);
1951: current_task()->faults++;
1952:
1953: RetryFault: ;
1954:
1955: /*
1956: * Find the backing store object and offset into
1957: * it to begin the search.
1958: */
1959: vm_map_lock_read(map);
1960: kr = vm_map_lookup_locked(&map, vaddr, fault_type, &version,
1961: &object, &offset,
1962: &prot, &wired,
1963: &behavior, &lo_offset, &hi_offset);
1964:
1965: if (kr != KERN_SUCCESS) {
1966: vm_map_unlock_read(map);
1967: goto done;
1968: }
1969:
1970: /*
1971: * If the page is wired, we must fault for the current protection
1972: * value, to avoid further faults.
1973: */
1974:
1975: if (wired)
1976: fault_type = prot | VM_PROT_WRITE;
1977:
1978: #if VM_FAULT_CLASSIFY
1979: /*
1980: * Temporary data gathering code
1981: */
1982: vm_fault_classify(object, offset, fault_type);
1983: #endif
1984: /*
1985: * Fast fault code. The basic idea is to do as much as
1986: * possible while holding the map lock and object locks.
1987: * Busy pages are not used until the object lock has to
1988: * be dropped to do something (copy, zero fill, pmap enter).
1989: * Similarly, paging references aren't acquired until that
1990: * point, and object references aren't used.
1991: *
1992: * If we can figure out what to do
1993: * (zero fill, copy on write, pmap enter) while holding
1994: * the locks, then it gets done. Otherwise, we give up,
1995: * and use the original fault path (which doesn't hold
1996: * the map lock, and relies on busy pages).
1997: * The give up cases include:
1998: * - Have to talk to pager.
1999: * - Page is busy, absent or in error.
2000: * - Pager has locked out desired access.
2001: * - Fault needs to be restarted.
2002: * - Have to push page into copy object.
2003: *
2004: * The code is an infinite loop that moves one level down
2005: * the shadow chain each time. cur_object and cur_offset
2006: * refer to the current object being examined. object and offset
2007: * are the original object from the map. The loop is at the
2008: * top level if and only if object and cur_object are the same.
2009: *
2010: * Invariants: Map lock is held throughout. Lock is held on
2011: * original object and cur_object (if different) when
2012: * continuing or exiting loop.
2013: *
2014: */
2015:
2016:
2017: /*
2018: * If this page is to be inserted in a copy delay object
2019: * for writing, and if the object has a copy, then the
2020: * copy delay strategy is implemented in the slow fault page.
2021: */
2022: if (object->copy_strategy != MEMORY_OBJECT_COPY_DELAY ||
2023: object->copy == VM_OBJECT_NULL ||
2024: (fault_type & VM_PROT_WRITE) == 0) {
2025: cur_object = object;
2026: cur_offset = offset;
2027:
2028: while (TRUE) {
2029: m = vm_page_lookup(cur_object, cur_offset);
2030: if (m != VM_PAGE_NULL) {
2031: if (m->busy)
2032: break;
2033:
2034: if (m->unusual && (m->error || m->restart ||
2035: m->absent || (fault_type & m->page_lock))) {
2036:
2037: /*
2038: * Unusual case. Give up.
2039: */
2040: break;
2041: }
2042:
2043: /*
2044: * Two cases of map in faults:
2045: * - At top level w/o copy object.
2046: * - Read fault anywhere.
2047: * --> must disallow write.
2048: */
2049:
2050: if (object == cur_object &&
2051: object->copy == VM_OBJECT_NULL)
2052: goto FastMapInFault;
2053:
2054: if ((fault_type & VM_PROT_WRITE) == 0) {
2055:
2056: prot &= ~VM_PROT_WRITE;
2057:
2058: /*
2059: * Set up to map the page ...
2060: * mark the page busy, drop
2061: * locks and take a paging reference
2062: * on the object with the page.
2063: */
2064:
2065: if (object != cur_object) {
2066: vm_object_unlock(object);
2067: object = cur_object;
2068: }
2069: FastMapInFault:
2070: m->busy = TRUE;
2071:
2072: vm_object_paging_begin(object);
2073: vm_object_unlock(object);
2074:
2075: FastPmapEnter:
2076: /*
2077: * Check a couple of global reasons to
2078: * be conservative about write access.
2079: * Then do the pmap_enter.
2080: */
2081: #if !VM_FAULT_STATIC_CONFIG
2082: if (vm_fault_dirty_handling
2083: #if MACH_KDB
2084: || db_watchpoint_list
2085: #endif
2086: && (fault_type & VM_PROT_WRITE) == 0)
2087: prot &= ~VM_PROT_WRITE;
2088: #else /* STATIC_CONFIG */
2089: #if MACH_KDB
2090: if (db_watchpoint_list
2091: && (fault_type & VM_PROT_WRITE) == 0)
2092: prot &= ~VM_PROT_WRITE;
2093: #endif /* MACH_KDB */
2094: #endif /* STATIC_CONFIG */
2095: PMAP_ENTER(vm_map_pmap(map), vaddr, m,
2096: prot, wired);
2097:
2098: if (m->clustered) {
2099: vm_pagein_cluster_used++;
2100: m->clustered = FALSE;
2101:
2102: pmap_attribute(vm_map_pmap(map),
2103: vaddr,
2104: PAGE_SIZE,
2105: MATTR_CACHE,
2106: &mv_cache_sync);
2107: }
2108: /*
2109: * Grab the object lock to manipulate
2110: * the page queues. Change wiring
2111: * case is obvious. In soft ref bits
2112: * case activate page only if it fell
2113: * off paging queues, otherwise just
2114: * activate it if it's inactive.
2115: *
2116: * NOTE: original vm_fault code will
2117: * move active page to back of active
2118: * queue. This code doesn't.
2119: */
2120: vm_object_lock(object);
2121: vm_page_lock_queues();
2122:
2123: m->reference = TRUE;
2124:
2125: if (change_wiring) {
2126: if (wired)
2127: vm_page_wire(m);
2128: else
2129: vm_page_unwire(m);
2130: }
2131: #if VM_FAULT_STATIC_CONFIG
2132: else {
2133: if (!m->active && !m->inactive)
2134: vm_page_activate(m);
2135: }
2136: #else
2137: else if (software_reference_bits) {
2138: if (!m->active && !m->inactive)
2139: vm_page_activate(m);
2140: }
2141: else if (!m->active) {
2142: vm_page_activate(m);
2143: }
2144: #endif
2145: vm_page_unlock_queues();
2146:
2147: /*
2148: * That's it, clean up and return.
2149: */
2150: PAGE_WAKEUP_DONE(m);
2151: vm_object_paging_end(object);
2152: vm_object_unlock(object);
2153: vm_map_unlock_read(map);
2154:
2155: KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
2156: vaddr,
2157: type_of_fault,
2158: KERN_SUCCESS,
2159: 0,
2160: 0);
2161: return KERN_SUCCESS;
2162: }
2163:
2164: /*
2165: * Copy on write fault. If objects match, then
2166: * object->copy must not be NULL (else control
2167: * would be in previous code block), and we
2168: * have a potential push into the copy object
2169: * with which we won't cope here.
2170: */
2171:
2172: if (cur_object == object)
2173: break;
2174:
2175: /*
2176: * This is now a shadow based copy on write
2177: * fault -- it requires a copy up the shadow
2178: * chain.
2179: *
2180: * Allocate a page in the original top level
2181: * object. Give up if allocate fails. Also
2182: * need to remember current page, as it's the
2183: * source of the copy.
2184: */
2185: cur_m = m;
2186: m = vm_page_alloc(object, offset);
2187: if (m == VM_PAGE_NULL) {
2188: break;
2189: }
2190:
2191: /*
2192: * Now do the copy. Mark the source busy
2193: * and take out paging references on both
2194: * objects.
2195: *
2196: * NOTE: This code holds the map lock across
2197: * the page copy.
2198: */
2199:
2200: cur_m->busy = TRUE;
2201:
2202: vm_object_paging_begin(cur_object);
2203: vm_object_unlock(cur_object);
2204: vm_object_paging_begin(object);
2205: vm_object_unlock(object);
2206:
2207: vm_page_copy(cur_m, m);
2208: type_of_fault = DBG_COW_FAULT;
2209: VM_STAT(cow_faults++);
2210: current_task()->cow_faults++;
2211:
2212: /*
2213: * Now cope with the source page and object
2214: * If the top object has a ref count of 1
2215: * then no other map can access it, and hence
2216: * it's not necessary to do the pmap_page_protect.
2217: */
2218:
2219: vm_object_lock(object);
2220: vm_object_lock(cur_object);
2221:
2222: vm_page_lock_queues();
2223: vm_page_deactivate(cur_m);
2224: m->dirty = TRUE;
2225: if (object->ref_count != 1)
2226: pmap_page_protect(cur_m->phys_addr,
2227: VM_PROT_NONE);
2228: vm_page_unlock_queues();
2229:
2230: PAGE_WAKEUP_DONE(cur_m);
2231: vm_object_paging_end(cur_object);
2232: vm_object_unlock(cur_object);
2233:
2234: /*
2235: * Slight hack to call vm_object collapse
2236: * and then reuse common map in code.
2237: * note that the object lock was taken above.
2238: */
2239:
2240: vm_object_paging_end(object);
2241: vm_object_collapse(object);
2242: vm_object_paging_begin(object);
2243: vm_object_unlock(object);
2244:
2245: goto FastPmapEnter;
2246: }
2247: else {
2248:
2249: /*
2250: * No page at cur_object, cur_offset
2251: */
2252:
2253: if (cur_object->pager_created) {
2254:
2255: /*
2256: * Have to talk to the pager. Give up.
2257: */
2258:
2259: break;
2260: }
2261:
2262:
2263: if (cur_object->shadow == VM_OBJECT_NULL) {
2264:
2265: /*
2266: * Zero fill fault. Page gets
2267: * filled in top object. Insert
2268: * page, then drop any lower lock.
2269: * Give up if no page.
2270: */
2271: if ((vm_page_free_target -
2272: ((vm_page_free_target-vm_page_free_min)>>2))
2273: > vm_page_free_count) {
2274: break;
2275: }
2276: m = vm_page_alloc(object, offset);
2277: if (m == VM_PAGE_NULL) {
2278: break;
2279: }
2280:
2281: if (cur_object != object)
2282: vm_object_unlock(cur_object);
2283:
2284: vm_object_paging_begin(object);
2285: vm_object_unlock(object);
2286:
2287: /*
2288: * Now zero fill page and map it.
2289: * the page is probably going to
2290: * be written soon, so don't bother
2291: * to clear the modified bit
2292: *
2293: * NOTE: This code holds the map
2294: * lock across the zero fill.
2295: */
2296:
2297: if (!map->no_zero_fill) {
2298: vm_page_zero_fill(m);
2299: type_of_fault = DBG_ZERO_FILL_FAULT;
2300: VM_STAT(zero_fill_count++);
2301: }
2302: vm_page_lock_queues();
2303: VM_PAGE_QUEUES_REMOVE(m);
2304: queue_enter(&vm_page_queue_inactive,
2305: m, vm_page_t, pageq);
2306: m->inactive = TRUE;
2307: vm_page_inactive_count++;
2308: vm_page_unlock_queues();
2309: goto FastPmapEnter;
2310: }
2311:
2312: /*
2313: * On to the next level
2314: */
2315:
2316: cur_offset += cur_object->shadow_offset;
2317: new_object = cur_object->shadow;
2318: vm_object_lock(new_object);
2319: if (cur_object != object)
2320: vm_object_unlock(cur_object);
2321: cur_object = new_object;
2322:
2323: continue;
2324: }
2325: }
2326:
2327: /*
2328: * Cleanup from fast fault failure. Drop any object
2329: * lock other than original and drop map lock.
2330: */
2331:
2332: if (object != cur_object)
2333: vm_object_unlock(cur_object);
2334: }
2335: vm_map_unlock_read(map);
2336:
2337: /*
2338: * Make a reference to this object to
2339: * prevent its disposal while we are messing with
2340: * it. Once we have the reference, the map is free
2341: * to be diddled. Since objects reference their
2342: * shadows (and copies), they will stay around as well.
2343: */
2344:
2345: assert(object->ref_count > 0);
2346: object->ref_count++;
2347: vm_object_res_reference(object);
2348: vm_object_paging_begin(object);
2349:
2350: XPR(XPR_VM_FAULT,"vm_fault -> vm_fault_page\n",0,0,0,0,0);
2351: kr = vm_fault_page(object, offset, fault_type,
2352: (change_wiring && !wired),
2353: ((!change_wiring) ? THREAD_ABORTSAFE : THREAD_UNINT),
2354: lo_offset, hi_offset, behavior,
2355: &prot, &result_page, &top_page,
2356: &type_of_fault,
2357: &error_code, map->no_zero_fill, FALSE);
2358:
2359: /*
2360: * If we didn't succeed, lose the object reference immediately.
2361: */
2362:
2363: if (kr != VM_FAULT_SUCCESS)
2364: vm_object_deallocate(object);
2365:
2366: /*
2367: * See why we failed, and take corrective action.
2368: */
2369:
2370: switch (kr) {
2371: case VM_FAULT_SUCCESS:
2372: break;
2373: case VM_FAULT_RETRY:
2374: goto RetryFault;
2375: case VM_FAULT_INTERRUPTED:
2376: kr = KERN_SUCCESS;
2377: goto done;
2378: case VM_FAULT_MEMORY_SHORTAGE:
2379: VM_PAGE_WAIT();
2380: goto RetryFault;
2381: case VM_FAULT_FICTITIOUS_SHORTAGE:
2382: vm_page_more_fictitious();
2383: goto RetryFault;
2384: case VM_FAULT_MEMORY_ERROR:
2385: if (error_code)
2386: kr = error_code;
2387: else
2388: kr = KERN_MEMORY_ERROR;
2389: goto done;
2390: }
2391:
2392: m = result_page;
2393:
2394: assert((change_wiring && !wired) ?
2395: (top_page == VM_PAGE_NULL) :
2396: ((top_page == VM_PAGE_NULL) == (m->object == object)));
2397:
2398: /*
2399: * How to clean up the result of vm_fault_page. This
2400: * happens whether the mapping is entered or not.
2401: */
2402:
2403: #define UNLOCK_AND_DEALLOCATE \
2404: MACRO_BEGIN \
2405: vm_fault_cleanup(m->object, top_page); \
2406: vm_object_deallocate(object); \
2407: MACRO_END
2408:
2409: /*
2410: * What to do with the resulting page from vm_fault_page
2411: * if it doesn't get entered into the physical map:
2412: */
2413:
2414: #define RELEASE_PAGE(m) \
2415: MACRO_BEGIN \
2416: PAGE_WAKEUP_DONE(m); \
2417: vm_page_lock_queues(); \
2418: if (!m->active && !m->inactive) \
2419: vm_page_activate(m); \
2420: vm_page_unlock_queues(); \
2421: MACRO_END
2422:
2423: /*
2424: * We must verify that the maps have not changed
2425: * since our last lookup.
2426: */
2427:
2428: old_copy_object = m->object->copy;
2429:
2430: vm_object_unlock(m->object);
2431: while (!vm_map_verify(map, &version)) {
2432: vm_object_t retry_object;
2433: vm_offset_t retry_offset;
2434: vm_prot_t retry_prot;
2435:
2436: /*
2437: * To avoid trying to write_lock the map while another
2438: * thread has it read_locked (in vm_map_pageable), we
2439: * do not try for write permission. If the page is
2440: * still writable, we will get write permission. If it
2441: * is not, or has been marked needs_copy, we enter the
2442: * mapping without write permission, and will merely
2443: * take another fault.
2444: */
2445: vm_map_lock_read(map);
2446: kr = vm_map_lookup_locked(&map, vaddr,
2447: fault_type & ~VM_PROT_WRITE, &version,
2448: &retry_object, &retry_offset, &retry_prot,
2449: &wired, &behavior, &lo_offset, &hi_offset);
2450: vm_map_unlock_read(map);
2451:
2452: if (kr != KERN_SUCCESS) {
2453: vm_object_lock(m->object);
2454: RELEASE_PAGE(m);
2455: UNLOCK_AND_DEALLOCATE;
2456: goto done;
2457: }
2458:
2459: vm_object_unlock(retry_object);
2460: vm_object_lock(m->object);
2461:
2462: if ((retry_object != object) ||
2463: (retry_offset != offset)) {
2464: RELEASE_PAGE(m);
2465: UNLOCK_AND_DEALLOCATE;
2466: goto RetryFault;
2467: }
2468:
2469: /*
2470: * Check whether the protection has changed or the object
2471: * has been copied while we left the map unlocked.
2472: */
2473: prot &= retry_prot;
2474: vm_object_unlock(m->object);
2475: }
2476: vm_object_lock(m->object);
2477:
2478: /*
2479: * If the copy object changed while the top-level object
2480: * was unlocked, then we must take away write permission.
2481: */
2482:
2483: if (m->object->copy != old_copy_object)
2484: prot &= ~VM_PROT_WRITE;
2485:
2486: /*
2487: * If we want to wire down this page, but no longer have
2488: * adequate permissions, we must start all over.
2489: */
2490:
2491: if (wired && (fault_type != (prot|VM_PROT_WRITE))) {
2492: vm_map_verify_done(map, &version);
2493: RELEASE_PAGE(m);
2494: UNLOCK_AND_DEALLOCATE;
2495: goto RetryFault;
2496: }
2497:
2498: /*
2499: * It's critically important that a wired-down page be faulted
2500: * only once in each map for which it is wired.
2501: */
2502: vm_object_unlock(m->object);
2503:
2504: /*
2505: * Put this page into the physical map.
2506: * We had to do the unlock above because pmap_enter
2507: * may cause other faults. The page may be on
2508: * the pageout queues. If the pageout daemon comes
2509: * across the page, it will remove it from the queues.
2510: */
2511: PMAP_ENTER(map->pmap, vaddr, m, prot, wired);
2512:
2513: /* Sync I & D caches for new mapping*/
2514: pmap_attribute(map->pmap,
2515: vaddr,
2516: PAGE_SIZE,
2517: MATTR_CACHE,
2518: &mv_cache_sync);
2519:
2520: /*
2521: * If the page is not wired down and isn't already
2522: * on a pageout queue, then put it where the
2523: * pageout daemon can find it.
2524: */
2525: vm_object_lock(m->object);
2526: vm_page_lock_queues();
2527: if (change_wiring) {
2528: if (wired)
2529: vm_page_wire(m);
2530: else
2531: vm_page_unwire(m);
2532: }
2533: #if VM_FAULT_STATIC_CONFIG
2534: else {
2535: if (!m->active && !m->inactive)
2536: vm_page_activate(m);
2537: m->reference = TRUE;
2538: }
2539: #else
2540: else if (software_reference_bits) {
2541: if (!m->active && !m->inactive)
2542: vm_page_activate(m);
2543: m->reference = TRUE;
2544: } else {
2545: vm_page_activate(m);
2546: }
2547: #endif
2548: vm_page_unlock_queues();
2549:
2550: /*
2551: * Unlock everything, and return
2552: */
2553:
2554: vm_map_verify_done(map, &version);
2555: PAGE_WAKEUP_DONE(m);
2556: kr = KERN_SUCCESS;
2557: UNLOCK_AND_DEALLOCATE;
2558:
2559: #undef UNLOCK_AND_DEALLOCATE
2560: #undef RELEASE_PAGE
2561:
2562: done:
2563: KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END,
2564: vaddr,
2565: type_of_fault,
2566: kr,
2567: 0,
2568: 0);
2569: return(kr);
2570: }
2571:
2572: /*
2573: * vm_fault_wire:
2574: *
2575: * Wire down a range of virtual addresses in a map.
2576: */
2577: kern_return_t
2578: vm_fault_wire(
2579: vm_map_t map,
2580: vm_map_entry_t entry)
2581: {
2582:
2583: register vm_offset_t va;
2584: register pmap_t pmap;
2585: register vm_offset_t end_addr = entry->vme_end;
2586: register kern_return_t rc;
2587:
2588: assert(entry->in_transition);
2589: pmap = vm_map_pmap(map);
2590:
2591: /*
2592: * Inform the physical mapping system that the
2593: * range of addresses may not fault, so that
2594: * page tables and such can be locked down as well.
2595: */
2596:
2597: pmap_pageable(pmap, entry->vme_start, end_addr, FALSE);
2598:
2599: /*
2600: * We simulate a fault to get the page and enter it
2601: * in the physical map.
2602: */
2603:
2604: for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
2605: if ((rc = vm_fault_wire_fast(map, va, entry)) != KERN_SUCCESS) {
2606: rc = vm_fault(map, va, VM_PROT_NONE, TRUE);
2607: }
2608:
2609: if (rc != KERN_SUCCESS) {
2610: struct vm_map_entry tmp_entry = *entry;
2611:
2612: /* unwire wired pages */
2613: tmp_entry.vme_end = va;
2614: vm_fault_unwire(map, &tmp_entry, FALSE);
2615:
2616: return rc;
2617: }
2618: }
2619: return KERN_SUCCESS;
2620: }
2621:
2622: /*
2623: * vm_fault_unwire:
2624: *
2625: * Unwire a range of virtual addresses in a map.
2626: */
2627: void
2628: vm_fault_unwire(
2629: vm_map_t map,
2630: vm_map_entry_t entry,
2631: boolean_t deallocate)
2632: {
2633: register vm_offset_t va;
2634: register pmap_t pmap;
2635: register vm_offset_t end_addr = entry->vme_end;
2636: vm_object_t object;
2637:
2638: pmap = vm_map_pmap(map);
2639:
2640: object = (entry->is_sub_map)
2641: ? VM_OBJECT_NULL : entry->object.vm_object;
2642:
2643: /*
2644: * Since the pages are wired down, we must be able to
2645: * get their mappings from the physical map system.
2646: */
2647:
2648: for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
2649: pmap_change_wiring(pmap, va, FALSE);
2650:
2651: if (object == VM_OBJECT_NULL) {
2652: (void) vm_fault(map, va, VM_PROT_NONE, TRUE);
2653: } else {
2654: vm_prot_t prot;
2655: vm_page_t result_page;
2656: vm_page_t top_page;
2657: vm_object_t result_object;
2658: vm_fault_return_t result;
2659:
2660: do {
2661: prot = VM_PROT_NONE;
2662:
2663: vm_object_lock(object);
2664: vm_object_paging_begin(object);
2665: XPR(XPR_VM_FAULT,
2666: "vm_fault_unwire -> vm_fault_page\n",
2667: 0,0,0,0,0);
2668: result = vm_fault_page(object,
2669: entry->offset +
2670: (va - entry->vme_start),
2671: VM_PROT_NONE, TRUE,
2672: THREAD_UNINT,
2673: entry->offset,
2674: entry->offset +
2675: (entry->vme_end
2676: - entry->vme_start),
2677: entry->behavior,
2678: &prot,
2679: &result_page,
2680: &top_page,
2681: (int *)0,
2682: 0, map->no_zero_fill,
2683: FALSE);
2684: } while (result == VM_FAULT_RETRY);
2685:
2686: if (result != VM_FAULT_SUCCESS)
2687: panic("vm_fault_unwire: failure");
2688:
2689: result_object = result_page->object;
2690: if (deallocate) {
2691: assert(!result_page->fictitious);
2692: pmap_page_protect(result_page->phys_addr,
2693: VM_PROT_NONE);
2694: VM_PAGE_FREE(result_page);
2695: } else {
2696: vm_page_lock_queues();
2697: vm_page_unwire(result_page);
2698: vm_page_unlock_queues();
2699: PAGE_WAKEUP_DONE(result_page);
2700: }
2701:
2702: vm_fault_cleanup(result_object, top_page);
2703: }
2704: }
2705:
2706: /*
2707: * Inform the physical mapping system that the range
2708: * of addresses may fault, so that page tables and
2709: * such may be unwired themselves.
2710: */
2711:
2712: pmap_pageable(pmap, entry->vme_start, end_addr, TRUE);
2713:
2714: }
2715:
2716: /*
2717: * vm_fault_wire_fast:
2718: *
2719: * Handle common case of a wire down page fault at the given address.
2720: * If successful, the page is inserted into the associated physical map.
2721: * The map entry is passed in to avoid the overhead of a map lookup.
2722: *
2723: * NOTE: the given address should be truncated to the
2724: * proper page address.
2725: *
2726: * KERN_SUCCESS is returned if the page fault is handled; otherwise,
2727: * a standard error specifying why the fault is fatal is returned.
2728: *
2729: * The map in question must be referenced, and remains so.
2730: * Caller has a read lock on the map.
2731: *
2732: * This is a stripped version of vm_fault() for wiring pages. Anything
2733: * other than the common case will return KERN_FAILURE, and the caller
2734: * is expected to call vm_fault().
2735: */
2736: kern_return_t
2737: vm_fault_wire_fast(
2738: vm_map_t map,
2739: vm_offset_t va,
2740: vm_map_entry_t entry)
2741: {
2742: vm_object_t object;
2743: vm_offset_t offset;
2744: register vm_page_t m;
2745: vm_prot_t prot;
2746: thread_act_t thr_act;
2747:
2748: VM_STAT(faults++);
2749:
2750: if((thr_act=current_act()) && (thr_act->task != TASK_NULL))
2751: thr_act->task->faults++;
2752:
2753: /*
2754: * Recovery actions
2755: */
2756:
2757: #undef RELEASE_PAGE
2758: #define RELEASE_PAGE(m) { \
2759: PAGE_WAKEUP_DONE(m); \
2760: vm_page_lock_queues(); \
2761: vm_page_unwire(m); \
2762: vm_page_unlock_queues(); \
2763: }
2764:
2765:
2766: #undef UNLOCK_THINGS
2767: #define UNLOCK_THINGS { \
2768: object->paging_in_progress--; \
2769: vm_object_unlock(object); \
2770: }
2771:
2772: #undef UNLOCK_AND_DEALLOCATE
2773: #define UNLOCK_AND_DEALLOCATE { \
2774: UNLOCK_THINGS; \
2775: vm_object_deallocate(object); \
2776: }
2777: /*
2778: * Give up and have caller do things the hard way.
2779: */
2780:
2781: #define GIVE_UP { \
2782: UNLOCK_AND_DEALLOCATE; \
2783: return(KERN_FAILURE); \
2784: }
2785:
2786:
2787: /*
2788: * If this entry is not directly to a vm_object, bail out.
2789: */
2790: if (entry->is_sub_map)
2791: return(KERN_FAILURE);
2792:
2793: /*
2794: * Find the backing store object and offset into it.
2795: */
2796:
2797: object = entry->object.vm_object;
2798: offset = (va - entry->vme_start) + entry->offset;
2799: prot = entry->protection;
2800:
2801: /*
2802: * Make a reference to this object to prevent its
2803: * disposal while we are messing with it.
2804: */
2805:
2806: vm_object_lock(object);
2807: assert(object->ref_count > 0);
2808: object->ref_count++;
2809: vm_object_res_reference(object);
2810: object->paging_in_progress++;
2811:
2812: /*
2813: * INVARIANTS (through entire routine):
2814: *
2815: * 1) At all times, we must either have the object
2816: * lock or a busy page in some object to prevent
2817: * some other thread from trying to bring in
2818: * the same page.
2819: *
2820: * 2) Once we have a busy page, we must remove it from
2821: * the pageout queues, so that the pageout daemon
2822: * will not grab it away.
2823: *
2824: */
2825:
2826: /*
2827: * Look for page in top-level object. If it's not there or
2828: * there's something going on, give up.
2829: */
2830: m = vm_page_lookup(object, offset);
2831: if ((m == VM_PAGE_NULL) || (m->busy) ||
2832: (m->unusual && ( m->error || m->restart || m->absent ||
2833: prot & m->page_lock))) {
2834:
2835: GIVE_UP;
2836: }
2837:
2838: /*
2839: * Wire the page down now. All bail outs beyond this
2840: * point must unwire the page.
2841: */
2842:
2843: vm_page_lock_queues();
2844: vm_page_wire(m);
2845: vm_page_unlock_queues();
2846:
2847: /*
2848: * Mark page busy for other threads.
2849: */
2850: assert(!m->busy);
2851: m->busy = TRUE;
2852: assert(!m->absent);
2853:
2854: /*
2855: * Give up if the page is being written and there's a copy object
2856: */
2857: if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
2858: RELEASE_PAGE(m);
2859: GIVE_UP;
2860: }
2861:
2862: /*
2863: * Put this page into the physical map.
2864: * We have to unlock the object because pmap_enter
2865: * may cause other faults.
2866: */
2867: vm_object_unlock(object);
2868:
2869: PMAP_ENTER(map->pmap, va, m, prot, TRUE);
2870: /* Sync I & D caches for new mapping */
2871: pmap_attribute(map->pmap,
2872: va,
2873: PAGE_SIZE,
2874: MATTR_CACHE,
2875: &mv_cache_sync);
2876:
2877: /*
2878: * Must relock object so that paging_in_progress can be cleared.
2879: */
2880: vm_object_lock(object);
2881:
2882: /*
2883: * Unlock everything, and return
2884: */
2885:
2886: PAGE_WAKEUP_DONE(m);
2887: UNLOCK_AND_DEALLOCATE;
2888:
2889: return(KERN_SUCCESS);
2890:
2891: }
2892:
2893: /*
2894: * Routine: vm_fault_copy_cleanup
2895: * Purpose:
2896: * Release a page used by vm_fault_copy.
2897: */
2898:
2899: void
2900: vm_fault_copy_cleanup(
2901: vm_page_t page,
2902: vm_page_t top_page)
2903: {
2904: vm_object_t object = page->object;
2905:
2906: vm_object_lock(object);
2907: PAGE_WAKEUP_DONE(page);
2908: vm_page_lock_queues();
2909: if (!page->active && !page->inactive)
2910: vm_page_activate(page);
2911: vm_page_unlock_queues();
2912: vm_fault_cleanup(object, top_page);
2913: }
2914:
2915: void
2916: vm_fault_copy_dst_cleanup(
2917: vm_page_t page)
2918: {
2919: vm_object_t object;
2920:
2921: if (page != VM_PAGE_NULL) {
2922: object = page->object;
2923: vm_object_lock(object);
2924: vm_page_lock_queues();
2925: vm_page_unwire(page);
2926: vm_page_unlock_queues();
2927: vm_object_paging_end(object);
2928: vm_object_unlock(object);
2929: }
2930: }
2931:
2932: /*
2933: * Routine: vm_fault_copy
2934: *
2935: * Purpose:
2936: * Copy pages from one virtual memory object to another --
2937: * neither the source nor destination pages need be resident.
2938: *
2939: * Before actually copying a page, the version associated with
2940: * the destination address map wil be verified.
2941: *
2942: * In/out conditions:
2943: * The caller must hold a reference, but not a lock, to
2944: * each of the source and destination objects and to the
2945: * destination map.
2946: *
2947: * Results:
2948: * Returns KERN_SUCCESS if no errors were encountered in
2949: * reading or writing the data. Returns KERN_INTERRUPTED if
2950: * the operation was interrupted (only possible if the
2951: * "interruptible" argument is asserted). Other return values
2952: * indicate a permanent error in copying the data.
2953: *
2954: * The actual amount of data copied will be returned in the
2955: * "copy_size" argument. In the event that the destination map
2956: * verification failed, this amount may be less than the amount
2957: * requested.
2958: */
2959: kern_return_t
2960: vm_fault_copy(
2961: vm_object_t src_object,
2962: vm_offset_t src_offset,
2963: vm_size_t *src_size, /* INOUT */
2964: vm_object_t dst_object,
2965: vm_offset_t dst_offset,
2966: vm_map_t dst_map,
2967: vm_map_version_t *dst_version,
2968: int interruptible)
2969: {
2970: vm_page_t result_page;
2971:
2972: vm_page_t src_page;
2973: vm_page_t src_top_page;
2974: vm_prot_t src_prot;
2975:
2976: vm_page_t dst_page;
2977: vm_page_t dst_top_page;
2978: vm_prot_t dst_prot;
2979:
2980: vm_size_t amount_left;
2981: vm_object_t old_copy_object;
2982: kern_return_t error = 0;
2983:
2984: vm_size_t part_size;
2985:
2986: /*
2987: * In order not to confuse the clustered pageins, align
2988: * the different offsets on a page boundary.
2989: */
2990: vm_offset_t src_lo_offset = trunc_page(src_offset);
2991: vm_offset_t dst_lo_offset = trunc_page(dst_offset);
2992: vm_offset_t src_hi_offset = round_page(src_offset + *src_size);
2993: vm_offset_t dst_hi_offset = round_page(dst_offset + *src_size);
2994:
2995: #define RETURN(x) \
2996: MACRO_BEGIN \
2997: *src_size -= amount_left; \
2998: MACRO_RETURN(x); \
2999: MACRO_END
3000:
3001: amount_left = *src_size;
3002: do { /* while (amount_left > 0) */
3003: /*
3004: * There may be a deadlock if both source and destination
3005: * pages are the same. To avoid this deadlock, the copy must
3006: * start by getting the destination page in order to apply
3007: * COW semantics if any.
3008: */
3009:
3010: RetryDestinationFault: ;
3011:
3012: dst_prot = VM_PROT_WRITE|VM_PROT_READ;
3013:
3014: vm_object_lock(dst_object);
3015: vm_object_paging_begin(dst_object);
3016:
3017: XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0);
3018: switch (vm_fault_page(dst_object,
3019: trunc_page(dst_offset),
3020: VM_PROT_WRITE|VM_PROT_READ,
3021: FALSE,
3022: THREAD_UNINT,
3023: dst_lo_offset,
3024: dst_hi_offset,
3025: VM_BEHAVIOR_SEQUENTIAL,
3026: &dst_prot,
3027: &dst_page,
3028: &dst_top_page,
3029: (int *)0,
3030: &error,
3031: dst_map->no_zero_fill,
3032: FALSE)) {
3033: case VM_FAULT_SUCCESS:
3034: break;
3035: case VM_FAULT_RETRY:
3036: goto RetryDestinationFault;
3037: case VM_FAULT_INTERRUPTED:
3038: RETURN(MACH_SEND_INTERRUPTED);
3039: case VM_FAULT_MEMORY_SHORTAGE:
3040: VM_PAGE_WAIT();
3041: goto RetryDestinationFault;
3042: case VM_FAULT_FICTITIOUS_SHORTAGE:
3043: vm_page_more_fictitious();
3044: goto RetryDestinationFault;
3045: case VM_FAULT_MEMORY_ERROR:
3046: if (error)
3047: return (error);
3048: else
3049: return(KERN_MEMORY_ERROR);
3050: }
3051: assert ((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE);
3052:
3053: old_copy_object = dst_page->object->copy;
3054:
3055: /*
3056: * There exists the possiblity that the source and
3057: * destination page are the same. But we can't
3058: * easily determine that now. If they are the
3059: * same, the call to vm_fault_page() for the
3060: * destination page will deadlock. To prevent this we
3061: * wire the page so we can drop busy without having
3062: * the page daemon steal the page. We clean up the
3063: * top page but keep the paging reference on the object
3064: * holding the dest page so it doesn't go away.
3065: */
3066:
3067: vm_page_lock_queues();
3068: vm_page_wire(dst_page);
3069: vm_page_unlock_queues();
3070: PAGE_WAKEUP_DONE(dst_page);
3071: vm_object_unlock(dst_page->object);
3072:
3073: if (dst_top_page != VM_PAGE_NULL) {
3074: vm_object_lock(dst_object);
3075: VM_PAGE_FREE(dst_top_page);
3076: vm_object_paging_end(dst_object);
3077: vm_object_unlock(dst_object);
3078: }
3079:
3080: RetrySourceFault: ;
3081:
3082: if (src_object == VM_OBJECT_NULL) {
3083: /*
3084: * No source object. We will just
3085: * zero-fill the page in dst_object.
3086: */
3087: src_page = VM_PAGE_NULL;
3088: } else {
3089: vm_object_lock(src_object);
3090: src_page = vm_page_lookup(src_object,
3091: trunc_page(src_offset));
3092: if (src_page == dst_page)
3093: src_prot = dst_prot;
3094: else {
3095: src_prot = VM_PROT_READ;
3096: vm_object_paging_begin(src_object);
3097:
3098: XPR(XPR_VM_FAULT,
3099: "vm_fault_copy(2) -> vm_fault_page\n",
3100: 0,0,0,0,0);
3101: switch (vm_fault_page(src_object,
3102: trunc_page(src_offset),
3103: VM_PROT_READ,
3104: FALSE,
3105: interruptible,
3106: src_lo_offset,
3107: src_hi_offset,
3108: VM_BEHAVIOR_SEQUENTIAL,
3109: &src_prot,
3110: &result_page,
3111: &src_top_page,
3112: (int *)0,
3113: &error,
3114: FALSE,
3115: FALSE)) {
3116:
3117: case VM_FAULT_SUCCESS:
3118: break;
3119: case VM_FAULT_RETRY:
3120: goto RetrySourceFault;
3121: case VM_FAULT_INTERRUPTED:
3122: vm_fault_copy_dst_cleanup(dst_page);
3123: RETURN(MACH_SEND_INTERRUPTED);
3124: case VM_FAULT_MEMORY_SHORTAGE:
3125: VM_PAGE_WAIT();
3126: goto RetrySourceFault;
3127: case VM_FAULT_FICTITIOUS_SHORTAGE:
3128: vm_page_more_fictitious();
3129: goto RetrySourceFault;
3130: case VM_FAULT_MEMORY_ERROR:
3131: vm_fault_copy_dst_cleanup(dst_page);
3132: if (error)
3133: return (error);
3134: else
3135: return(KERN_MEMORY_ERROR);
3136: }
3137:
3138: src_page = result_page;
3139:
3140: assert((src_top_page == VM_PAGE_NULL) ==
3141: (src_page->object == src_object));
3142: }
3143: assert ((src_prot & VM_PROT_READ) != VM_PROT_NONE);
3144: vm_object_unlock(src_page->object);
3145: }
3146:
3147: if (!vm_map_verify(dst_map, dst_version)) {
3148: if (src_page != VM_PAGE_NULL && src_page != dst_page)
3149: vm_fault_copy_cleanup(src_page, src_top_page);
3150: vm_fault_copy_dst_cleanup(dst_page);
3151: break;
3152: }
3153:
3154: vm_object_lock(dst_page->object);
3155:
3156: if (dst_page->object->copy != old_copy_object) {
3157: vm_object_unlock(dst_page->object);
3158: vm_map_verify_done(dst_map, dst_version);
3159: if (src_page != VM_PAGE_NULL && src_page != dst_page)
3160: vm_fault_copy_cleanup(src_page, src_top_page);
3161: vm_fault_copy_dst_cleanup(dst_page);
3162: break;
3163: }
3164: vm_object_unlock(dst_page->object);
3165:
3166: /*
3167: * Copy the page, and note that it is dirty
3168: * immediately.
3169: */
3170:
3171: if (!page_aligned(src_offset) ||
3172: !page_aligned(dst_offset) ||
3173: !page_aligned(amount_left)) {
3174:
3175: vm_offset_t src_po,
3176: dst_po;
3177:
3178: src_po = src_offset - trunc_page(src_offset);
3179: dst_po = dst_offset - trunc_page(dst_offset);
3180:
3181: if (dst_po > src_po) {
3182: part_size = PAGE_SIZE - dst_po;
3183: } else {
3184: part_size = PAGE_SIZE - src_po;
3185: }
3186: if (part_size > (amount_left)){
3187: part_size = amount_left;
3188: }
3189:
3190: if (src_page == VM_PAGE_NULL) {
3191: vm_page_part_zero_fill(dst_page,
3192: dst_po, part_size);
3193: } else {
3194: vm_page_part_copy(src_page, src_po,
3195: dst_page, dst_po, part_size);
3196: if(!dst_page->dirty){
3197: vm_object_lock(dst_object);
3198: dst_page->dirty = TRUE;
3199: vm_object_unlock(dst_page->object);
3200: }
3201:
3202: }
3203: } else {
3204: part_size = PAGE_SIZE;
3205:
3206: if (src_page == VM_PAGE_NULL)
3207: vm_page_zero_fill(dst_page);
3208: else{
3209: vm_page_copy(src_page, dst_page);
3210: if(!dst_page->dirty){
3211: vm_object_lock(dst_object);
3212: dst_page->dirty = TRUE;
3213: vm_object_unlock(dst_page->object);
3214: }
3215: }
3216:
3217: }
3218:
3219: /*
3220: * Unlock everything, and return
3221: */
3222:
3223: vm_map_verify_done(dst_map, dst_version);
3224:
3225: if (src_page != VM_PAGE_NULL && src_page != dst_page)
3226: vm_fault_copy_cleanup(src_page, src_top_page);
3227: vm_fault_copy_dst_cleanup(dst_page);
3228:
3229: amount_left -= part_size;
3230: src_offset += part_size;
3231: dst_offset += part_size;
3232: } while (amount_left > 0);
3233:
3234: RETURN(KERN_SUCCESS);
3235: #undef RETURN
3236:
3237: /*NOTREACHED*/
3238: }
3239:
3240: #ifdef notdef
3241:
3242: /*
3243: * Routine: vm_fault_page_overwrite
3244: *
3245: * Description:
3246: * A form of vm_fault_page that assumes that the
3247: * resulting page will be overwritten in its entirety,
3248: * making it unnecessary to obtain the correct *contents*
3249: * of the page.
3250: *
3251: * Implementation:
3252: * XXX Untested. Also unused. Eventually, this technology
3253: * could be used in vm_fault_copy() to advantage.
3254: */
3255: vm_fault_return_t
3256: vm_fault_page_overwrite(
3257: register
3258: vm_object_t dst_object,
3259: vm_offset_t dst_offset,
3260: vm_page_t *result_page) /* OUT */
3261: {
3262: register
3263: vm_page_t dst_page;
3264: thread_t thread = current_thread();
3265:
3266: #define interruptible THREAD_UNINT /* XXX */
3267:
3268: while (TRUE) {
3269: /*
3270: * Look for a page at this offset
3271: */
3272:
3273: while ((dst_page = vm_page_lookup(dst_object, dst_offset))
3274: == VM_PAGE_NULL) {
3275: /*
3276: * No page, no problem... just allocate one.
3277: */
3278:
3279: dst_page = vm_page_alloc(dst_object, dst_offset);
3280: if (dst_page == VM_PAGE_NULL) {
3281: vm_object_unlock(dst_object);
3282: VM_PAGE_WAIT();
3283: vm_object_lock(dst_object);
3284: continue;
3285: }
3286:
3287: /*
3288: * Pretend that the memory manager
3289: * write-protected the page.
3290: *
3291: * Note that we will be asking for write
3292: * permission without asking for the data
3293: * first.
3294: */
3295:
3296: dst_page->overwriting = TRUE;
3297: dst_page->page_lock = VM_PROT_WRITE;
3298: dst_page->absent = TRUE;
3299: dst_page->unusual = TRUE;
3300: dst_object->absent_count++;
3301:
3302: break;
3303:
3304: /*
3305: * When we bail out, we might have to throw
3306: * away the page created here.
3307: */
3308:
3309: #define DISCARD_PAGE \
3310: MACRO_BEGIN \
3311: vm_object_lock(dst_object); \
3312: dst_page = vm_page_lookup(dst_object, dst_offset); \
3313: if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting) \
3314: VM_PAGE_FREE(dst_page); \
3315: vm_object_unlock(dst_object); \
3316: MACRO_END
3317: }
3318:
3319: /*
3320: * If the page is write-protected...
3321: */
3322:
3323: if (dst_page->page_lock & VM_PROT_WRITE) {
3324: /*
3325: * ... and an unlock request hasn't been sent
3326: */
3327:
3328: if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) {
3329: vm_prot_t u;
3330: kern_return_t rc;
3331:
3332: /*
3333: * ... then send one now.
3334: */
3335:
3336: if (!dst_object->pager_ready) {
3337: vm_object_assert_wait(dst_object,
3338: VM_OBJECT_EVENT_PAGER_READY,
3339: interruptible);
3340: vm_object_unlock(dst_object);
3341: thread_block((void (*)(void))0);
3342: if (thread->wait_result !=
3343: THREAD_AWAKENED) {
3344: DISCARD_PAGE;
3345: return(VM_FAULT_INTERRUPTED);
3346: }
3347: continue;
3348: }
3349:
3350: u = dst_page->unlock_request |= VM_PROT_WRITE;
3351: vm_object_unlock(dst_object);
3352:
3353: if ((rc = memory_object_data_unlock(
3354: dst_object->pager,
3355: dst_object->pager_request,
3356: dst_offset + dst_object->paging_offset,
3357: PAGE_SIZE,
3358: u)) != KERN_SUCCESS) {
3359: if (vm_fault_debug)
3360: printf("vm_object_overwrite: memory_object_data_unlock failed\n");
3361: DISCARD_PAGE;
3362: return((rc == MACH_SEND_INTERRUPTED) ?
3363: VM_FAULT_INTERRUPTED :
3364: VM_FAULT_MEMORY_ERROR);
3365: }
3366: vm_object_lock(dst_object);
3367: continue;
3368: }
3369:
3370: /* ... fall through to wait below */
3371: } else {
3372: /*
3373: * If the page isn't being used for other
3374: * purposes, then we're done.
3375: */
3376: if ( ! (dst_page->busy || dst_page->absent ||
3377: dst_page->error || dst_page->restart) )
3378: break;
3379: }
3380:
3381: PAGE_ASSERT_WAIT(dst_page, interruptible);
3382: vm_object_unlock(dst_object);
3383: thread_block((void (*)(void))0);
3384: if (thread->wait_result != THREAD_AWAKENED) {
3385: DISCARD_PAGE;
3386: return(VM_FAULT_INTERRUPTED);
3387: }
3388: }
3389:
3390: *result_page = dst_page;
3391: return(VM_FAULT_SUCCESS);
3392:
3393: #undef interruptible
3394: #undef DISCARD_PAGE
3395: }
3396:
3397: #endif /* notdef */
3398:
3399: #if VM_FAULT_CLASSIFY
3400: /*
3401: * Temporary statistics gathering support.
3402: */
3403:
/*
 *	Statistics arrays:
 *
 *	vm_fault_stats[type][level] is a counter incremented by
 *	vm_fault_classify().  "type" is one of the VM_FAULT_TYPE_*
 *	values below; "level" is the number of shadow links followed
 *	from the faulting object before the fault was classified.
 */
#define VM_FAULT_TYPES_MAX 5	/* number of VM_FAULT_TYPE_* values (rows) */
#define VM_FAULT_LEVEL_MAX 8	/* number of level columns; valid indices are 0..7 */

int vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX];

/*
 *	Fault types, as assigned by vm_fault_classify():
 */
#define VM_FAULT_TYPE_ZERO_FILL 0	/* no page, no pager created, no shadow */
#define VM_FAULT_TYPE_MAP_IN 1	/* page present; read fault, or level 0 with no copy object */
#define VM_FAULT_TYPE_PAGER 2	/* no page and the object has a pager created */
#define VM_FAULT_TYPE_COPY 3	/* page present; write fault that is not a map-in */
#define VM_FAULT_TYPE_OTHER 4	/* page busy/error/restart/absent, or access blocked by page_lock */
3417:
3418:
3419: void
3420: vm_fault_classify(vm_object_t object,
3421: vm_offset_t offset,
3422: vm_prot_t fault_type)
3423: {
3424: int type, level = 0;
3425: vm_page_t m;
3426:
3427: while (TRUE) {
3428: m = vm_page_lookup(object, offset);
3429: if (m != VM_PAGE_NULL) {
3430: if (m->busy || m->error || m->restart || m->absent ||
3431: fault_type & m->page_lock) {
3432: type = VM_FAULT_TYPE_OTHER;
3433: break;
3434: }
3435: if (((fault_type & VM_PROT_WRITE) == 0) ||
3436: ((level == 0) && object->copy == VM_OBJECT_NULL)) {
3437: type = VM_FAULT_TYPE_MAP_IN;
3438: break;
3439: }
3440: type = VM_FAULT_TYPE_COPY;
3441: break;
3442: }
3443: else {
3444: if (object->pager_created) {
3445: type = VM_FAULT_TYPE_PAGER;
3446: break;
3447: }
3448: if (object->shadow == VM_OBJECT_NULL) {
3449: type = VM_FAULT_TYPE_ZERO_FILL;
3450: break;
3451: }
3452:
3453: offset += object->shadow_offset;
3454: object = object->shadow;
3455: level++;
3456: continue;
3457: }
3458: }
3459:
3460: if (level > VM_FAULT_LEVEL_MAX)
3461: level = VM_FAULT_LEVEL_MAX;
3462:
3463: vm_fault_stats[type][level] += 1;
3464:
3465: return;
3466: }
3467:
3468: /* cleanup routine to call from debugger */
3469:
3470: void
3471: vm_fault_classify_init(void)
3472: {
3473: int type, level;
3474:
3475: for (type = 0; type < VM_FAULT_TYPES_MAX; type++) {
3476: for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) {
3477: vm_fault_stats[type][level] = 0;
3478: }
3479: }
3480:
3481: return;
3482: }
3483: #endif /* VM_FAULT_CLASSIFY */
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.