/* i386/i386/pmap.c */
/*-
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* the Systems Programming Group of the University of Utah Computer
* Science Department and William Jolitz of UUNET Technologies Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)pmap.c 8.1 (Berkeley) 6/11/93
*/
/*
* Derived from hp300 version by Mike Hibler, this version by William
* Jolitz uses a recursive map [a pde points to the page directory] to
* map the page tables using the pagetables themselves. This is done to
* reduce the impact on kernel virtual memory for lots of sparse address
* space, and to reduce the cost of memory to each process.
*
* Derived from: hp300/@(#)pmap.c 7.1 (Berkeley) 12/5/90
*/
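/*
 * Editor's sketch (not part of the original source): because the page
 * directory is installed as one of its own entries, every page table
 * appears as part of a single linear array of PTEs in kernel virtual
 * space, and the PTE for any virtual address can be found by simple
 * indexing. The sketch below assumes the PTmap window and the
 * i386_btop() macro from the machine headers; the vtopte() macro used
 * later in this file is conventionally built the same way.
 */
#ifdef notdef
static struct pte *
example_vtopte(va)
        vm_offset_t va;
{
        /* index the recursively mapped PTE array by virtual page number */
        return (&PTmap[i386_btop(va)]);
}
#endif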
/*
* Reno i386 version, from Mike Hibler's hp300 version.
*/
/*
* Manages physical address maps.
*
* In addition to hardware address maps, this
* module is called upon to provide software-use-only
* maps which may or may not be stored in the same
* form as hardware maps. These pseudo-maps are
* used to store intermediate results from copy
* operations to and from address spaces.
*
* Since the information managed by this module is
* also stored by the logical address mapping module,
* this module may throw away valid virtual-to-physical
* mappings at almost any time. However, invalidations
* of virtual-to-physical mappings must be done as
* requested.
*
* In order to cope with hardware architectures which
* make virtual-to-physical map invalidations expensive,
* this module may delay invalidation or protection-reduction
* operations until they are actually necessary. This module
* is given full information as to which processors are
* currently using which maps, and as to when physical maps
* must be made correct.
*/
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/user.h>
#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#ifdef NOTDEF
#include <vm/vm_pageout.h>
#include <machine/isa.h>
#endif
/*
* Allocate various and sundry SYSMAPs used in the days of old VM
* and not yet converted. XXX.
*/
#define BSDVM_COMPAT 1
#ifdef DEBUG
struct {
int kernel; /* entering kernel mapping */
int user; /* entering user mapping */
int ptpneeded; /* needed to allocate a PT page */
int pwchange; /* no mapping change, just wiring or protection */
int wchange; /* no mapping change, just wiring */
int mchange; /* was mapped but mapping to different page */
int managed; /* a managed page */
int firstpv; /* first mapping for this PA */
int secondpv; /* second mapping for this PA */
int ci; /* cache inhibited */
int unmanaged; /* not a managed page */
int flushes; /* cache flushes */
} enter_stats;
struct {
int calls;
int removes;
int pvfirst;
int pvsearch;
int ptinvalid;
int uflushes;
int sflushes;
} remove_stats;
int debugmap = 0;
int pmapdebug = 0;
#define PDB_FOLLOW 0x0001
#define PDB_INIT 0x0002
#define PDB_ENTER 0x0004
#define PDB_REMOVE 0x0008
#define PDB_CREATE 0x0010
#define PDB_PTPAGE 0x0020
#define PDB_CACHE 0x0040
#define PDB_BITS 0x0080
#define PDB_COLLECT 0x0100
#define PDB_PROTECT 0x0200
#define PDB_PDRTAB 0x0400
#define PDB_PARANOIA 0x2000
#define PDB_WIRING 0x4000
#define PDB_PVDUMP 0x8000
int pmapvacflush = 0;
#define PVF_ENTER 0x01
#define PVF_REMOVE 0x02
#define PVF_PROTECT 0x04
#define PVF_TOTAL 0x80
#endif
/*
* Get PDEs and PTEs for user/kernel address space
*/
#define pmap_pde(m, v) (&((m)->pm_pdir[((vm_offset_t)(v) >> PD_SHIFT)&1023]))
#define pmap_pte_pa(pte) (*(int *)(pte) & PG_FRAME)
#define pmap_pde_v(pte) ((pte)->pd_v)
#define pmap_pte_w(pte) ((pte)->pg_w)
/* #define pmap_pte_ci(pte) ((pte)->pg_ci) */
#define pmap_pte_m(pte) ((pte)->pg_m)
#define pmap_pte_u(pte) ((pte)->pg_u)
#define pmap_pte_v(pte) ((pte)->pg_v)
#define pmap_pte_set_w(pte, v) ((pte)->pg_w = (v))
#define pmap_pte_set_prot(pte, v) ((pte)->pg_prot = (v))
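/*
 * Editor's illustration (not in the original source): pmap_pde() above
 * decodes the top 10 bits of a linear address (PD_SHIFT is 22 and
 * 1023 == 0x3ff) as the page directory index; the next 10 bits select
 * a PTE within that page table and the low 12 bits are the byte
 * offset. For example, with va = 0xfe012345:
 *
 *      pdi = (va >> 22) & 0x3ff  = 0x3f8   page directory slot
 *      pti = (va >> 12) & 0x3ff  = 0x012   PTE slot in that table
 *      off =  va        & 0xfff  = 0x345   byte offset in the page
 */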
/*
* Given a map and a machine independent protection code,
* convert to an i386 protection code.
*/
#define pte_prot(m, p) (protection_codes[p])
int protection_codes[8];
struct pmap kernel_pmap_store;
vm_offset_t avail_start; /* PA of first available physical page */
vm_offset_t avail_end; /* PA of last available physical page */
vm_size_t mem_size; /* memory size in bytes */
vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss)*/
vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
vm_offset_t vm_first_phys; /* PA of first managed page */
vm_offset_t vm_last_phys; /* PA just past last managed page */
int i386pagesperpage; /* PAGE_SIZE / I386_PAGE_SIZE */
boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */
char *pmap_attributes; /* reference and modify bits */
boolean_t pmap_testbit();
void pmap_clear_modify();
#if BSDVM_COMPAT
#include <sys/msgbuf.h>
/*
* All those kernel PT submaps that BSD is so fond of
*/
struct pte *CMAP1, *CMAP2, *mmap;
caddr_t CADDR1, CADDR2, vmmap;
struct pte *msgbufmap;
struct msgbuf *msgbufp;
#endif
void pmap_activate __P((pmap_t, struct pcb *));
/*
* Bootstrap the system enough to run with virtual memory.
* Map the kernel's code and data, and allocate the system page table.
*
* On the I386 this is called after mapping has already been enabled
* and just syncs the pmap module with what has already been done.
* [We can't call it easily with mapping off since the kernel is not
* mapped with PA == VA, hence we would have to relocate every address
* from the linked base (virtual) address 0xFE000000 to the actual
* (physical) address starting relative to 0]
*/
struct pte *pmap_pte();
extern vm_offset_t atdevbase;
void
pmap_bootstrap(firstaddr, loadaddr)
vm_offset_t firstaddr;
vm_offset_t loadaddr;
{
#if BSDVM_COMPAT
vm_offset_t va;
struct pte *pte;
#endif
extern vm_offset_t maxmem, physmem;
extern int IdlePTD;
/* disable paging in basemem for all machines until this cryptic comment
* can be explained
*/
#if 1 || defined(ODYSSEUS) || defined(ARGO) || defined(CIRCE)
firstaddr=0x100000; /* for some reason, basemem screws up on this machine */
#endif
printf("ps %x pe %x ", firstaddr, maxmem <<PG_SHIFT);
avail_start = firstaddr;
avail_end = maxmem << PG_SHIFT;
/* XXX: allow for msgbuf */
avail_end -= i386_round_page(sizeof(struct msgbuf));
mem_size = physmem << PG_SHIFT;
virtual_avail = atdevbase + 0x100000 - 0xa0000 + 10*NBPG;
virtual_end = VM_MAX_KERNEL_ADDRESS;
i386pagesperpage = PAGE_SIZE / I386_PAGE_SIZE;
/*
* Initialize protection array.
*/
i386_protection_init();
#ifdef notdef
/*
* Create Kernel page directory table and page maps.
* [ currently done in locore. i have wild and crazy ideas -wfj ]
*/
bzero(firstaddr, 4*NBPG);
kernel_pmap->pm_pdir = firstaddr + VM_MIN_KERNEL_ADDRESS;
kernel_pmap->pm_ptab = firstaddr + VM_MIN_KERNEL_ADDRESS + NBPG;
firstaddr += NBPG;
for (x = i386_btod(VM_MIN_KERNEL_ADDRESS);
x < i386_btod(VM_MIN_KERNEL_ADDRESS)+3; x++) {
struct pde *pde;
pde = kernel_pmap->pm_pdir + x;
*(int *)pde = firstaddr + x*NBPG | PG_V | PG_KW;
}
#else
kernel_pmap->pm_pdir = (pd_entry_t *)(0xfe000000 + IdlePTD);
#endif
simple_lock_init(&kernel_pmap->pm_lock);
kernel_pmap->pm_count = 1;
#if BSDVM_COMPAT
/*
* Allocate all the submaps we need
*/
#define SYSMAP(c, p, v, n) \
v = (c)va; va += ((n)*I386_PAGE_SIZE); p = pte; pte += (n);
va = virtual_avail;
pte = pmap_pte(kernel_pmap, va);
SYSMAP(caddr_t ,CMAP1 ,CADDR1 ,1 )
SYSMAP(caddr_t ,CMAP2 ,CADDR2 ,1 )
SYSMAP(caddr_t ,mmap ,vmmap ,1 )
SYSMAP(struct msgbuf * ,msgbufmap ,msgbufp ,1 )
virtual_avail = va;
#endif
/**(int *)PTD = 0;
load_cr3(rcr3());*/
}
/*
 * A physical address is considered valid RAM if it lies below the
 * 640K ISA hole or at/above 1MB; the hole itself (0xa0000-0xfffff)
 * is excluded.
 */
pmap_isvalidphys(addr) {
if (addr < 0xa0000) return (1);
if (addr >= 0x100000) return (1);
return(0);
}
/*
* Bootstrap memory allocator. This function allows for early dynamic
* memory allocation until the virtual memory system has been bootstrapped.
* After that point, either kmem_alloc or malloc should be used. This
* function works by stealing pages from the (to be) managed page pool,
* stealing virtual address space, then mapping the pages and zeroing them.
*
* It should be used from pmap_bootstrap until vm_page_startup; after
* that it cannot be used and will generate a panic if tried. Note that
* this memory will never be freed, and in essence it is wired down.
*/
void *
pmap_bootstrap_alloc(size) {
vm_offset_t val;
int i;
extern boolean_t vm_page_startup_initialized;
if (vm_page_startup_initialized)
panic("pmap_bootstrap_alloc: called after startup initialized");
size = round_page(size);
val = virtual_avail;
/* deal with "hole incursion" */
for (i = 0; i < size; i += PAGE_SIZE) {
while (!pmap_isvalidphys(avail_start))
avail_start += PAGE_SIZE;
virtual_avail = pmap_map(virtual_avail, avail_start,
avail_start + PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE);
avail_start += PAGE_SIZE;
}
blkclr ((caddr_t) val, size);
return ((void *) val);
}
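/*
 * Usage sketch (editor's illustration, not part of the original
 * source; "example_early_pv_table" is a hypothetical caller): memory
 * obtained this way is already mapped, zero filled and permanently
 * wired. Once vm_page_startup() has run, the call panics and
 * kmem_alloc() or malloc() must be used instead.
 */
#ifdef notdef
static struct pv_entry *
example_early_pv_table(npages)
        int npages;
{
        return ((struct pv_entry *)
            pmap_bootstrap_alloc(npages * sizeof(struct pv_entry)));
}
#endif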
/*
* Initialize the pmap module.
* Called by vm_init, to initialize any structures that the pmap
* system needs to map virtual memory.
*/
void
pmap_init(phys_start, phys_end)
vm_offset_t phys_start, phys_end;
{
vm_offset_t addr, addr2;
vm_size_t npg, s;
int rv;
extern int KPTphys;
#ifdef DEBUG
if (pmapdebug & PDB_FOLLOW)
printf("pmap_init(%x, %x)\n", phys_start, phys_end);
#endif
/*
* Now that kernel map has been allocated, we can mark as
* unavailable regions which we have mapped in locore.
*/
addr = atdevbase;
(void) vm_map_find(kernel_map, NULL, (vm_offset_t) 0,
&addr, (0x100000-0xa0000), FALSE);
addr = (vm_offset_t) 0xfe000000+KPTphys/* *NBPG */;
vm_object_reference(kernel_object);
(void) vm_map_find(kernel_map, kernel_object, addr,
&addr, 2*NBPG, FALSE);
/*
* Allocate memory for random pmap data structures. Includes the
* pv_head_table and pmap_attributes.
*/
npg = atop(phys_end - phys_start);
s = (vm_size_t) (sizeof(struct pv_entry) * npg + npg);
s = round_page(s);
addr = (vm_offset_t) kmem_alloc(kernel_map, s);
pv_table = (pv_entry_t) addr;
addr += sizeof(struct pv_entry) * npg;
pmap_attributes = (char *) addr;
#ifdef DEBUG
if (pmapdebug & PDB_INIT)
printf("pmap_init: %x bytes (%x pgs): tbl %x attr %x\n",
s, npg, pv_table, pmap_attributes);
#endif
/*
* Now it is safe to enable pv_table recording.
*/
vm_first_phys = phys_start;
vm_last_phys = phys_end;
pmap_initialized = TRUE;
}
/*
* Used to map a range of physical addresses into kernel
* virtual address space.
*
* For now, VM is already on, we only need to map the
* specified memory.
*/
vm_offset_t
pmap_map(virt, start, end, prot)
vm_offset_t virt;
vm_offset_t start;
vm_offset_t end;
int prot;
{
#ifdef DEBUG
if (pmapdebug & PDB_FOLLOW)
printf("pmap_map(%x, %x, %x, %x)\n", virt, start, end, prot);
#endif
while (start < end) {
pmap_enter(kernel_pmap, virt, start, prot, FALSE);
virt += PAGE_SIZE;
start += PAGE_SIZE;
}
return(virt);
}
/*
* Create and return a physical map.
*
* If the size specified for the map
* is zero, the map is an actual physical
* map, and may be referenced by the
* hardware.
*
* If the size specified is non-zero,
* the map will be used in software only, and
* is bounded by that size.
*
* [ just allocate a ptd and mark it uninitialized -- should we track
* with a table which process has which ptd? -wfj ]
*/
pmap_t
pmap_create(size)
vm_size_t size;
{
register pmap_t pmap;
#ifdef DEBUG
if (pmapdebug & (PDB_FOLLOW|PDB_CREATE))
printf("pmap_create(%x)\n", size);
#endif
/*
* Software use map does not need a pmap
*/
if (size)
return(NULL);
/* XXX: is it ok to wait here? */
pmap = (pmap_t) malloc(sizeof *pmap, M_VMPMAP, M_WAITOK);
#ifdef notifwewait
if (pmap == NULL)
panic("pmap_create: cannot allocate a pmap");
#endif
bzero(pmap, sizeof(*pmap));
pmap_pinit(pmap);
return (pmap);
}
/*
* Initialize a preallocated and zeroed pmap structure,
* such as one in a vmspace structure.
*/
void
pmap_pinit(pmap)
register struct pmap *pmap;
{
#ifdef DEBUG
if (pmapdebug & (PDB_FOLLOW|PDB_CREATE))
pg("pmap_pinit(%x)\n", pmap);
#endif
/*
* No need to allocate page table space yet but we do need a
* valid page directory table.
*/
pmap->pm_pdir = (pd_entry_t *) kmem_alloc(kernel_map, NBPG);
/* wire in kernel global address entries */
bcopy(PTD+KPTDI_FIRST, pmap->pm_pdir+KPTDI_FIRST,
(KPTDI_LAST-KPTDI_FIRST+1)*4);
/* install self-referential address mapping entry */
*(int *)(pmap->pm_pdir+PTDPTDI) =
(int)pmap_extract(kernel_pmap, (vm_offset_t)pmap->pm_pdir) | PG_V | PG_URKW;
pmap->pm_count = 1;
simple_lock_init(&pmap->pm_lock);
}
/*
* Retire the given physical map from service.
* Should only be called if the map contains
* no valid mappings.
*/
void
pmap_destroy(pmap)
register pmap_t pmap;
{
int count;
#ifdef DEBUG
if (pmapdebug & PDB_FOLLOW)
printf("pmap_destroy(%x)\n", pmap);
#endif
if (pmap == NULL)
return;
simple_lock(&pmap->pm_lock);
count = --pmap->pm_count;
simple_unlock(&pmap->pm_lock);
if (count == 0) {
pmap_release(pmap);
free((caddr_t)pmap, M_VMPMAP);
}
}
/*
* Release any resources held by the given physical map.
* Called when a pmap initialized by pmap_pinit is being released.
* Should only be called if the map contains no valid mappings.
*/
void
pmap_release(pmap)
register struct pmap *pmap;
{
#ifdef DEBUG
if (pmapdebug & PDB_FOLLOW)
pg("pmap_release(%x)\n", pmap);
#endif
#ifdef notdef /* DIAGNOSTIC */
/* count would be 0 from pmap_destroy... */
simple_lock(&pmap->pm_lock);
if (pmap->pm_count != 1)
panic("pmap_release count");
#endif
kmem_free(kernel_map, (vm_offset_t)pmap->pm_pdir, NBPG);
}
/*
* Add a reference to the specified pmap.
*/
void
pmap_reference(pmap)
pmap_t pmap;
{
#ifdef DEBUG
if (pmapdebug & PDB_FOLLOW)
pg("pmap_reference(%x)", pmap);
#endif
if (pmap != NULL) {
simple_lock(&pmap->pm_lock);
pmap->pm_count++;
simple_unlock(&pmap->pm_lock);
}
}
/*
* Remove the given range of addresses from the specified map.
*
* It is assumed that the start and end are properly
* rounded to the page size.
*/
void
pmap_remove(pmap, sva, eva)
register struct pmap *pmap;
vm_offset_t sva, eva;
{
register vm_offset_t pa, va;
register pt_entry_t *pte;
register pv_entry_t pv, npv;
register int ix;
pmap_t ptpmap;
int *pde, s, bits;
boolean_t firstpage = TRUE;
boolean_t flushcache = FALSE;
#ifdef DEBUG
pt_entry_t opte;
if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT))
printf("pmap_remove(%x, %x, %x)", pmap, sva, eva);
if (eva >= USRSTACK && eva <= UPT_MAX_ADDRESS)
nullop();
#endif
if (pmap == NULL)
return;
#ifdef DEBUG
remove_stats.calls++;
#endif
for (va = sva; va < eva; va += PAGE_SIZE) {
/*
* Weed out invalid mappings.
* Note: we assume that the page directory table is
* always allocated, and in kernel virtual.
*/
if (!pmap_pde_v(pmap_pde(pmap, va)))
continue;
pte = pmap_pte(pmap, va);
if (pte == 0)
continue;
pa = pmap_pte_pa(pte);
if (pa == 0)
continue;
#ifdef DEBUG
opte = *pte;
remove_stats.removes++;
#endif
/*
* Update statistics
*/
if (pmap_pte_w(pte))
pmap->pm_stats.wired_count--;
pmap->pm_stats.resident_count--;
/*
* Invalidate the PTEs.
* XXX: should cluster them up and invalidate as many
* as possible at once.
*/
#ifdef DEBUG
if (pmapdebug & PDB_REMOVE)
printf("remove: inv %x ptes at %x(%x) ",
i386pagesperpage, pte, *(int *)pte);
#endif
bits = ix = 0;
do {
bits |= *(int *)pte & (PG_U|PG_M);
*(int *)pte++ = 0;
/*TBIS(va + ix * I386_PAGE_SIZE);*/
} while (++ix != i386pagesperpage);
if (pmap == &curproc->p_vmspace->vm_pmap)
pmap_activate(pmap, (struct pcb *)curproc->p_addr);
/* are we current address space or kernel? */
/*if (pmap->pm_pdir[PTDPTDI].pd_pfnum == PTDpde.pd_pfnum
|| pmap == kernel_pmap)
load_cr3(curpcb->pcb_ptd);*/
tlbflush();
#ifdef needednotdone
reduce wiring count on page table pages as references drop
#endif
/*
* Remove from the PV table (raise IPL since we
* may be called at interrupt time).
*/
if (pa < vm_first_phys || pa >= vm_last_phys)
continue;
pv = pa_to_pvh(pa);
s = splimp();
/*
* If it is the first entry on the list, it is actually
* in the header and we must copy the following entry up
* to the header. Otherwise we must search the list for
* the entry. In either case we free the now unused entry.
*/
if (pmap == pv->pv_pmap && va == pv->pv_va) {
npv = pv->pv_next;
if (npv) {
*pv = *npv;
free((caddr_t)npv, M_VMPVENT);
} else
pv->pv_pmap = NULL;
#ifdef DEBUG
remove_stats.pvfirst++;
#endif
} else {
for (npv = pv->pv_next; npv; npv = npv->pv_next) {
#ifdef DEBUG
remove_stats.pvsearch++;
#endif
if (pmap == npv->pv_pmap && va == npv->pv_va)
break;
pv = npv;
}
#ifdef DEBUG
if (npv == NULL)
panic("pmap_remove: PA not in pv_tab");
#endif
pv->pv_next = npv->pv_next;
free((caddr_t)npv, M_VMPVENT);
pv = pa_to_pvh(pa);
}
#ifdef notdef
[tally number of pagetable pages, if sharing of ptpages adjust here]
#endif
/*
* Update saved attributes for managed page
*/
pmap_attributes[pa_index(pa)] |= bits;
splx(s);
}
#ifdef notdef
[cache and tlb flushing, if needed]
#endif
}
/*
* Routine: pmap_remove_all
* Function:
* Removes this physical page from
* all physical maps in which it resides.
* Reflects back modify bits to the pager.
*/
void
pmap_remove_all(pa)
vm_offset_t pa;
{
register pv_entry_t pv;
int s;
#ifdef DEBUG
if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT))
printf("pmap_remove_all(%x)", pa);
/*pmap_pvdump(pa);*/
#endif
/*
* Not one of ours
*/
if (pa < vm_first_phys || pa >= vm_last_phys)
return;
pv = pa_to_pvh(pa);
s = splimp();
/*
* Do it the easy way for now
*/
while (pv->pv_pmap != NULL) {
#ifdef DEBUG
if (!pmap_pde_v(pmap_pde(pv->pv_pmap, pv->pv_va)) ||
pmap_pte_pa(pmap_pte(pv->pv_pmap, pv->pv_va)) != pa)
panic("pmap_remove_all: bad mapping");
#endif
pmap_remove(pv->pv_pmap, pv->pv_va, pv->pv_va + PAGE_SIZE);
}
splx(s);
}
/*
* Routine: pmap_copy_on_write
* Function:
* Remove write privileges from all
* physical maps for this physical page.
*/
void
pmap_copy_on_write(pa)
vm_offset_t pa;
{
#ifdef DEBUG
if (pmapdebug & (PDB_FOLLOW|PDB_PROTECT))
printf("pmap_copy_on_write(%x)", pa);
#endif
pmap_changebit(pa, PG_RO, TRUE);
}
/*
* Set the physical protection on the
* specified range of this map as requested.
*/
void
pmap_protect(pmap, sva, eva, prot)
register pmap_t pmap;
vm_offset_t sva, eva;
vm_prot_t prot;
{
register pt_entry_t *pte;
register vm_offset_t va;
register int ix;
int i386prot;
boolean_t firstpage = TRUE;
#ifdef DEBUG
if (pmapdebug & (PDB_FOLLOW|PDB_PROTECT))
printf("pmap_protect(%x, %x, %x, %x)", pmap, sva, eva, prot);
#endif
if (pmap == NULL)
return;
if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
pmap_remove(pmap, sva, eva);
return;
}
if (prot & VM_PROT_WRITE)
return;
for (va = sva; va < eva; va += PAGE_SIZE) {
/*
* Page table page is not allocated.
* Skip it, we don't want to force allocation
* of unnecessary PTE pages just to set the protection.
*/
if (!pmap_pde_v(pmap_pde(pmap, va))) {
/* XXX: avoid address wrap around */
if (va >= i386_trunc_pdr((vm_offset_t)-1))
break;
va = i386_round_pdr(va + PAGE_SIZE) - PAGE_SIZE;
continue;
} else pte = pmap_pte(pmap, va);
/*
* Page not valid. Again, skip it.
* Should we do this? Or set protection anyway?
*/
if (!pmap_pte_v(pte))
continue;
ix = 0;
i386prot = pte_prot(pmap, prot);
if(va < UPT_MAX_ADDRESS)
i386prot |= 2 /*PG_u*/;
do {
/* clear VAC here if PG_RO? */
pmap_pte_set_prot(pte++, i386prot);
/*TBIS(va + ix * I386_PAGE_SIZE);*/
} while (++ix != i386pagesperpage);
}
out:
if (pmap == &curproc->p_vmspace->vm_pmap)
pmap_activate(pmap, (struct pcb *)curproc->p_addr);
}
/*
* Insert the given physical page (p) at
* the specified virtual address (v) in the
* target physical map with the protection requested.
*
* If specified, the page will be wired down, meaning
* that the related pte can not be reclaimed.
*
* NB: This is the only routine which MAY NOT lazy-evaluate
* or lose information. That is, this routine must actually
* insert this page into the given map NOW.
*/
void
pmap_enter(pmap, va, pa, prot, wired)
register pmap_t pmap;
vm_offset_t va;
register vm_offset_t pa;
vm_prot_t prot;
boolean_t wired;
{
register pt_entry_t *pte;
register int npte, ix;
vm_offset_t opa;
boolean_t cacheable = TRUE;
boolean_t checkpv = TRUE;
#ifdef DEBUG
if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
printf("pmap_enter(%x, %x, %x, %x, %x)",
pmap, va, pa, prot, wired);
if(!pmap_isvalidphys(pa)) panic("invalid phys");
#endif
if (pmap == NULL)
return;
if(va > VM_MAX_KERNEL_ADDRESS)panic("pmap_enter: toobig");
/* also, should not muck with PTD va! */
#ifdef DEBUG
if (pmap == kernel_pmap)
enter_stats.kernel++;
else
enter_stats.user++;
#endif
/*
* Page Directory table entry not valid, we need a new PT page
*/
if (!pmap_pde_v(pmap_pde(pmap, va))) {
pg("ptdi %x", pmap->pm_pdir[PTDPTDI]);
}
pte = pmap_pte(pmap, va);
opa = pmap_pte_pa(pte);
#ifdef DEBUG
if (pmapdebug & PDB_ENTER)
printf("enter: pte %x, *pte %x ", pte, *(int *)pte);
#endif
/*
* Mapping has not changed, must be protection or wiring change.
*/
if (opa == pa) {
#ifdef DEBUG
enter_stats.pwchange++;
#endif
/*
* Wiring change, just update stats.
* We don't worry about wiring PT pages as they remain
* resident as long as there are valid mappings in them.
* Hence, if a user page is wired, the PT page will be also.
*/
if ((wired && !pmap_pte_w(pte)) || (!wired && pmap_pte_w(pte))) {
#ifdef DEBUG
if (pmapdebug & PDB_ENTER)
pg("enter: wiring change -> %x ", wired);
#endif
if (wired)
pmap->pm_stats.wired_count++;
else
pmap->pm_stats.wired_count--;
#ifdef DEBUG
enter_stats.wchange++;
#endif
}
goto validate;
}
/*
* Mapping has changed, invalidate old range and fall through to
* handle validating new mapping.
*/
if (opa) {
#ifdef DEBUG
if (pmapdebug & PDB_ENTER)
printf("enter: removing old mapping %x pa %x ", va, opa);
#endif
pmap_remove(pmap, va, va + PAGE_SIZE);
#ifdef DEBUG
enter_stats.mchange++;
#endif
}
/*
* Enter on the PV list if part of our managed memory
* Note that we raise IPL while manipulating pv_table
* since pmap_enter can be called at interrupt time.
*/
if (pa >= vm_first_phys && pa < vm_last_phys) {
register pv_entry_t pv, npv;
int s;
#ifdef DEBUG
enter_stats.managed++;
#endif
pv = pa_to_pvh(pa);
s = splimp();
#ifdef DEBUG
if (pmapdebug & PDB_ENTER)
printf("enter: pv at %x: %x/%x/%x ",
pv, pv->pv_va, pv->pv_pmap, pv->pv_next);
#endif
/*
* No entries yet, use header as the first entry
*/
if (pv->pv_pmap == NULL) {
#ifdef DEBUG
enter_stats.firstpv++;
#endif
pv->pv_va = va;
pv->pv_pmap = pmap;
pv->pv_next = NULL;
pv->pv_flags = 0;
}
/*
* There is at least one other VA mapping this page.
* Place this entry after the header.
*/
else {
/*printf("second time: ");*/
#ifdef DEBUG
for (npv = pv; npv; npv = npv->pv_next)
if (pmap == npv->pv_pmap && va == npv->pv_va)
panic("pmap_enter: already in pv_tab");
#endif
npv = (pv_entry_t)
malloc(sizeof *npv, M_VMPVENT, M_NOWAIT);
npv->pv_va = va;
npv->pv_pmap = pmap;
npv->pv_next = pv->pv_next;
pv->pv_next = npv;
#ifdef DEBUG
if (!npv->pv_next)
enter_stats.secondpv++;
#endif
}
splx(s);
}
/*
* Assumption: if it is not part of our managed memory
* then it must be device memory which may be volatile.
*/
if (pmap_initialized) {
checkpv = cacheable = FALSE;
#ifdef DEBUG
enter_stats.unmanaged++;
#endif
}
/*
* Increment counters
*/
pmap->pm_stats.resident_count++;
if (wired)
pmap->pm_stats.wired_count++;
validate:
/*
* Now validate mapping with desired protection/wiring.
* Assume uniform modified and referenced status for all
* I386 pages in a MACH page.
*/
npte = (pa & PG_FRAME) | pte_prot(pmap, prot) | PG_V;
npte |= (*(int *)pte & (PG_M|PG_U));
if (wired)
npte |= PG_W;
if(va < UPT_MIN_ADDRESS)
npte |= PG_u;
else if(va < UPT_MAX_ADDRESS)
npte |= PG_u | PG_RW;
#ifdef DEBUG
if (pmapdebug & PDB_ENTER)
printf("enter: new pte value %x ", npte);
#endif
ix = 0;
do {
*(int *)pte++ = npte;
/*TBIS(va);*/
npte += I386_PAGE_SIZE;
va += I386_PAGE_SIZE;
} while (++ix != i386pagesperpage);
pte--;
#ifdef DEBUGx
cache, tlb flushes
#endif
/*pads(pmap);*/
/*load_cr3(((struct pcb *)curproc->p_addr)->pcb_ptd);*/
tlbflush();
}
/*
* pmap_page_protect:
*
* Lower the permission for all mappings to a given page.
*/
void
pmap_page_protect(phys, prot)
vm_offset_t phys;
vm_prot_t prot;
{
switch (prot) {
case VM_PROT_READ:
case VM_PROT_READ|VM_PROT_EXECUTE:
pmap_copy_on_write(phys);
break;
case VM_PROT_ALL:
break;
default:
pmap_remove_all(phys);
break;
}
}
/*
* Routine: pmap_change_wiring
* Function: Change the wiring attribute for a map/virtual-address
* pair.
* In/out conditions:
* The mapping must already exist in the pmap.
*/
void
pmap_change_wiring(pmap, va, wired)
register pmap_t pmap;
vm_offset_t va;
boolean_t wired;
{
register pt_entry_t *pte;
register int ix;
#ifdef DEBUG
if (pmapdebug & PDB_FOLLOW)
printf("pmap_change_wiring(%x, %x, %x)", pmap, va, wired);
#endif
if (pmap == NULL)
return;
pte = pmap_pte(pmap, va);
#ifdef DEBUG
/*
* Page table page is not allocated.
* Should this ever happen? Ignore it for now,
* we don't want to force allocation of unnecessary PTE pages.
*/
if (!pmap_pde_v(pmap_pde(pmap, va))) {
if (pmapdebug & PDB_PARANOIA)
pg("pmap_change_wiring: invalid PDE for %x ", va);
return;
}
/*
* Page not valid. Should this ever happen?
* Just continue and change wiring anyway.
*/
if (!pmap_pte_v(pte)) {
if (pmapdebug & PDB_PARANOIA)
pg("pmap_change_wiring: invalid PTE for %x ", va);
}
#endif
if ((wired && !pmap_pte_w(pte)) || (!wired && pmap_pte_w(pte))) {
if (wired)
pmap->pm_stats.wired_count++;
else
pmap->pm_stats.wired_count--;
}
/*
* Wiring is not a hardware characteristic so there is no need
* to invalidate TLB.
*/
ix = 0;
do {
pmap_pte_set_w(pte++, wired);
} while (++ix != i386pagesperpage);
}
/*
* Routine: pmap_pte
* Function:
* Extract the page table entry associated
* with the given map/virtual_address pair.
* [ what about induced faults -wfj]
*/
struct pte *pmap_pte(pmap, va)
register pmap_t pmap;
vm_offset_t va;
{
#ifdef DEBUGx
if (pmapdebug & PDB_FOLLOW)
printf("pmap_pte(%x, %x) ->\n", pmap, va);
#endif
if (pmap && pmap_pde_v(pmap_pde(pmap, va))) {
/* are we current address space or kernel? */
if (pmap->pm_pdir[PTDPTDI].pd_pfnum == PTDpde.pd_pfnum
|| pmap == kernel_pmap)
return ((struct pte *) vtopte(va));
/* otherwise, we are alternate address space */
else {
if (pmap->pm_pdir[PTDPTDI].pd_pfnum
!= APTDpde.pd_pfnum) {
APTDpde = pmap->pm_pdir[PTDPTDI];
tlbflush();
}
return((struct pte *) avtopte(va));
}
}
return(0);
}
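/*
 * Editor's note (not in the original source): when the target pmap is
 * neither the current address space nor the kernel's, pmap_pte() above
 * temporarily installs that pmap's page directory in the alternate
 * recursive slot (APTDpde) and flushes the TLB so that the avtopte()
 * window refers to the new tables.
 */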
/*
* Routine: pmap_extract
* Function:
* Extract the physical page address associated
* with the given map/virtual_address pair.
*/
vm_offset_t
pmap_extract(pmap, va)
register pmap_t pmap;
vm_offset_t va;
{
register vm_offset_t pa;
#ifdef DEBUGx
if (pmapdebug & PDB_FOLLOW)
pg("pmap_extract(%x, %x) -> ", pmap, va);
#endif
pa = 0;
if (pmap && pmap_pde_v(pmap_pde(pmap, va))) {
pa = *(int *) pmap_pte(pmap, va);
}
if (pa)
pa = (pa & PG_FRAME) | (va & ~PG_FRAME);
#ifdef DEBUGx
if (pmapdebug & PDB_FOLLOW)
printf("%x\n", pa);
#endif
return(pa);
}
/*
* Copy the range specified by src_addr/len
* from the source map to the range dst_addr/len
* in the destination map.
*
* This routine is only advisory and need not do anything.
*/
void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
pmap_t dst_pmap;
pmap_t src_pmap;
vm_offset_t dst_addr;
vm_size_t len;
vm_offset_t src_addr;
{
#ifdef DEBUG
if (pmapdebug & PDB_FOLLOW)
printf("pmap_copy(%x, %x, %x, %x, %x)",
dst_pmap, src_pmap, dst_addr, len, src_addr);
#endif
}
/*
* Require that all active physical maps contain no
* incorrect entries NOW. [This update includes
* forcing updates of any address map caching.]
*
* Generally used to ensure that a thread about
* to run will see a semantically correct world.
*/
void pmap_update()
{
#ifdef DEBUG
if (pmapdebug & PDB_FOLLOW)
printf("pmap_update()");
#endif
tlbflush();
}
/*
* Routine: pmap_collect
* Function:
* Garbage collects the physical map system for
* pages which are no longer used.
* Success need not be guaranteed -- that is, there
* may well be pages which are not referenced, but
* others may be collected.
* Usage:
* Called by the pageout daemon when pages are scarce.
* [ needs to be written -wfj ]
*/
void
pmap_collect(pmap)
pmap_t pmap;
{
register vm_offset_t pa;
register pv_entry_t pv;
register int *pte;
vm_offset_t kpa;
int s;
#ifdef DEBUG
int *pde;
int opmapdebug;
printf("pmap_collect(%x) ", pmap);
#endif
if (pmap != kernel_pmap)
return;
}
/* [ macro again?, should I force kstack into user map here? -wfj ] */
void
pmap_activate(pmap, pcbp)
register pmap_t pmap;
struct pcb *pcbp;
{
int x;
#ifdef DEBUG
if (pmapdebug & (PDB_FOLLOW|PDB_PDRTAB))
pg("pmap_activate(%x, %x) ", pmap, pcbp);
#endif
PMAP_ACTIVATE(pmap, pcbp);
/*printf("pde ");
for(x=0x3f6; x < 0x3fA; x++)
printf("%x ", pmap->pm_pdir[x]);*/
/*pads(pmap);*/
/*pg(" pcb_cr3 %x", pcbp->pcb_cr3);*/
}
/*
* pmap_zero_page zeros the specified (machine independent)
* page by clearing its contents one machine dependent page
* at a time (via clearseg()).
*/
void
pmap_zero_page(phys)
register vm_offset_t phys;
{
register int ix;
#ifdef DEBUG
if (pmapdebug & PDB_FOLLOW)
printf("pmap_zero_page(%x)", phys);
#endif
phys >>= PG_SHIFT;
ix = 0;
do {
clearseg(phys++);
} while (++ix != i386pagesperpage);
}
/*
* pmap_copy_page copies the specified (machine independent)
* page by copying its contents one machine dependent page
* at a time (via physcopyseg()).
*/
void
pmap_copy_page(src, dst)
register vm_offset_t src, dst;
{
register int ix;
#ifdef DEBUG
if (pmapdebug & PDB_FOLLOW)
printf("pmap_copy_page(%x, %x)", src, dst);
#endif
src >>= PG_SHIFT;
dst >>= PG_SHIFT;
ix = 0;
do {
physcopyseg(src++, dst++);
} while (++ix != i386pagesperpage);
}
/*
* Routine: pmap_pageable
* Function:
* Make the specified pages (by pmap, offset)
* pageable (or not) as requested.
*
* A page which is not pageable may not take
* a fault; therefore, its page table entry
* must remain valid for the duration.
*
* This routine is merely advisory; pmap_enter
* will specify that these pages are to be wired
* down (or not) as appropriate.
*/
void
pmap_pageable(pmap, sva, eva, pageable)
pmap_t pmap;
vm_offset_t sva, eva;
boolean_t pageable;
{
#ifdef DEBUG
if (pmapdebug & PDB_FOLLOW)
printf("pmap_pageable(%x, %x, %x, %x)",
pmap, sva, eva, pageable);
#endif
/*
* If we are making a PT page pageable then all valid
* mappings must be gone from that page. Hence it should
* be all zeros and there is no need to clean it.
* Assumptions:
* - we are called with only one page at a time
* - PT pages have only one pv_table entry
*/
if (pmap == kernel_pmap && pageable && sva + PAGE_SIZE == eva) {
register pv_entry_t pv;
register vm_offset_t pa;
#ifdef DEBUG
if ((pmapdebug & (PDB_FOLLOW|PDB_PTPAGE)) == PDB_PTPAGE)
printf("pmap_pageable(%x, %x, %x, %x)",
pmap, sva, eva, pageable);
#endif
/*if (!pmap_pde_v(pmap_pde(pmap, sva)))
return;*/
if(pmap_pte(pmap, sva) == 0)
return;
pa = pmap_pte_pa(pmap_pte(pmap, sva));
if (pa < vm_first_phys || pa >= vm_last_phys)
return;
pv = pa_to_pvh(pa);
/*if (!ispt(pv->pv_va))
return;*/
#ifdef DEBUG
if (pv->pv_va != sva || pv->pv_next) {
pg("pmap_pageable: bad PT page va %x next %x\n",
pv->pv_va, pv->pv_next);
return;
}
#endif
/*
* Mark it unmodified to avoid pageout
*/
pmap_clear_modify(pa);
#ifdef needsomethinglikethis
if (pmapdebug & PDB_PTPAGE)
pg("pmap_pageable: PT page %x(%x) unmodified\n",
sva, *(int *)pmap_pte(pmap, sva));
if (pmapdebug & PDB_WIRING)
pmap_check_wiring("pageable", sva);
#endif
}
}
/*
* Clear the modify bits on the specified physical page.
*/
void
pmap_clear_modify(pa)
vm_offset_t pa;
{
#ifdef DEBUG
if (pmapdebug & PDB_FOLLOW)
printf("pmap_clear_modify(%x)", pa);
#endif
pmap_changebit(pa, PG_M, FALSE);
}
/*
* pmap_clear_reference:
*
* Clear the reference bit on the specified physical page.
*/
void pmap_clear_reference(pa)
vm_offset_t pa;
{
#ifdef DEBUG
if (pmapdebug & PDB_FOLLOW)
printf("pmap_clear_reference(%x)", pa);
#endif
pmap_changebit(pa, PG_U, FALSE);
}
/*
* pmap_is_referenced:
*
* Return whether or not the specified physical page is referenced
* by any physical maps.
*/
boolean_t
pmap_is_referenced(pa)
vm_offset_t pa;
{
#ifdef DEBUG
if (pmapdebug & PDB_FOLLOW) {
boolean_t rv = pmap_testbit(pa, PG_U);
printf("pmap_is_referenced(%x) -> %c", pa, "FT"[rv]);
return(rv);
}
#endif
return(pmap_testbit(pa, PG_U));
}
/*
* pmap_is_modified:
*
* Return whether or not the specified physical page is modified
* by any physical maps.
*/
boolean_t
pmap_is_modified(pa)
vm_offset_t pa;
{
#ifdef DEBUG
if (pmapdebug & PDB_FOLLOW) {
boolean_t rv = pmap_testbit(pa, PG_M);
printf("pmap_is_modified(%x) -> %c", pa, "FT"[rv]);
return(rv);
}
#endif
return(pmap_testbit(pa, PG_M));
}
vm_offset_t
pmap_phys_address(ppn)
int ppn;
{
return(i386_ptob(ppn));
}
/*
* Miscellaneous support routines follow
*/
i386_protection_init()
{
register int *kp, prot;
kp = protection_codes;
for (prot = 0; prot < 8; prot++) {
switch (prot) {
case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
*kp++ = 0;
break;
case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
*kp++ = PG_RO;
break;
case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
*kp++ = PG_RW;
break;
}
}
}
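/*
 * Editor's summary of the switch above (not in the original source):
 * after initialization, protection_codes[] maps any combination that
 * includes VM_PROT_WRITE to PG_RW, any read- or execute-only
 * combination to PG_RO, and VM_PROT_NONE to 0, so pte_prot() is a
 * single table lookup, e.g.
 *
 *      pte_prot(pmap, VM_PROT_READ|VM_PROT_EXECUTE) == PG_RO
 *      pte_prot(pmap, VM_PROT_READ|VM_PROT_WRITE)   == PG_RW
 */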
static
boolean_t
pmap_testbit(pa, bit)
register vm_offset_t pa;
int bit;
{
register pv_entry_t pv;
register int *pte, ix;
int s;
if (pa < vm_first_phys || pa >= vm_last_phys)
return(FALSE);
pv = pa_to_pvh(pa);
s = splimp();
/*
* Check saved info first
*/
if (pmap_attributes[pa_index(pa)] & bit) {
splx(s);
return(TRUE);
}
/*
* Not found, check current mappings returning
* immediately if found.
*/
if (pv->pv_pmap != NULL) {
for (; pv; pv = pv->pv_next) {
pte = (int *) pmap_pte(pv->pv_pmap, pv->pv_va);
ix = 0;
do {
if (*pte++ & bit) {
splx(s);
return(TRUE);
}
} while (++ix != i386pagesperpage);
}
}
splx(s);
return(FALSE);
}
pmap_changebit(pa, bit, setem)
register vm_offset_t pa;
int bit;
boolean_t setem;
{
register pv_entry_t pv;
register int *pte, npte, ix;
vm_offset_t va;
int s;
boolean_t firstpage = TRUE;
#ifdef DEBUG
if (pmapdebug & PDB_BITS)
printf("pmap_changebit(%x, %x, %s)",
pa, bit, setem ? "set" : "clear");
#endif
if (pa < vm_first_phys || pa >= vm_last_phys)
return;
pv = pa_to_pvh(pa);
s = splimp();
/*
* Clear saved attributes (modify, reference)
*/
if (!setem)
pmap_attributes[pa_index(pa)] &= ~bit;
/*
* Loop over all current mappings, setting/clearing as appropriate.
* If setting RO do we need to clear the VAC?
*/
if (pv->pv_pmap != NULL) {
#ifdef DEBUG
int toflush = 0;
#endif
for (; pv; pv = pv->pv_next) {
#ifdef DEBUG
toflush |= (pv->pv_pmap == kernel_pmap) ? 2 : 1;
#endif
va = pv->pv_va;
/*
* XXX don't write protect pager mappings
*/
if (bit == PG_RO) {
extern vm_offset_t pager_sva, pager_eva;
if (va >= pager_sva && va < pager_eva)
continue;
}
pte = (int *) pmap_pte(pv->pv_pmap, va);
ix = 0;
do {
if (setem)
npte = *pte | bit;
else
npte = *pte & ~bit;
if (*pte != npte) {
*pte = npte;
/*TBIS(va);*/
}
va += I386_PAGE_SIZE;
pte++;
} while (++ix != i386pagesperpage);
if (pv->pv_pmap == &curproc->p_vmspace->vm_pmap)
pmap_activate(pv->pv_pmap, (struct pcb *)curproc->p_addr);
}
#ifdef somethinglikethis
if (setem && bit == PG_RO && (pmapvacflush & PVF_PROTECT)) {
if ((pmapvacflush & PVF_TOTAL) || toflush == 3)
DCIA();
else if (toflush == 2)
DCIS();
else
DCIU();
}
#endif
}
splx(s);
}
#ifdef DEBUG
pmap_pvdump(pa)
vm_offset_t pa;
{
register pv_entry_t pv;
printf("pa %x", pa);
for (pv = pa_to_pvh(pa); pv; pv = pv->pv_next) {
printf(" -> pmap %x, va %x, flags %x",
pv->pv_pmap, pv->pv_va, pv->pv_flags);
pads(pv->pv_pmap);
}
printf(" ");
}
#ifdef notyet
pmap_check_wiring(str, va)
char *str;
vm_offset_t va;
{
vm_map_entry_t entry;
register int count, *pte;
va = trunc_page(va);
if (!pmap_pde_v(pmap_pde(kernel_pmap, va)) ||
!pmap_pte_v(pmap_pte(kernel_pmap, va)))
return;
if (!vm_map_lookup_entry(pt_map, va, &entry)) {
pg("wired_check: entry for %x not found\n", va);
return;
}
count = 0;
for (pte = (int *)va; pte < (int *)(va+PAGE_SIZE); pte++)
if (*pte)
count++;
if (entry->wired_count != count)
pg("*%s*: %x: w%d/a%d\n",
str, va, entry->wired_count, count);
}
#endif
/* print address space of pmap*/
pads(pm) pmap_t pm; {
unsigned va, i, j;
struct pte *ptep;
if(pm == kernel_pmap) return;
for (i = 0; i < 1024; i++)
if(pm->pm_pdir[i].pd_v)
for (j = 0; j < 1024 ; j++) {
va = (i<<22)+(j<<12);
if (pm == kernel_pmap && va < 0xfe000000)
continue;
if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
continue;
ptep = pmap_pte(pm, va);
if(pmap_pte_v(ptep))
printf("%x:%x ", va, *(int *)ptep);
} ;
}
#endif