|
|
1.1 root 1: /*
2: * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
3: * All rights reserved.
4: *
5: * Redistribution is only permitted until one year after the first shipment
6: * of 4.4BSD by the Regents. Otherwise, redistribution and use in source and
7: * binary forms are permitted provided that: (1) source distributions retain
8: * this entire copyright notice and comment, and (2) distributions including
9: * binaries display the following acknowledgement: This product includes
10: * software developed by the University of California, Berkeley and its
11: * contributors'' in the documentation or other materials provided with the
12: * distribution and in all advertising materials mentioning features or use
13: * of this software. Neither the name of the University nor the names of
14: * its contributors may be used to endorse or promote products derived from
15: * this software without specific prior written permission.
16: * THIS SOFTWARE IS PROVIDED AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
17: * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
18: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
19: *
20: * @(#)vm_page.c 7.14 (Berkeley) 6/28/90
21: */
22:
23: #include "param.h"
24: #include "systm.h"
25: #include "user.h"
26: #include "proc.h"
27: #include "buf.h"
28: #include "text.h"
29: #include "vnode.h"
30: #include "cmap.h"
31: #include "vm.h"
32: #include "trace.h"
33: #include "file.h"
34:
35: #include "machine/cpu.h"
36: #include "machine/pte.h"
37: #include "machine/mtpr.h"
38:
/*
 * uncachecl(pte) applies uncache() to each of the CLSIZE ptes that make
 * up one cluster, starting at pte.  The expansion depends on CLSIZE; for
 * CLSIZE > 2 it is a brace block that declares its own index `ii'.
 * When not building for tahoe, both uncache() and uncachecl() expand
 * to nothing.
 */
39: #if defined(tahoe)
40: #if CLSIZE == 1
41: #define uncachecl(pte) uncache(pte)
42: #endif
43: #if CLSIZE == 2
44: #define uncachecl(pte) uncache(pte), uncache((pte)+1)
45: #endif
46: #if CLSIZE > 2
47: #define uncachecl(pte) { \
48: register ii; \
49: for (ii = 0; ii < CLSIZE; ii++) \
50: uncache((pte) + ii); \
51: }
52: #endif
53: #else /* tahoe */
54: #define uncache(pte) /* nothing */
55: #define uncachecl(pte) /* nothing */
56: #endif
57:
/*
 * When nonzero, pagein() neither searches the core hash (mfind) for a
 * detached text page nor enters freshly paged-in text pages into it
 * (mhash).
 */
58: int nohash = 0;
59: /*
60: * Handle a page fault.
61: *
62: * Basic outline
63: * If page is allocated, but just not valid:
64: * Wait if intransit, else just revalidate
65: * Done
66: * Compute <vp,bn> from which page operation would take place
67: * If page is text page, and filling from file system or swap space:
68: * If in free list cache, reattach it and then done
69: * Allocate memory for page in
70: * If block here, restart because we could have swapped, etc.
71: * Lock process from swapping for duration
72: * Update pte's to reflect that page is intransit.
73: * If page is zero fill on demand:
74: * Clear pages and flush free list cache of stale caching
75: * for this swap page (e.g. before initializing again due
76: * to 407/410 exec).
77: * If page is fill from file and in buffer cache:
78: * Copy the page from the buffer cache.
79: * If not a fill on demand:
80: * Determine swap address and cluster to page in
81: * Do the swap to bring the page in
82: * Instrument the pagein
83: * After swap validate the required new page
84: * Leave prepaged pages reclaimable (not valid)
85: * Update shared copies of text page tables
86: * Complete bookkeeping on pages brought in:
87: * No longer intransit
88: * Hash text pages into core hash structure
89: * Unlock pages (modulo raw i/o requirements)
90: * Flush translation buffer
91: * Process pagein is done
92: */
/*
 * pgtrace(e) logs trace event e together with the local variable `v' and
 * the pid of the current process; it therefore may only be used where a
 * variable named v is in scope.  Without TRACE it expands to nothing.
 */
93: #ifdef TRACE
94: #define pgtrace(e) trace(e,v,u.u_procp->p_pid)
95: #else
96: #define pgtrace(e)
97: #endif
98:
/*
 * When nonzero, pagein() hands prepaged (non-faulted) data/stack clusters
 * that were read from swap to memfree() as soon as the read completes.
 */
99: int preptofree = 1; /* send pre-paged pages to free list */
100:
/*
 * pagein(virtaddr, dlyu): resolve a page fault taken by the current
 * process (u.u_procp) at virtual address virtaddr.
 *
 * virtaddr	faulting address; the fault is handled for the whole
 *		cluster, clbase(btop(virtaddr)), that contains it.
 * dlyu		when nonzero, the cmap entry of the faulted cluster is
 *		left with c_lock set on return; any prepaged neighbours
 *		are still unlocked.
 *
 * Nothing is returned.  Panics if the pte is already valid on entry
 * (other than the MAPMEM fill-on-demand case) and on the internal
 * inconsistencies checked below.
 */
101: pagein(virtaddr, dlyu)
102: unsigned virtaddr;
103: int dlyu;
104: {
105: register struct proc *p;
106: register struct pte *pte;
107: register unsigned v;
108: unsigned pf;
109: int type, fileno;
110: struct pte opte;
111: struct vnode *vp;
112: register int i;
113: int klsize;
114: unsigned vsave;
115: struct cmap *c;
116: int j;
117: daddr_t bn, bncache, bnswap;
118: int si, sk;
119: int swerror = 0;
120: #ifdef PGINPROF
121: int otime, olbolt, oicr, s;
122: long a;
123:
124: s = splclock();
125: otime = time, olbolt = lbolt, oicr = mfpr(ICR);
126: #endif
127: cnt.v_faults++;
128: /*
129: * Classify faulted page into a segment and get a pte
130: * for the faulted page.
131: */
132: vsave = v = clbase(btop(virtaddr));
133: p = u.u_procp;
134: if (isatsv(p, v))
135: type = CTEXT;
136: else if (isassv(p, v))
137: type = CSTACK;
138: else
139: type = CDATA;
140: pte = vtopte(p, v);
/* A fault on an already-valid pte is a bug, except for MAPMEM fod ptes. */
141: if (pte->pg_v) {
142: #ifdef MAPMEM
143: /* will this ever happen? */
144: if (pte->pg_fod) {
145: #ifdef PGINPROF
146: splx(s);
147: #endif
148: return;
149: }
150: #endif
151: panic("pagein");
152: }
153:
154: /*
155: * If page is reclaimable, reclaim it.
156: * If page is text and intransit, sleep while it is intransit,
157: * If it is valid after the sleep, we are done.
158: * Otherwise we have to start checking again, since page could
159: * even be reclaimable now (we may have swapped for a long time).
160: */
161: restart:
162: if (pte->pg_fod == 0 && pte->pg_pfnum) {
/*
 * Some other fault is already bringing this text page in: sleep on the
 * text entry (woken by the wakeup on p->p_textp further down), then
 * refetch the pte and look again.
 */
163: if (type == CTEXT && cmap[pgtocm(pte->pg_pfnum)].c_intrans) {
164: pgtrace(TR_INTRANS);
165: sleep((caddr_t)p->p_textp, PSWP+1);
166: pgtrace(TR_EINTRANS);
167: pte = vtopte(p, v);
168: if (pte->pg_v) {
169: valid:
/*
 * Caller asked for the page to be held: take c_lock, or if it is
 * already taken, register interest, sleep, and start over.
 */
170: if (dlyu) {
171: c = &cmap[pgtocm(pte->pg_pfnum)];
172: if (c->c_lock) {
173: c->c_want = 1;
174: sleep((caddr_t)c, PSWP+1);
175: goto restart;
176: }
177: c->c_lock = 1;
178: }
179: newptes(pte, v, CLSIZE);
180: cnt.v_intrans++;
181: #ifdef PGINPROF
182: splx(s);
183: #endif
184: return;
185: }
186: goto restart;
187: }
188: /*
189: * If page is in the free list, then take
190: * it back into the resident set, updating
191: * the size recorded for the resident set.
192: */
193: si = splimp();
194: c = &cmap[pgtocm(pte->pg_pfnum)];
195: if (c->c_free) {
196: pgtrace(TR_FRECLAIM);
197: munlink(c);
198: cnt.v_pgfrec++;
199: if (type == CTEXT)
200: p->p_textp->x_rssize += CLSIZE;
201: else
202: p->p_rssize += CLSIZE;
203: } else
204: pgtrace(TR_RECLAIM);
205: splx(si);
/*
 * Revalidate the cluster in place.  A modify bit found on any pte of
 * the cluster is recorded on the first one; distcl()/distpte() then
 * presumably copy the result to the rest of the cluster and to the
 * other sharers of the text (their bodies are not in this file).
 */
206: uncachecl(pte);
207: pte->pg_v = 1;
208: if (anycl(pte, pg_m))
209: pte->pg_m = 1;
210: distcl(pte);
211: if (type == CTEXT)
212: distpte(p->p_textp, (unsigned)vtotp(p, v), pte);
213: u.u_ru.ru_minflt++;
214: cnt.v_pgrec++;
/* Same page-hold protocol as at the `valid' label above. */
215: if (dlyu) {
216: c = &cmap[pgtocm(pte->pg_pfnum)];
217: if (c->c_lock) {
218: c->c_want = 1;
219: sleep((caddr_t)c, PSWP+1);
220: goto restart;
221: }
222: c->c_lock = 1;
223: }
224: newptes(pte, v, CLSIZE);
225: #ifdef PGINPROF
226: a = vmtime(otime, olbolt, oicr);
227: rectime += a;
228: if (a >= 0)
229: vmfltmon(rmon, a, rmonmin, rres, NRMON);
230: splx(s);
231: #endif
232: return;
233: }
234: #ifdef PGINPROF
235: splx(s);
236: #endif
237: /*
238: * <vp,bn> is where data comes from/goes to.
239: * <vp,bncache> is where data is cached from/to.
240: * <swapdev_vp,bnswap> is where data will eventually go.
241: */
242: if (pte->pg_fod == 0) {
243: fileno = -1;
244: bnswap = bncache = bn = vtod(p, v, &u.u_dmap, &u.u_smap);
245: vp = swapdev_vp;
246: } else {
247: fileno = ((struct fpte *)pte)->pg_fileno;
248: bn = ((struct fpte *)pte)->pg_blkno;
249: bnswap = vtod(p, v, &u.u_dmap, &u.u_smap);
250: if (fileno == PG_FTEXT) {
251: if (p->p_textp == 0)
252: panic("pagein PG_FTEXT");
/*
 * Ask the text's vnode which vnode to read from (stored in vp).  If
 * that fails, the process is killed with swkill() and we give up.
 */
253: if (VOP_BMAP(p->p_textp->x_vptr, (daddr_t)0, &vp,
254: (daddr_t *)0)) {
255: swkill(p, "pagein: filesystem unmounted");
256: return;
257: }
258: bncache = bn;
259: } else if (fileno == PG_FZERO) {
260: vp = swapdev_vp;
261: bncache = bnswap;
262: }
/*
 * NOTE(review): for any other fileno value, vp and bncache are not
 * assigned on this path -- confirm no other value can occur here.
 */
263: }
/*
 * Default to a one-cluster transfer and keep a copy of the pte as it was
 * at fault time; *pte itself is overwritten below.
 */
264: klsize = 1;
265: opte = *pte;
266:
267: /*
268: * Check for text detached but in free list.
269: * This can happen only if the page is filling
270: * from an inode or from the swap device, (e.g. not when reading
271: * in 407/410 execs to a zero fill page.)
272: * Honor lock bit to avoid races with pageouts.
273: */
274: if (type == CTEXT && fileno != PG_FZERO && !nohash) {
275: si = splimp();
276: while ((c = mfind(vp, bncache)) != 0) {
277: if (c->c_lock == 0)
278: break;
/*
 * Found but locked: MLOCK then MUNLOCK the entry (presumably blocking
 * until the current holder lets go) and search again.
 */
279: MLOCK(c);
280: MUNLOCK(c);
281: }
282: if (c) {
283: if (c->c_type != CTEXT || c->c_gone == 0 ||
284: c->c_free == 0)
285: panic("pagein mfind");
286: p->p_textp->x_rssize += CLSIZE;
287: /*
288: * Following code mimics memall().
289: */
290: munlink(c);
291: pf = cmtopg(c - cmap);
292: for (j = 0; j < CLSIZE; j++) {
293: *(int *)pte = 0;
294: pte->pg_pfnum = pf++;
295: pte->pg_prot = opte.pg_prot;
296: pte++;
297: }
298: pte -= CLSIZE;
299: c->c_free = 0;
300: c->c_gone = 0;
301: if (c->c_intrans || c->c_want)
302: panic("pagein intrans|want");
303: c->c_lock = 1;
304: if (c->c_page != vtotp(p, v))
305: panic("pagein c_page chgd");
306: c->c_ndx = p->p_textp - &text[0];
307: if (vp == swapdev_vp) {
308: cnt.v_xsfrec++;
309: pgtrace(TR_XSFREC);
310: } else {
311: cnt.v_xifrec++;
312: pgtrace(TR_XIFREC);
313: }
314: cnt.v_pgrec++;
315: u.u_ru.ru_minflt++;
/*
 * The page was cached under the file vnode rather than swap: remove any
 * hash entry for the swap block and mark the page modified.
 */
316: if (vp != swapdev_vp) {
317: munhash(swapdev_vp, bnswap);
318: pte->pg_m = 1;
319: }
320: splx(si);
321: goto skipswap;
322: }
323: splx(si);
324: }
325:
326: /*
327: * Wasn't reclaimable or reattachable.
328: * Have to prepare to bring the page in.
329: * We allocate the page before locking so we will
330: * be swappable if there is no free memory.
331: * If we block we have to start over, since anything
332: * could have happened.
333: */
334: sk = splimp(); /* lock memalls from here into kluster */
335: if (freemem < CLSIZE * KLMAX) {
336: pgtrace(TR_WAITMEM);
337: while (freemem < CLSIZE * KLMAX)
338: sleep((caddr_t)&freemem, PSWP+2);
339: pgtrace(TR_EWAITMEM);
340: splx(sk);
/* We slept: refetch the pte and re-run the checks from the top. */
341: pte = vtopte(p, v);
342: #ifdef PGINPROF
343: s = splclock();
344: #endif
345: if (pte->pg_v)
346: goto valid;
347: goto restart;
348: }
349:
350: /*
351: * Now can get memory and committed to bringing in the page.
352: * Lock this process, get a page,
353: * construct the new pte, and increment
354: * the (process or text) resident set size.
355: */
356: p->p_flag |= SPAGE;
357: if (memall(pte, CLSIZE, p, type) == 0)
358: panic("pagein memall");
/* Restore the saved protection and flag the new frame as intransit. */
359: pte->pg_prot = opte.pg_prot;
360: pf = pte->pg_pfnum;
361: cmap[pgtocm(pf)].c_intrans = 1;
362: distcl(pte);
363: if (type == CTEXT) {
364: p->p_textp->x_rssize += CLSIZE;
365: distpte(p->p_textp, (unsigned)vtotp(p, v), pte);
366: } else
367: p->p_rssize += CLSIZE;
368:
369: /*
370: * Two cases: either fill on demand (zero, or from file or text)
371: * or from swap space.
372: */
373: if (opte.pg_fod) {
374: pte->pg_m = 1;
375: if (fileno == PG_FZERO || fileno == PG_FTEXT) {
376: /*
377: * Flush any previous text page use of this
378: * swap device block.
379: */
380: if (type == CTEXT)
381: munhash(swapdev_vp, bnswap);
382: /*
383: * If zero fill, short-circuit hard work
384: * by just clearing pages.
385: */
386: if (fileno == PG_FZERO) {
387: pgtrace(TR_ZFOD);
388: for (i = 0; i < CLSIZE; i++) {
389: clearseg(pf+i);
390: #if defined(tahoe)
391: mtpr(P1DC, (int)virtaddr + i * NBPG);
392: #endif
393: }
394: if (type != CTEXT)
395: cnt.v_zfod += CLSIZE;
396: splx(sk);
397: goto skipswap;
398: }
399: pgtrace(TR_EXFOD);
400: cnt.v_exfod += CLSIZE;
401: }
402: /*
403: * Fill from inode. Try to find adjacent
404: * pages to bring in also.
405: */
406: v = fodkluster(p, v, pte, &klsize, vp, &bn);
407: bncache = bn;
408: splx(sk);
409: #ifdef TRACE
410: if (type != CTEXT)
411: trace(TR_XFODMISS, vp, bn);
412: #endif
413: } else {
414: if (opte.pg_pfnum)
415: panic("pagein pfnum");
416: pgtrace(TR_SWAPIN);
417: /*
418: * Fill from swap area. Try to find adjacent
419: * pages to bring in also.
420: */
421: v = kluster(p, v, pte, B_READ, &klsize,
422: (type == CTEXT) ? kltxt :
423: ((p->p_flag & SSEQL) ? klseql : klin), bn);
424: splx(sk);
425: /* THIS COULD BE COMPUTED INCREMENTALLY... */
426: bncache = bn = vtod(p, v, &u.u_dmap, &u.u_smap);
427: }
428:
/*
 * From here v is the first page of the (possibly enlarged) transfer and
 * vsave is still the page that actually faulted.
 */
429: distcl(pte);
430: swerror = swap(p, bn, ptob(v), klsize * ctob(CLSIZE),
431: B_READ, B_PGIN, vp, 0);
432: #ifdef TRACE
433: trace(TR_PGINDONE, vsave, u.u_procp->p_pid);
434: #endif
435:
436: /*
437: * Instrumentation.
438: */
439: u.u_ru.ru_majflt++;
440: cnt.v_pgin++;
441: cnt.v_pgpgin += klsize * CLSIZE;
442: #ifdef PGINPROF
443: a = vmtime(otime, olbolt, oicr) / 100;
444: pgintime += a;
445: if (a >= 0)
446: vmfltmon(pmon, a, pmonmin, pres, NPMON);
447: #endif
448:
449: skipswap:
450: /*
451: * Fix page table entries.
452: *
453: * Only page requested in is validated, and rest of pages
454: * can be ``reclaimed''. This allows system to reclaim prepaged pages
455: * quickly if they are not used and memory is tight.
456: */
457: pte = vtopte(p, vsave);
458: pte->pg_v = 1;
459: #ifdef REFBIT
460: /*
461: * Start with the page used so that pageout doesn't free it
462: * immediately.
463: */
464: pte->pg_u = 1;
465: #endif
466: distcl(pte);
467: if (type == CTEXT) {
/*
 * If the read failed, xinval() the text's vnode instead of propagating
 * the pte.  Either way, wake anyone sleeping on the text entry (the
 * intransit waiters near the top of this function).
 */
468: if (swerror) {
469: xinval(p->p_textp->x_vptr);
470: } else {
471: distpte(p->p_textp, (unsigned)vtotp(p, vsave), pte);
472: if (opte.pg_fod)
473: p->p_textp->x_flag |= XWRIT;
474: }
475: wakeup((caddr_t)p->p_textp);
476: }
477:
478: /*
479: * Memall returned page(s) locked. Unlock all
480: * pages in cluster. If locking pages for raw i/o
481: * leave the page which was required to be paged in locked,
482: * but still unlock others.
483: * If text pages, hash into the cmap situation table.
484: */
485: pte = vtopte(p, v);
486: for (i = 0; i < klsize; i++) {
487: c = &cmap[pgtocm(pte->pg_pfnum)];
488: c->c_intrans = 0;
489: if (type == CTEXT && c->c_blkno == 0 && bncache && !nohash &&
490: !swerror) {
491: mhash(c, vp, bncache);
492: bncache += btodb(CLBYTES);
493: }
494: if (v != vsave || !dlyu)
495: MUNLOCK(c);
496: if (v != vsave && type != CTEXT && preptofree &&
497: opte.pg_fod == 0) {
498: /*
499: * Throw pre-paged data/stack pages at the
500: * bottom of the free list; leave pg_u clear.
501: */
502: p->p_rssize -= CLSIZE;
503: memfree(pte, CLSIZE, 0);
504: }
505: #ifdef REFBIT
506: /*
507: * Text pages paged-in and allocated during the kluster
508: * must be validated, as they are now in the resident set.
509: */
510: if (v != vsave && type == CTEXT) {
511: pte->pg_v = 1;
512: distpte(p->p_textp, (unsigned)vtotp(p, v), pte);
513: }
514: #endif
515: newptes(pte, v, CLSIZE);
516: v += CLSIZE;
517: pte += CLSIZE;
518: }
519:
520: /*
521: * All done.
522: */
523: p->p_flag &= ~SPAGE;
524:
525: /*
526: * If process is declared fifo, memory is tight,
527: * and this was a data page-in, free memory
528: * klsdist pagein clusters away from the current fault.
529: */
530: if ((p->p_flag&SSEQL) && freemem < lotsfree && type == CDATA) {
531: int k = (vtodp(p, vsave) / CLSIZE) / klseql;
532: #ifdef notdef
533: if (vsave > u.u_vsave)
534: k -= klsdist;
535: else
536: k += klsdist;
537: dpageout(p, k * klseql * CLSIZE, klout*CLSIZE);
538: u.u_vsave = vsave;
539: #else
540: dpageout(p, (k - klsdist) * klseql * CLSIZE, klout*CLSIZE);
541: dpageout(p, (k + klsdist) * klseql * CLSIZE, klout*CLSIZE);
542: #endif
543: }
544: }
545:
546: /*
547: * Take away n pages of data space
548: * starting at data page dp.
549: * Used to take pages away from sequential processes.
550: * Mimics pieces of code in pageout() below.
551: */
552: dpageout(p, dp, n)
553: struct proc *p;
554: int dp, n;
555: {
556: register struct cmap *c;
557: int i, klsize;
558: register struct pte *pte;
559: unsigned v;
560: daddr_t daddr;
561:
562: if (dp < 0) {
563: n += dp;
564: dp = 0;
565: }
566: if (dp + n > p->p_dsize)
567: n = p->p_dsize - dp;
568: for (i = 0; i < n; i += CLSIZE, dp += CLSIZE) {
569: pte = dptopte(p, dp);
570: if (pte->pg_fod || pte->pg_pfnum == 0)
571: continue;
572: c = &cmap[pgtocm(pte->pg_pfnum)];
573: if (c->c_lock || c->c_free)
574: continue;
575: uncachecl(pte);
576: if (pte->pg_v) {
577: pte->pg_v = 0;
578: if (anycl(pte, pg_m))
579: pte->pg_m = 1;
580: distcl(pte);
581: p->p_flag |= SPTECHG;
582: }
583: if (dirtycl(pte)) {
584: if (bswlist.av_forw == NULL)
585: continue;
586: MLOCK(c);
587: pte->pg_m = 0;
588: distcl(pte);
589: p->p_poip++;
590: v = kluster(p, dptov(p, dp), pte, B_WRITE,
591: &klsize, klout, (daddr_t)0);
592: /* THIS ASSUMES THAT p == u.u_procp */
593: daddr = vtod(p, v, &u.u_dmap, &u.u_smap);
594: (void) swap(p, daddr, ptob(v), klsize * ctob(CLSIZE),
595: B_WRITE, B_DIRTY, swapdev_vp, pte->pg_pfnum);
596: } else {
597: if (c->c_gone == 0)
598: p->p_rssize -= CLSIZE;
599: memfree(pte, CLSIZE, 0);
600: cnt.v_seqfree += CLSIZE;
601: }
602: }
603: }
604:
/*
 * Both are filled in by vminit():
 * maxdmap is dmapsize(dmmin, dmmax) and is used there to cap the data
 * and stack rlimit maxima; maxtsize defaults to MAXTSIZ and is capped
 * at dtob(NXDAD * dmtext).
 */
605: unsigned maxdmap;
606: unsigned maxtsize;
607:
608: /*
609: * Setup the paging constants for the clock algorithm.
610: * Called after the system is initialized and the amount of memory
611: * and number of paging devices is known.
612: *
613: * Threshold constants are defined in machine/vmparam.h.
614: */
615: vminit()
616: {
617:
618: /*
619: * Lotsfree is threshold where paging daemon turns on.
620: */
621: if (lotsfree == 0) {
622: lotsfree = LOTSFREE / NBPG;
623: if (lotsfree > LOOPPAGES / LOTSFREEFRACT)
624: lotsfree = LOOPPAGES / LOTSFREEFRACT;
625: }
626: /*
627: * Desfree is amount of memory desired free.
628: * If less than this for extended period, do swapping.
629: */
630: if (desfree == 0) {
631: desfree = DESFREE / NBPG;
632: if (desfree > LOOPPAGES / DESFREEFRACT)
633: desfree = LOOPPAGES / DESFREEFRACT;
634: }
635:
636: /*
637: * Minfree is minimal amount of free memory which is tolerable.
638: */
639: if (minfree == 0) {
640: minfree = MINFREE / NBPG;
641: if (minfree > desfree / MINFREEFRACT)
642: minfree = desfree / MINFREEFRACT;
643: }
644:
645: /*
646: * Maxpgio thresholds how much paging is acceptable.
647: * This figures that 2/3 busy on an arm is all that is
648: * tolerable for paging. We assume one operation per disk rev.
649: */
650: if (maxpgio == 0)
651: maxpgio = (DISKRPM * 2) / 3;
652:
653: /*
654: * Clock to scan using max of ~~10% of processor time for sampling,
655: * this estimated to allow maximum of 200 samples per second.
656: * This yields a ``fastscan'' of roughly (with CLSIZE=2):
657: * <=1m 2m 3m 4m 8m
658: * 5s 10s 15s 20s 40s
659: */
660: if (fastscan == 0)
661: fastscan = 200;
662: if (fastscan > LOOPPAGES / 5)
663: fastscan = LOOPPAGES / 5;
664:
665: /*
666: * Set slow scan time to 1/2 the fast scan time.
667: */
668: if (slowscan == 0)
669: slowscan = fastscan / 2;
670:
671: /*
672: * Calculate the swap allocation constants.
673: */
674: if (dmmin == 0) {
675: dmmin = DMMIN;
676: if (dmmin < CLBYTES/DEV_BSIZE)
677: dmmin = CLBYTES/DEV_BSIZE;
678: }
679: if (dmmax == 0) {
680: dmmax = DMMAX;
681: while (dmapsize(dmmin, dmmax / 2) >= MAXDSIZ && dmmax > dmmin)
682: dmmax /= 2;
683: }
684: maxdmap = dmapsize(dmmin, dmmax);
685: if (dmtext == 0)
686: dmtext = DMTEXT;
687: if (dmtext > dmmax)
688: dmtext = dmmax;
689: if (maxtsize == 0)
690: maxtsize = MAXTSIZ;
691: if (maxtsize > dtob(NXDAD * dmtext))
692: maxtsize = dtob(NXDAD * dmtext);
693:
694: /*
695: * Set up the initial limits on process VM.
696: * Set the maximum resident set size to be all
697: * of (reasonably) available memory. This causes
698: * any single, large process to start random page
699: * replacement once it fills memory.
700: */
701: u.u_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
702: u.u_rlimit[RLIMIT_STACK].rlim_max = MIN(MAXSSIZ, maxdmap);
703: u.u_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ;
704: u.u_rlimit[RLIMIT_DATA].rlim_max = MIN(MAXDSIZ, maxdmap);
705: u.u_rlimit[RLIMIT_RSS].rlim_cur = u.u_rlimit[RLIMIT_RSS].rlim_max =
706: ctob(LOOPPAGES - desfree);
707: proc[0].p_maxrss = LOOPPAGES - desfree;
708: }
709:
710: dmapsize(dmin, dmax)
711: int dmin, dmax;
712: {
713: register int i, blk, size = 0;
714:
715: blk = dmin;
716: for (i = 0; i < NDMAP; i++) {
717: size += blk;
718: if (blk < dmax)
719: blk *= 2;
720: }
721: return (dtob(size));
722: }
723:
/*
 * Number of dirty-page pushes started by checkpage() since pageout()
 * last woke up; pageout() zeroes it and checkpage() compares it with
 * maxpgio / RATETOSCHEDPAGING before starting another push.
 */
724: int pushes;
725:
/* Values for checkpage()'s whichhand argument: which clock hand is calling. */
726: #define FRONT 1
727: #define BACK 2
728:
729: /*
730: * The page out daemon, which runs as process 2.
731: *
732: * As long as there are at least lotsfree pages,
733: * this process is not run. When the number of free
734: * pages stays in the range desfree to lotsfree,
735: * this daemon runs through the pages in the loop
736: * at a rate determined in vmsched(). Pageout manages
737: * two hands on the clock. The front hand moves through
738: * memory, clearing the valid bit (simulating a reference bit),
739: * and stealing pages from procs that are over maxrss.
740: * The back hand travels a distance behind the front hand,
741: * freeing the pages that have not been referenced in the time
742: * since the front hand passed. If modified, they are pushed to
743: * swap before being freed.
744: */
745: pageout()
746: {
747: register int count;
748: register int maxhand = pgtocm(maxfree);
749: register int fronthand, backhand;
750:
751: /*
752: * Set the two clock hands to be separated by a reasonable amount,
753: * but no more than 360 degrees apart.
754: */
755: backhand = 0 / CLBYTES;
756: fronthand = HANDSPREAD / CLBYTES;
757: if (fronthand >= maxhand)
758: fronthand = maxhand - 1;
759:
760: loop:
761: /*
762: * Before sleeping, look to see if there are any swap I/O headers
763: * in the ``cleaned'' list that correspond to dirty
764: * pages that have been pushed asynchronously. If so,
765: * empty the list by calling cleanup().
766: *
767: * N.B.: We guarantee never to block while the cleaned list is nonempty.
768: */
769: (void) splbio();
770: if (bclnlist != NULL) {
771: (void) spl0();
772: cleanup();
773: goto loop;
774: }
775: sleep((caddr_t)&proc[2], PSWP+1);
776: (void) spl0();
777: count = 0;
778: pushes = 0;
779: while (nscan < desscan && freemem < lotsfree) {
780: /*
781: * If checkpage manages to add a page to the free list,
782: * we give ourselves another couple of trips around the loop.
783: */
784: if (checkpage(fronthand, FRONT))
785: count = 0;
786: if (checkpage(backhand, BACK))
787: count = 0;
788: cnt.v_scan++;
789: nscan++;
790: if (++backhand >= maxhand)
791: backhand = 0;
792: if (++fronthand >= maxhand) {
793: fronthand = 0;
794: cnt.v_rev++;
795: if (count > 2) {
796: /*
797: * Extremely unlikely, but we went around
798: * the loop twice and didn't get anywhere.
799: * Don't cycle, stop till the next clock tick.
800: */
801: goto loop;
802: }
803: count++;
804: }
805: }
806: goto loop;
807: }
808:
809: /*
810: * An iteration of the clock pointer (hand) around the loop.
811: * Look at the page at hand. If it is a
812: * locked (for physical i/o e.g.), system (u., page table)
813: * or free, then leave it alone.
814: * Otherwise, if we are running the front hand,
815: * invalidate the page for simulation of the reference bit.
816: * If the proc is over maxrss, we take it.
817: * If running the back hand, check whether the page
818: * has been reclaimed. If not, free the page,
819: * pushing it to disk first if necessary.
820: */
/*
 * checkpage(hand, whichhand)
 *
 * hand		index into cmap[] of the cluster to examine.
 * whichhand	FRONT or BACK: which clock hand is making the call.
 *
 * Returns 1 if the page was placed on the free list or a push of it to
 * swap was started, 0 if it was left alone.
 */
821: checkpage(hand, whichhand)
822: int hand, whichhand;
823: {
824: register struct proc *rp;
825: register struct text *xp;
826: register struct cmap *c;
827: register struct pte *pte;
828: swblk_t daddr;
829: unsigned v;
830: int klsize;
831:
832: top:
833: /*
834: * Find a process and text pointer for the
835: * page, and a virtual page number in either the
836: * process or the text image.
837: */
838: c = &cmap[hand];
839: if (c->c_lock || c->c_free)
840: return (0);
841: switch (c->c_type) {
842:
843: case CSYS:
844: return (0);
845:
846: case CTEXT:
847: xp = &text[c->c_ndx];
848: rp = xp->x_caddr;
849: v = tptov(rp, c->c_page);
850: pte = tptopte(rp, c->c_page);
851: break;
852:
853: case CDATA:
854: case CSTACK:
855: rp = &proc[c->c_ndx];
/* Follow p_xlink past processes flagged SNOVM to the one holding the pages. */
856: while (rp->p_flag & SNOVM)
857: rp = rp->p_xlink;
858: xp = rp->p_textp;
859: if (c->c_type == CDATA) {
860: v = dptov(rp, c->c_page);
861: pte = dptopte(rp, c->c_page);
862: } else {
863: v = sptov(rp, c->c_page);
864: pte = sptopte(rp, c->c_page);
865: }
866: break;
867: }
/* Sanity check: the pte found must map the frame this cmap entry describes. */
868: if (pte->pg_pfnum != cmtopg(hand))
869: panic("bad c_page");
870: #ifdef REFBIT
871: /*
872: * If any processes attached to the text page have used
873: * it, then mark this one used and on the following
874: * distpte, they will all be marked used.
875: */
876: if (c->c_type == CTEXT && tanyu(xp, vtotp(rp, v)))
877: pte->pg_u = 1;
878: /*
879: * If page is referenced, clear its reference bit.
880: * If page is not referenced, clear valid bit
881: * and add it to the free list.
882: */
883: uncachecl(pte);
884: if (anycl(pte, pg_u))
885: #else
886: /*
887: * If page is valid; make invalid but reclaimable.
888: * If this pte is not valid, then it must be reclaimable
889: * and we can add it to the free list.
890: */
891: if (pte->pg_v)
892: #endif
893: {
/* The back hand leaves a page that is in use alone. */
894: if (whichhand == BACK)
895: return (0);
896: #ifdef REFBIT
897: pte->pg_u = 0;
898: #else
899: pte->pg_v = 0;
900: rp->p_flag |= SPTECHG;
901: #endif
902: if (anycl(pte, pg_m))
903: pte->pg_m = 1;
904: distcl(pte);
905: if (c->c_type == CTEXT)
906: distpte(xp, (unsigned)vtotp(rp, v), pte);
/*
 * Front hand: for a process that is neither SSEQL nor SUANOM and is
 * within its maxrss, marking the page is all we do.  Otherwise fall
 * through and try to take the page now.
 */
907: if ((rp->p_flag & (SSEQL|SUANOM)) == 0 &&
908: rp->p_rssize <= rp->p_maxrss)
909: return (0);
910: }
911: if (c->c_type != CTEXT) {
912: /*
913: * Guarantee a minimal investment in data
914: * space for jobs in balance set.
915: */
916: if (rp->p_rssize < saferss - rp->p_slptime)
917: return (0);
918: }
919:
920: /*
921: * If the page is currently dirty, we
922: * have to arrange to have it cleaned before it
923: * can be freed. We mark it clean immediately.
924: * If it is reclaimed while being pushed, then modified
925: * again, we are assured of the correct order of
926: * writes because we lock the page during the write.
927: * This guarantees that a swap() of this process (and
928: * thus this page), initiated in parallel, will,
929: * in fact, push the page after us.
930: *
931: * The most general worst case here would be for
932: * a reclaim, a modify and a swapout to occur
933: * all before the single page transfer completes.
934: */
935: if (dirtycl(pte)) {
936: /*
937: * If the process is being swapped out
938: * or about to exit, do not bother with its
939: * dirty pages
940: */
941: if (rp->p_flag & (SLOCK|SWEXIT))
942: return (0);
943: /*
944: * Limit pushes to avoid saturating
945: * pageout device.
946: */
947: if (pushes > maxpgio / RATETOSCHEDPAGING)
948: return (0);
949: pushes++;
950:
951: /*
952: * Now carefully make sure that there will
953: * be a header available for the push so that
954: * we will not block waiting for a header in
955: * swap(). The reason this is important is
956: * that we (proc[2]) are the one who cleans
957: * dirty swap headers and we could otherwise
958: * deadlock waiting for ourselves to clean
959: * swap headers. The sleep here on &proc[2]
960: * is actually (effectively) a sleep on both
961: * ourselves and &bswlist, and this is known
962: * to swdone and swap in vm_swp.c. That is,
963: * &proc[2] will be awakened both when dirty
964: * headers show up and also to get the pageout
965: * daemon moving.
966: */
967: loop2:
968: (void) splbio();
969: if (bclnlist != NULL) {
970: (void) spl0();
971: cleanup();
972: goto loop2;
973: }
974: if (bswlist.av_forw == NULL) {
975: bswlist.b_flags |= B_WANTED;
976: sleep((caddr_t)&proc[2], PSWP+2);
977: (void) spl0();
978: /*
979: * Page disposition may have changed
980: * since process may have exec'ed,
981: * forked, exited or just about
982: * anything else... try this page
983: * frame again, from the top.
984: */
985: goto top;
986: }
987: (void) spl0();
988:
989: MLOCK(c);
990: uaccess(rp, Pushmap, &pushutl);
991: /*
992: * Now committed to pushing the page...
993: */
994: #ifdef REFBIT
995: pte->pg_v = 0;
996: rp->p_flag |= SPTECHG;
997: #endif
998: pte->pg_m = 0;
999: distcl(pte);
1000: if (c->c_type == CTEXT) {
1001: xp->x_poip++;
1002: distpte(xp, (unsigned)vtotp(rp, v), pte);
1003: } else
1004: rp->p_poip++;
/*
 * Let kluster() add adjacent pages to the write; it returns the first
 * page of the transfer in v and the cluster count in klsize.  The swap
 * maps are read through pushutl, which uaccess() was given above.
 */
1005: v = kluster(rp, v, pte, B_WRITE, &klsize, klout, (daddr_t)0);
1006: if (klsize == 0)
1007: panic("pageout klsize");
1008: daddr = vtod(rp, v, &pushutl.u_dmap, &pushutl.u_smap);
1009: (void) swap(rp, daddr, ptob(v), klsize * ctob(CLSIZE),
1010: B_WRITE, B_DIRTY, swapdev_vp, pte->pg_pfnum);
1011: /*
1012: * The cleaning of this page will be
1013: * completed later, in cleanup() called
1014: * (synchronously) by us (proc[2]). In
1015: * the meantime, the page frame is locked
1016: * so no havoc can result.
1017: */
1018: return (1); /* well, it'll be free soon */
1019: }
1020: /*
1021: * Propagate valid bit changes.
1022: * Decrement the resident set size of the current
1023: * text object/process, and put the page in the
1024: * free list. Don't detach the page yet;
1025: * it may yet have a chance to be reclaimed from
1026: * the free list.
1027: */
1028: #ifdef REFBIT
1029: pte->pg_v = 0;
1030: distcl(pte);
1031: if (c->c_type == CTEXT)
1032: distpte(xp, (unsigned)vtotp(rp, v), pte);
1033: else
1034: rp->p_flag |= SPTECHG;
1035: #endif
/* The else below pairs with the inner if (text vs. process resident set). */
1036: if (c->c_gone == 0)
1037: if (c->c_type == CTEXT)
1038: xp->x_rssize -= CLSIZE;
1039: else
1040: rp->p_rssize -= CLSIZE;
1041: memfree(pte, CLSIZE, 0);
1042: cnt.v_dfree += CLSIZE;
1043: return (1); /* freed a page! */
1044: }
1045:
1046: /*
1047: * Process the ``cleaned'' list.
1048: *
1049: * Scan through the linked list of swap I/O headers
1050: * and free the corresponding pages that have been
1051: * cleaned by being written back to the paging area.
1052: * If the page has been reclaimed during this time,
1053: * we do not free the page. As they are processed,
1054: * the swap I/O headers are removed from the cleaned
1055: * list and inserted into the free list.
1056: */
1057: cleanup()
1058: {
1059: register struct buf *bp;
1060: register struct proc *rp;
1061: register struct text *xp;
1062: register struct cmap *c;
1063: register struct pte *pte;
1064: struct pte *upte;
1065: unsigned pf;
1066: register int i;
1067: int s, center;
1068:
1069: for (;;) {
1070: s = splbio();
1071: if ((bp = bclnlist) == 0)
1072: break;
1073: bclnlist = bp->av_forw;
1074: splx(s);
1075: pte = vtopte(&proc[2], btop(bp->b_un.b_addr));
1076: center = 0;
1077: for (i = 0; i < bp->b_bcount; i += CLSIZE * NBPG) {
1078: pf = pte->pg_pfnum;
1079: c = &cmap[pgtocm(pf)];
1080: MUNLOCK(c);
1081: if (pf != bp->b_pfcent) {
1082: if (c->c_gone) {
1083: memfree(pte, CLSIZE, 0);
1084: cnt.v_dfree += CLSIZE;
1085: }
1086: goto skip;
1087: }
1088: center++;
1089: switch (c->c_type) {
1090:
1091: case CSYS:
1092: panic("cleanup CSYS");
1093:
1094: case CTEXT:
1095: xp = &text[c->c_ndx];
1096: xp->x_poip--;
1097: if (xp->x_poip == 0)
1098: wakeup((caddr_t)&xp->x_poip);
1099: break;
1100:
1101: case CDATA:
1102: case CSTACK:
1103: rp = &proc[c->c_ndx];
1104: while (rp->p_flag & SNOVM)
1105: rp = rp->p_xlink;
1106: rp->p_poip--;
1107: if (rp->p_poip == 0)
1108: wakeup((caddr_t)&rp->p_poip);
1109: break;
1110: }
1111: if (c->c_gone == 0) {
1112: switch (c->c_type) {
1113:
1114: case CTEXT:
1115: upte = tptopte(xp->x_caddr, c->c_page);
1116: break;
1117:
1118: case CDATA:
1119: upte = dptopte(rp, c->c_page);
1120: break;
1121:
1122: case CSTACK:
1123: upte = sptopte(rp, c->c_page);
1124: break;
1125: }
1126: if (upte->pg_v)
1127: goto skip;
1128: if (c->c_type == CTEXT)
1129: xp->x_rssize -= CLSIZE;
1130: else
1131: rp->p_rssize -= CLSIZE;
1132: }
1133: memfree(pte, CLSIZE, 0);
1134: cnt.v_dfree += CLSIZE;
1135: skip:
1136: pte += CLSIZE;
1137: }
1138: if (center != 1)
1139: panic("cleanup center");
1140: bp->b_flags = 0;
1141: bp->av_forw = bswlist.av_forw;
1142: bswlist.av_forw = bp;
1143: if (bp->b_vp)
1144: brelvp(bp);
1145: if (bswlist.b_flags & B_WANTED) {
1146: bswlist.b_flags &= ~B_WANTED;
1147: wakeup((caddr_t)&bswlist);
1148: }
1149: }
1150: splx(s);
1151: }
1152:
1153: /*
1154: * Kluster locates pages adjacent to the argument pages
1155: * that are immediately available to include in the pagein/pageout,
1156: * and given the availability of memory includes them.
1157: * It knows that the process image is contiguous in chunks;
1158: * an assumption here is that CLSIZE * KLMAX is a divisor of dmmin,
1159: * so that by looking at KLMAX chunks of pages, all such will
1160: * necessarily be mapped swap contiguous.
1161: */
1162: int noklust;
1163: int klicnt[KLMAX];
1164: int klocnt[KLMAX];
1165:
1166: kluster(p, v, pte0, rw, pkl, klsize, bn0)
1167: register struct proc *p;
1168: unsigned v;
1169: struct pte *pte0;
1170: int rw;
1171: register int *pkl;
1172: int klsize;
1173: daddr_t bn0;
1174: {
1175: int type, cl, clmax;
1176: int kloff, k, klmax;
1177: register struct pte *pte;
1178: int klback, klforw;
1179: int i;
1180: unsigned v0;
1181: daddr_t bn;
1182: register struct cmap *c;
1183:
1184: if (rw == B_READ)
1185: klicnt[0]++;
1186: else
1187: klocnt[0]++;
1188: *pkl = 1;
1189: if (noklust || klsize <= 1 || klsize > KLMAX || (klsize & (klsize - 1)))
1190: return (v);
1191: if (rw == B_READ && freemem < CLSIZE * KLMAX)
1192: return (v);
1193: if (isassv(p, v)) {
1194: type = CSTACK;
1195: cl = vtosp(p, v) / CLSIZE;
1196: clmax = p->p_ssize / CLSIZE;
1197: } else if (isadsv(p, v)) {
1198: type = CDATA;
1199: cl = vtodp(p, v) / CLSIZE;
1200: clmax = p->p_dsize / CLSIZE;
1201: } else {
1202: type = CTEXT;
1203: cl = vtotp(p, v) / CLSIZE;
1204: clmax = p->p_textp->x_size / CLSIZE;
1205: }
1206: kloff = cl & (klsize - 1);
1207: pte = pte0;
1208: bn = bn0;
1209: for (k = kloff; --k >= 0;) {
1210: if (type == CSTACK)
1211: pte += CLSIZE;
1212: else
1213: pte -= CLSIZE;
1214: if (type == CTEXT && rw == B_READ && bn) {
1215: bn -= btodb(CLBYTES);
1216: if (mfind(swapdev_vp, bn))
1217: break;
1218: }
1219: if (!klok(pte, rw))
1220: break;
1221: }
1222: klback = (kloff - k) - 1;
1223: pte = pte0;
1224: if ((cl - kloff) + klsize > clmax)
1225: klmax = clmax - (cl - kloff);
1226: else
1227: klmax = klsize;
1228: bn = bn0;
1229: for (k = kloff; ++k < klmax;) {
1230: if (type == CSTACK)
1231: pte -= CLSIZE;
1232: else
1233: pte += CLSIZE;
1234: if (type == CTEXT && rw == B_READ && bn) {
1235: bn += btodb(CLBYTES);
1236: if (mfind(swapdev_vp, bn))
1237: break;
1238: }
1239: if (!klok(pte, rw))
1240: break;
1241: }
1242: klforw = (k - kloff) - 1;
1243: if (klforw + klback == 0)
1244: return (v);
1245: pte = pte0;
1246: if (type == CSTACK) {
1247: pte -= klforw * CLSIZE;
1248: v -= klforw * CLSIZE;
1249: } else {
1250: pte -= klback * CLSIZE;
1251: v -= klback * CLSIZE;
1252: }
1253: *pkl = klforw + klback + 1;
1254: if (rw == B_READ)
1255: klicnt[0]--, klicnt[*pkl - 1]++;
1256: else
1257: klocnt[0]--, klocnt[*pkl - 1]++;
1258: v0 = v;
1259: for (i = 0; i < *pkl; i++) {
1260: if (pte == pte0)
1261: goto cont;
1262: if (rw == B_WRITE) {
1263: c = &cmap[pgtocm(pte->pg_pfnum)];
1264: MLOCK(c);
1265: pte->pg_m = 0;
1266: distcl(pte);
1267: if (type == CTEXT)
1268: distpte(p->p_textp, (unsigned)vtotp(p, v), pte);
1269: } else {
1270: struct pte opte;
1271:
1272: opte = *pte;
1273: if (memall(pte, CLSIZE, p, type) == 0)
1274: panic("kluster");
1275: pte->pg_prot = opte.pg_prot;
1276: cmap[pgtocm(pte->pg_pfnum)].c_intrans = 1;
1277: distcl(pte);
1278: if (type == CTEXT) {
1279: p->p_textp->x_rssize += CLSIZE;
1280: distpte(p->p_textp, (unsigned)vtotp(p, v), pte);
1281: } else
1282: p->p_rssize += CLSIZE;
1283: distcl(pte);
1284: }
1285: cont:
1286: pte += CLSIZE;
1287: v += CLSIZE;
1288: }
1289: return (v0);
1290: }
1291:
1292: klok(pte, rw)
1293: register struct pte *pte;
1294: int rw;
1295: {
1296: register struct cmap *c;
1297:
1298: if (rw == B_WRITE) {
1299: if (pte->pg_fod)
1300: return (0);
1301: if (pte->pg_pfnum == 0)
1302: return (0);
1303: c = &cmap[pgtocm(pte->pg_pfnum)];
1304: if (c->c_lock || c->c_intrans)
1305: return (0);
1306: uncachecl(pte);
1307: if (!dirtycl(pte))
1308: return (0);
1309: return (1);
1310: } else {
1311: if (pte->pg_fod)
1312: return (0);
1313: if (pte->pg_pfnum)
1314: return (0);
1315: return (1);
1316: }
1317: }
1318:
1319: /*
1320: * Fodkluster locates pages adjacent to the argument pages
1321: * that are immediately available to include in the pagein,
1322: * and given the availability of memory includes them.
1323: * It wants to page in a file system block if it can.
1324: */
1325: int nofodklust = 0;
1326: int fodklcnt[KLMAX];
1327:
1328: fodkluster(p, v0, pte0, pkl, vp, pbn)
1329: register struct proc *p;
1330: unsigned v0;
1331: struct pte *pte0;
1332: int *pkl;
1333: struct vnode *vp;
1334: daddr_t *pbn;
1335: {
1336: register struct pte *pte;
1337: register struct fpte *fpte;
1338: register daddr_t bn;
1339: daddr_t bnswap;
1340: unsigned v, vmin, vmax;
1341: register int klsize;
1342: int klback, type, i;
1343:
1344: fodklcnt[0]++;
1345: *pkl = 1;
1346: if (freemem < KLMAX || nofodklust)
1347: return (v0);
1348: if (isatsv(p, v0)) {
1349: type = CTEXT;
1350: vmin = tptov(p, 0);
1351: vmax = tptov(p, clrnd(p->p_tsize) - CLSIZE);
1352: } else {
1353: type = CDATA;
1354: vmin = dptov(p, 0);
1355: vmax = dptov(p, clrnd(p->p_dsize) - CLSIZE);
1356: }
1357: fpte = (struct fpte *)pte0;
1358: bn = *pbn;
1359: v = v0;
1360: for (klsize = 1; klsize < KLMAX; klsize++) {
1361: if (v <= vmin)
1362: break;
1363: v -= CLSIZE;
1364: fpte -= CLSIZE;
1365: if (fpte->pg_fod == 0)
1366: break;
1367: bn -= btodb(CLBYTES);
1368: if (fpte->pg_blkno != bn)
1369: break;
1370: if (type == CTEXT) {
1371: if (mfind(vp, bn))
1372: break;
1373: /*
1374: * Flush any previous text page use of this
1375: * swap device block.
1376: */
1377: bnswap = vtod(p, v, &u.u_dmap, &u.u_smap);
1378: munhash(swapdev_vp, bnswap);
1379: }
1380: }
1381: klback = klsize - 1;
1382: fpte = (struct fpte *)pte0;
1383: bn = *pbn;
1384: v = v0;
1385: for (; klsize < KLMAX; klsize++) {
1386: v += CLSIZE;
1387: if (v > vmax)
1388: break;
1389: fpte += CLSIZE;
1390: if (fpte->pg_fod == 0)
1391: break;
1392: bn += btodb(CLBYTES);
1393: if (fpte->pg_blkno != bn)
1394: break;
1395: if (type == CTEXT) {
1396: if (mfind(vp, bn))
1397: break;
1398: /*
1399: * Flush any previous text page use of this
1400: * swap device block.
1401: */
1402: bnswap = vtod(p, v, &u.u_dmap, &u.u_smap);
1403: munhash(swapdev_vp, bnswap);
1404: }
1405: }
1406: if (klsize == 1)
1407: return (v0);
1408: pte = pte0;
1409: pte -= klback * CLSIZE;
1410: v0 -= klback * CLSIZE;
1411: *pbn -= klback * btodb(CLBYTES);
1412: *pkl = klsize;
1413: fodklcnt[0]--; fodklcnt[klsize - 1]++;
1414: v = v0;
1415: for (i = 0; i < klsize; i++) {
1416: if (pte != pte0) {
1417: struct pte opte;
1418: int pf;
1419:
1420: opte = *pte;
1421: if (memall(pte, CLSIZE, p, type) == 0)
1422: panic("fodkluster");
1423: pte->pg_prot = opte.pg_prot;
1424: pf = pte->pg_pfnum;
1425: pte->pg_m = 1;
1426: cmap[pgtocm(pf)].c_intrans = 1;
1427: distcl(pte);
1428: if (type == CTEXT) {
1429: p->p_textp->x_rssize += CLSIZE;
1430: distpte(p->p_textp, (unsigned)vtotp(p, v), pte);
1431: } else
1432: p->p_rssize += CLSIZE;
1433: distcl(pte);
1434: }
1435: pte += CLSIZE;
1436: v += CLSIZE;
1437: }
1438: return (v0);
1439: }
1440:
#ifdef REFBIT
/*
 * tanyu: check the reference bits for text page tp in every process
 * attached to text segment xp.
 *
 * Follows the chain that starts at xp->x_caddr and is linked through
 * p_xlink.  Returns 1 as soon as anycl() reports pg_u set for some
 * process's pte, and 0 if the chain ends without a hit.
 */
tanyu(xp, tp)
	struct text *xp;
	register tp;
{
	register struct proc *q;
	register struct pte *tpte;

	q = xp->x_caddr;
	while (q != 0) {
		tpte = tptopte(q, tp);
		uncache(tpte);
		if (anycl(tpte, pg_u))
			return (1);
		q = q->p_xlink;
	}
	return (0);
}
#endif
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.