|
|
1.1 root 1: /* bio.c 4.2 11/9/80 */
2:
3: #include "../h/param.h"
4: #include "../h/systm.h"
5: #include "../h/dir.h"
6: #include "../h/user.h"
7: #include "../h/buf.h"
8: #include "../h/conf.h"
9: #include "../h/proc.h"
10: #include "../h/seg.h"
11: #include "../h/pte.h"
12: #include "../h/vm.h"
13:
14: /*
15: * The following several routines allocate and free
16: * buffers with various side effects. In general the
17: * arguments to an allocate routine are a device and
18: * a block number, and the value is a pointer
19: * to the buffer header; the buffer is marked "busy"
20: * so that no one else can touch it. If the block was
21: * already in core, no I/O need be done; if it is
22: * already busy, the process waits until it becomes free.
23: * The following routines allocate a buffer:
24: * getblk
25: * bread
26: * breada
27: * baddr (if it is incore)
28: * Eventually the buffer must be released, possibly with the
29: * side effect of writing it out, by using one of
30: * bwrite
31: * bdwrite
32: * bawrite
33: * brelse
34: */
35:
/*
 * Buffer hash table.  bufhash[] holds, for each bucket, the index in
 * buf[] of the first buffer on that chain, or -1 when the chain is
 * empty; a chain continues through each buffer's b_hlink.
 *
 * The macro argument is parenthesized so that an expression argument
 * (e.g. a sum, or a macro that expands to an unparenthesized
 * expression) is reduced modulo BUFHSZ as a whole.
 */
#define	BUFHSZ	63
#define	BUFHASH(blkno)	((blkno) % BUFHSZ)
short	bufhash[BUFHSZ];
39:
40: /*
41: * Initialize hash links for buffers.
42: */
43: bhinit()
44: {
45: register int i;
46:
47: for (i = 0; i < BUFHSZ; i++)
48: bufhash[i] = -1;
49: }
50:
51: /* #define DISKMON 1 */
52:
#ifdef DISKMON
/*
 * Optional buffer-cache statistics, compiled in only under DISKMON.
 */
struct {
	int	nbuf;		/* not referenced by the code in this file */
	long	nread;		/* reads started by bread/breada for the caller */
	long	nreada;		/* read-ahead reads started by breada */
	long	ncache;		/* bread calls satisfied by a B_DONE buffer */
	long	nwrite;		/* calls to bwrite */
	long	bufcount[NBUF];	/* getblk hash hits, indexed by the number of
				   buffers behind the hit on the free list */
} io_info;
#endif
63:
64: /*
65: * Swap IO headers -
66: * They contain the necessary information for the swap I/O.
67: * At any given time, a swap header can be in three
68: * different lists. When free it is in the free list,
69: * when allocated and the I/O queued, it is on the swap
70: * device list, and finally, if the operation was a dirty
71: * page push, when the I/O completes, it is inserted
72: * in a list of cleaned pages to be processed by the pageout daemon.
73: */
74: struct buf swbuf[NSWBUF];
75: short swsize[NSWBUF]; /* CAN WE JUST USE B_BCOUNT? */
76: int swpf[NSWBUF];
77:
78:
#ifdef FASTVAX
/*
 * Inline version of notavail(): with the priority raised to spl6,
 * unlink bp from the available list and mark it busy, then restore
 * the previous priority.
 *
 * The body is wrapped in do { } while (0) so that "notavail(bp);"
 * expands to exactly one statement and remains correct as the
 * unbraced body of an if/else.  The saved priority `s' is local to
 * the macro body.
 */
#define	notavail(bp) \
do { \
	int s = spl6(); \
	(bp)->av_back->av_forw = (bp)->av_forw; \
	(bp)->av_forw->av_back = (bp)->av_back; \
	(bp)->b_flags |= B_BUSY; \
	splx(s); \
} while (0)
#endif
89:
90: /*
91: * Read in (if necessary) the block and return a buffer pointer.
92: */
93: struct buf *
94: bread(dev, blkno)
95: dev_t dev;
96: daddr_t blkno;
97: {
98: register struct buf *bp;
99:
100: bp = getblk(dev, blkno);
101: if (bp->b_flags&B_DONE) {
102: #ifdef DISKMON
103: io_info.ncache++;
104: #endif
105: return(bp);
106: }
107: bp->b_flags |= B_READ;
108: bp->b_bcount = BSIZE;
109: (*bdevsw[major(dev)].d_strategy)(bp);
110: #ifdef DISKMON
111: io_info.nread++;
112: #endif
113: u.u_vm.vm_inblk++; /* pay for read */
114: iowait(bp);
115: return(bp);
116: }
117:
118: /*
119: * Read in the block, like bread, but also start I/O on the
120: * read-ahead block (which is not allocated to the caller)
121: */
122: struct buf *
123: breada(dev, blkno, rablkno)
124: dev_t dev;
125: daddr_t blkno, rablkno;
126: {
127: register struct buf *bp, *rabp;
128:
129: bp = NULL;
130: if (!incore(dev, blkno)) {
131: bp = getblk(dev, blkno);
132: if ((bp->b_flags&B_DONE) == 0) {
133: bp->b_flags |= B_READ;
134: bp->b_bcount = BSIZE;
135: (*bdevsw[major(dev)].d_strategy)(bp);
136: #ifdef DISKMON
137: io_info.nread++;
138: #endif
139: u.u_vm.vm_inblk++; /* pay for read */
140: }
141: }
142: if (rablkno && !incore(dev, rablkno)) {
143: rabp = getblk(dev, rablkno);
144: if (rabp->b_flags & B_DONE)
145: brelse(rabp);
146: else {
147: rabp->b_flags |= B_READ|B_ASYNC;
148: rabp->b_bcount = BSIZE;
149: (*bdevsw[major(dev)].d_strategy)(rabp);
150: #ifdef DISKMON
151: io_info.nreada++;
152: #endif
153: u.u_vm.vm_inblk++; /* pay in advance */
154: }
155: }
156: if(bp == NULL)
157: return(bread(dev, blkno));
158: iowait(bp);
159: return(bp);
160: }
161:
162: /*
163: * Write the buffer, waiting for completion.
164: * Then release the buffer.
165: */
166: bwrite(bp)
167: register struct buf *bp;
168: {
169: register flag;
170:
171: flag = bp->b_flags;
172: bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE);
173: bp->b_bcount = BSIZE;
174: #ifdef DISKMON
175: io_info.nwrite++;
176: #endif
177: if ((flag&B_DELWRI) == 0)
178: u.u_vm.vm_oublk++; /* noone paid yet */
179: (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
180: if ((flag&B_ASYNC) == 0) {
181: iowait(bp);
182: brelse(bp);
183: } else if (flag & B_DELWRI)
184: bp->b_flags |= B_AGE;
185: else
186: geterror(bp);
187: }
188:
189: /*
190: * Release the buffer, marking it so that if it is grabbed
191: * for another purpose it will be written out before being
192: * given up (e.g. when writing a partial block where it is
193: * assumed that another write for the same block will soon follow).
194: * This can't be done for magtape, since writes must be done
195: * in the same order as requested.
196: */
197: bdwrite(bp)
198: register struct buf *bp;
199: {
200: register struct buf *dp;
201:
202: if ((bp->b_flags&B_DELWRI) == 0)
203: u.u_vm.vm_oublk++; /* noone paid yet */
204: dp = bdevsw[major(bp->b_dev)].d_tab;
205: if(dp->b_flags & B_TAPE)
206: bawrite(bp);
207: else {
208: bp->b_flags |= B_DELWRI | B_DONE;
209: brelse(bp);
210: }
211: }
212:
213: /*
214: * Release the buffer, start I/O on it, but don't wait for completion.
215: */
216: bawrite(bp)
217: register struct buf *bp;
218: {
219:
220: bp->b_flags |= B_ASYNC;
221: bwrite(bp);
222: }
223:
224: /*
225: * release the buffer, with no I/O implied.
226: */
227: brelse(bp)
228: register struct buf *bp;
229: {
230: register struct buf **backp;
231: register s;
232:
233: if (bp->b_flags&B_WANTED)
234: wakeup((caddr_t)bp);
235: if (bfreelist.b_flags&B_WANTED) {
236: bfreelist.b_flags &= ~B_WANTED;
237: wakeup((caddr_t)&bfreelist);
238: }
239: if ((bp->b_flags&B_ERROR) && bp->b_dev != NODEV) {
240: bunhash(bp);
241: bp->b_dev = NODEV; /* no assoc. on error */
242: }
243: s = spl6();
244: if(bp->b_flags & (B_AGE|B_ERROR)) {
245: backp = &bfreelist.av_forw;
246: (*backp)->av_back = bp;
247: bp->av_forw = *backp;
248: *backp = bp;
249: bp->av_back = &bfreelist;
250: } else {
251: backp = &bfreelist.av_back;
252: (*backp)->av_forw = bp;
253: bp->av_back = *backp;
254: *backp = bp;
255: bp->av_forw = &bfreelist;
256: }
257: bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
258: splx(s);
259: }
260:
261: /*
262: * See if the block is associated with some buffer
263: * (mainly to avoid getting hung up on a wait in breada)
264: */
265: incore(dev, blkno)
266: dev_t dev;
267: daddr_t blkno;
268: {
269: register struct buf *bp;
270: register int dblkno = fsbtodb(blkno);
271:
272: for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1];
273: bp = &buf[bp->b_hlink])
274: if (bp->b_blkno == dblkno && bp->b_dev == dev)
275: return (1);
276: return (0);
277: }
278:
279: struct buf *
280: baddr(dev, blkno)
281: dev_t dev;
282: daddr_t blkno;
283: {
284:
285: if (incore(dev, blkno))
286: return (bread(dev, blkno));
287: return (0);
288: }
289:
290: /*
291: * Assign a buffer for the given block. If the appropriate
292: * block is already associated, return it; otherwise search
293: * for the oldest non-busy buffer and reassign it.
294: */
295: struct buf *
296: getblk(dev, blkno)
297: dev_t dev;
298: daddr_t blkno;
299: {
300: register struct buf *bp, *dp, *ep;
301: register int i, x;
302: register int dblkno = fsbtodb(blkno);
303:
304: loop:
305: (void) spl0();
306: for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1];
307: bp = &buf[bp->b_hlink]) {
308: if (bp->b_blkno != dblkno || bp->b_dev != dev)
309: continue;
310: (void) spl6();
311: if (bp->b_flags&B_BUSY) {
312: bp->b_flags |= B_WANTED;
313: sleep((caddr_t)bp, PRIBIO+1);
314: goto loop;
315: }
316: (void) spl0();
317: #ifdef DISKMON
318: i = 0;
319: dp = bp->av_forw;
320: while (dp != &bfreelist) {
321: i++;
322: dp = dp->av_forw;
323: }
324: if (i<NBUF)
325: io_info.bufcount[i]++;
326: #endif
327: notavail(bp);
328: bp->b_flags |= B_CACHE;
329: return(bp);
330: }
331: if (major(dev) >= nblkdev)
332: panic("blkdev");
333: dp = bdevsw[major(dev)].d_tab;
334: if (dp == NULL)
335: panic("devtab");
336: (void) spl6();
337: if (bfreelist.av_forw == &bfreelist) {
338: bfreelist.b_flags |= B_WANTED;
339: sleep((caddr_t)&bfreelist, PRIBIO+1);
340: goto loop;
341: }
342: (void) spl0();
343: bp = bfreelist.av_forw;
344: notavail(bp);
345: if (bp->b_flags & B_DELWRI) {
346: bp->b_flags |= B_ASYNC;
347: bwrite(bp);
348: goto loop;
349: }
350: if (bp->b_dev == NODEV)
351: goto done;
352: /* INLINE EXPANSION OF bunhash(bp) */
353: (void) spl6();
354: i = BUFHASH(dbtofsb(bp->b_blkno));
355: x = bp - buf;
356: if (bufhash[i] == x) {
357: bufhash[i] = bp->b_hlink;
358: } else {
359: for (ep = &buf[bufhash[i]]; ep != &buf[-1];
360: ep = &buf[ep->b_hlink])
361: if (ep->b_hlink == x) {
362: ep->b_hlink = bp->b_hlink;
363: goto done;
364: }
365: panic("getblk");
366: }
367: done:
368: (void) spl0();
369: /* END INLINE EXPANSION */
370: bp->b_flags = B_BUSY;
371: bp->b_back->b_forw = bp->b_forw;
372: bp->b_forw->b_back = bp->b_back;
373: bp->b_forw = dp->b_forw;
374: bp->b_back = dp;
375: dp->b_forw->b_back = bp;
376: dp->b_forw = bp;
377: bp->b_dev = dev;
378: bp->b_blkno = dblkno;
379: i = BUFHASH(blkno);
380: bp->b_hlink = bufhash[i];
381: bufhash[i] = bp - buf;
382: return(bp);
383: }
384:
385: /*
386: * get an empty block,
387: * not assigned to any particular device
388: */
389: struct buf *
390: geteblk()
391: {
392: register struct buf *bp, *dp;
393:
394: loop:
395: (void) spl6();
396: while (bfreelist.av_forw == &bfreelist) {
397: bfreelist.b_flags |= B_WANTED;
398: sleep((caddr_t)&bfreelist, PRIBIO+1);
399: }
400: (void) spl0();
401: dp = &bfreelist;
402: bp = bfreelist.av_forw;
403: notavail(bp);
404: if (bp->b_flags & B_DELWRI) {
405: bp->b_flags |= B_ASYNC;
406: bwrite(bp);
407: goto loop;
408: }
409: if (bp->b_dev != NODEV)
410: bunhash(bp);
411: bp->b_flags = B_BUSY;
412: bp->b_back->b_forw = bp->b_forw;
413: bp->b_forw->b_back = bp->b_back;
414: bp->b_forw = dp->b_forw;
415: bp->b_back = dp;
416: dp->b_forw->b_back = bp;
417: dp->b_forw = bp;
418: bp->b_dev = (dev_t)NODEV;
419: bp->b_hlink = -1;
420: return(bp);
421: }
422:
423: bunhash(bp)
424: register struct buf *bp;
425: {
426: register struct buf *ep;
427: register int i, x, s;
428:
429: if (bp->b_dev == NODEV)
430: return;
431: s = spl6();
432: i = BUFHASH(dbtofsb(bp->b_blkno));
433: x = bp - buf;
434: if (bufhash[i] == x) {
435: bufhash[i] = bp->b_hlink;
436: goto ret;
437: }
438: for (ep = &buf[bufhash[i]]; ep != &buf[-1];
439: ep = &buf[ep->b_hlink])
440: if (ep->b_hlink == x) {
441: ep->b_hlink = bp->b_hlink;
442: goto ret;
443: }
444: panic("bunhash");
445: ret:
446: splx(s);
447: }
448:
449: /*
450: * Wait for I/O completion on the buffer; return errors
451: * to the user.
452: */
453: iowait(bp)
454: register struct buf *bp;
455: {
456:
457: (void) spl6();
458: while ((bp->b_flags&B_DONE)==0)
459: sleep((caddr_t)bp, PRIBIO);
460: (void) spl0();
461: geterror(bp);
462: }
463:
464: #ifndef FASTVAX
465: /*
466: * Unlink a buffer from the available list and mark it busy.
467: * (internal interface)
468: */
469: notavail(bp)
470: register struct buf *bp;
471: {
472: register s;
473:
474: s = spl6();
475: bp->av_back->av_forw = bp->av_forw;
476: bp->av_forw->av_back = bp->av_back;
477: bp->b_flags |= B_BUSY;
478: splx(s);
479: }
480: #endif
481:
482: /*
483: * Mark I/O complete on a buffer. If the header
484: * indicates a dirty page push completion, the
485: * header is inserted into the ``cleaned'' list
486: * to be processed by the pageout daemon. Otherwise
487: * release it if I/O is asynchronous, and wake
488: * up anyone waiting for it.
489: */
490: iodone(bp)
491: register struct buf *bp;
492: {
493: register int s;
494:
495: if (bp->b_flags & B_DONE)
496: panic("dup iodone");
497: bp->b_flags |= B_DONE;
498: if (bp->b_flags & B_DIRTY) {
499: if (bp->b_flags & B_ERROR)
500: panic("IO err in push");
501: s = spl6();
502: cnt.v_pgout++;
503: bp->av_forw = bclnlist;
504: bp->b_bcount = swsize[bp - swbuf];
505: bp->b_pfcent = swpf[bp - swbuf];
506: bclnlist = bp;
507: if (bswlist.b_flags & B_WANTED)
508: wakeup((caddr_t)&proc[2]);
509: splx(s);
510: return;
511: }
512: if (bp->b_flags&B_ASYNC)
513: brelse(bp);
514: else {
515: bp->b_flags &= ~B_WANTED;
516: wakeup((caddr_t)bp);
517: }
518: }
519:
520: /*
521: * Zero the core associated with a buffer.
522: */
523: clrbuf(bp)
524: struct buf *bp;
525: {
526: register *p;
527: register c;
528:
529: p = bp->b_un.b_words;
530: c = BSIZE/sizeof(int);
531: do
532: *p++ = 0;
533: while (--c);
534: bp->b_resid = 0;
535: }
536:
537: /*
538: * swap I/O -
539: *
540: * If the flag indicates a dirty page push initiated
541: * by the pageout daemon, we map the page into the i th
542: * virtual page of process 2 (the daemon itself) where i is
543: * the index of the swap header that has been allocated.
544: * We simply initialize the header and queue the I/O but
545: * do not wait for completion. When the I/O completes,
546: * iodone() will link the header to a list of cleaned
547: * pages to be processed by the pageout daemon.
548: */
549: swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent)
550: struct proc *p;
551: swblk_t dblkno;
552: caddr_t addr;
553: int flag, nbytes;
554: dev_t dev;
555: unsigned pfcent;
556: {
557: register struct buf *bp;
558: register int c;
559: int p2dp;
560: register struct pte *dpte, *vpte;
561:
562: (void) spl6();
563: while (bswlist.av_forw == NULL) {
564: bswlist.b_flags |= B_WANTED;
565: sleep((caddr_t)&bswlist, PSWP+1);
566: }
567: bp = bswlist.av_forw;
568: bswlist.av_forw = bp->av_forw;
569: (void) spl0();
570:
571: bp->b_flags = B_BUSY | B_PHYS | rdflg | flag;
572: if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0)
573: if (rdflg == B_READ)
574: sum.v_pswpin += btoc(nbytes);
575: else
576: sum.v_pswpout += btoc(nbytes);
577: bp->b_proc = p;
578: if (flag & B_DIRTY) {
579: p2dp = ((bp - swbuf) * CLSIZE) * KLMAX;
580: dpte = dptopte(&proc[2], p2dp);
581: vpte = vtopte(p, btop(addr));
582: for (c = 0; c < nbytes; c += NBPG) {
583: if (vpte->pg_pfnum == 0 || vpte->pg_fod)
584: panic("swap bad pte");
585: *dpte++ = *vpte++;
586: }
587: bp->b_un.b_addr = (caddr_t)ctob(p2dp);
588: } else
589: bp->b_un.b_addr = addr;
590: while (nbytes > 0) {
591: c = imin(ctob(120), nbytes);
592: bp->b_bcount = c;
593: bp->b_blkno = dblkno;
594: bp->b_dev = dev;
595: if (flag & B_DIRTY) {
596: swpf[bp - swbuf] = pfcent;
597: swsize[bp - swbuf] = nbytes;
598: }
599: (*bdevsw[major(dev)].d_strategy)(bp);
600: if (flag & B_DIRTY) {
601: if (c < nbytes)
602: panic("big push");
603: return;
604: }
605: (void) spl6();
606: while((bp->b_flags&B_DONE)==0)
607: sleep((caddr_t)bp, PSWP);
608: (void) spl0();
609: bp->b_un.b_addr += c;
610: bp->b_flags &= ~B_DONE;
611: if (bp->b_flags & B_ERROR) {
612: if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE)
613: panic("hard IO err in swap");
614: swkill(p, (char *)0);
615: }
616: nbytes -= c;
617: dblkno += btoc(c);
618: }
619: (void) spl6();
620: bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
621: bp->av_forw = bswlist.av_forw;
622: bswlist.av_forw = bp;
623: if (bswlist.b_flags & B_WANTED) {
624: bswlist.b_flags &= ~B_WANTED;
625: wakeup((caddr_t)&bswlist);
626: wakeup((caddr_t)&proc[2]);
627: }
628: (void) spl0();
629: }
630:
631: /*
632: * If rout == 0 then killed on swap error, else
633: * rout is the name of the routine where we ran out of
634: * swap space.
635: */
636: swkill(p, rout)
637: struct proc *p;
638: char *rout;
639: {
640:
641: printf("%d: ", p->p_pid);
642: if (rout)
643: printf("out of swap space in %s\n", rout);
644: else
645: printf("killed on swap error\n");
646: /*
647: * To be sure no looping (e.g. in vmsched trying to
648: * swap out) mark process locked in core (as though
649: * done by user) after killing it so noone will try
650: * to swap it out.
651: */
652: psignal(p, SIGKILL);
653: p->p_flag |= SULOCK;
654: }
655:
656: /*
657: * make sure all write-behind blocks
658: * on dev (or NODEV for all)
659: * are flushed out.
660: * (from umount and update)
661: */
662: bflush(dev)
663: dev_t dev;
664: {
665: register struct buf *bp;
666:
667: loop:
668: (void) spl6();
669: for (bp = bfreelist.av_forw; bp != &bfreelist; bp = bp->av_forw) {
670: if (bp->b_flags&B_DELWRI && (dev == NODEV||dev==bp->b_dev)) {
671: bp->b_flags |= B_ASYNC;
672: notavail(bp);
673: bwrite(bp);
674: goto loop;
675: }
676: }
677: (void) spl0();
678: }
679:
680: /*
681: * Raw I/O. The arguments are
682: * The strategy routine for the device
683: * A buffer, which will always be a special buffer
684: * header owned exclusively by the device for this purpose
685: * The device number
686: * Read/write flag
687: * Essentially all the work is computing physical addresses and
688: * validating them.
689: * If the user has the proper access privilidges, the process is
690: * marked 'delayed unlock' and the pages involved in the I/O are
691: * faulted and locked. After the completion of the I/O, the above pages
692: * are unlocked.
693: */
694: physio(strat, bp, dev, rw, mincnt)
695: int (*strat)();
696: register struct buf *bp;
697: unsigned (*mincnt)();
698: {
699: register int c;
700: char *a;
701:
702: if (useracc(u.u_base,u.u_count,rw==B_READ?B_WRITE:B_READ) == NULL) {
703: u.u_error = EFAULT;
704: return;
705: }
706: (void) spl6();
707: while (bp->b_flags&B_BUSY) {
708: bp->b_flags |= B_WANTED;
709: sleep((caddr_t)bp, PRIBIO+1);
710: }
711: bp->b_error = 0;
712: bp->b_proc = u.u_procp;
713: bp->b_un.b_addr = u.u_base;
714: while (u.u_count != 0 && bp->b_error==0) {
715: bp->b_flags = B_BUSY | B_PHYS | rw;
716: bp->b_dev = dev;
717: bp->b_blkno = u.u_offset >> PGSHIFT;
718: bp->b_bcount = u.u_count;
719: (*mincnt)(bp);
720: c = bp->b_bcount;
721: u.u_procp->p_flag |= SPHYSIO;
722: vslock(a = bp->b_un.b_addr, c);
723: (*strat)(bp);
724: (void) spl6();
725: while ((bp->b_flags&B_DONE) == 0)
726: sleep((caddr_t)bp, PRIBIO);
727: vsunlock(a, c, rw);
728: u.u_procp->p_flag &= ~SPHYSIO;
729: if (bp->b_flags&B_WANTED)
730: wakeup((caddr_t)bp);
731: (void) spl0();
732: bp->b_un.b_addr += c;
733: u.u_count -= c;
734: u.u_offset += c;
735: }
736: bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS);
737: u.u_count = bp->b_resid;
738: geterror(bp);
739: }
740:
741: /*ARGSUSED*/
742: unsigned
743: minphys(bp)
744: struct buf *bp;
745: {
746:
747: if (bp->b_bcount > 60 * 1024)
748: bp->b_bcount = 60 * 1024;
749: }
750:
751: /*
752: * Pick up the device's error number and pass it to the user;
753: * if there is an error but the number is 0 set a generalized
754: * code. Actually the latter is always true because devices
755: * don't yet return specific errors.
756: */
757: geterror(bp)
758: register struct buf *bp;
759: {
760:
761: if (bp->b_flags&B_ERROR)
762: if ((u.u_error = bp->b_error)==0)
763: u.u_error = EIO;
764: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.