|
|
1.1 root 1: /*
2: * routines that deal closely with
3: * machine checks and the like
4: * VAX-11/780 version
5: */
6:
7: #include "sys/param.h"
8: #include "sys/systm.h"
9: #include "sys/psl.h"
10: #include "sys/mtpr.h"
11: #include "sys/user.h"
12:
13: /*
14: * SBI-related registers
15: */
16:
/*
 * Processor register numbers handed to mfpr()/mtpr() in this file.
 */
#define SBIFS 48 /* SBI fault and status */
#define SBIMT 51 /* SBI and cache maint */
#define SBIER 52 /* SBI error */
#define SBITA 53 /* SBI timeout address */

/* SBIFS bits: machreset() clears FSNEF and sets FSIE */
#define FSNEF 0x2000000 /* nested error flag in fault status */
#define FSIE 0x40000 /* interrupt enable for sbi faults */

/* SBIER bits: machreset() ORs this mask back into the register */
#define ERCLR 0x70c0 /* latches in error register: */
/* ib timeout, ib rds, cp timeout, rds, crd */

/* SBITA bits: machwtmo() only signals when both fields are fully set */
#define TAMODE 0xc0000000 /* mode of failure for wtmo, 3 == user */
#define TAPROT 0x20000000 /* protection checked reference; off -> pt */
30:
/*
 * Machine check frame as handed to machinecheck() and mckrec().
 * Field notes below describe only how this file uses each member;
 * members marked "not referenced here" are not touched by the code
 * visible in this file.
 * NOTE(review): the layout presumably mirrors what the hardware
 * pushes on a machine check -- confirm against the processor manual
 * before reordering anything.
 */
struct mframe {
long count; /* not referenced here */
long type; /* type word: decoded with TYPE, IB, ABORT, TIMEOUT */
long ces; /* printed as "ces" */
long upc; /* not referenced here */
long vaddr; /* printed as the virtual address; re-read during cache parity recovery */
long dr; /* not referenced here */
long tb0; /* not referenced here */
long tb1; /* not referenced here */
long paddr; /* printed as the physical address after a left shift by 2 */
long par; /* tested against G0OK to pick the cache group to disable */
long sbi; /* printed as "sbi" */
long pc; /* printed; also dereferenced to fetch the opcode byte */
long psl; /* printed */
};
46:
#define NMCK 16 /* number of possible type codes */
/*
 * Printable name for each machine check type code, indexed by
 * (type & TYPE).  "-" marks codes that have no name here; mckrec()
 * sends those codes to its default case.
 */
static char *macher[NMCK] = {
"read timeout", /* 0: CPTIMEOUT */
"control store parity", /* 1: CSPAR */
"trans buffer parity", /* 2: TBUFPAR */
"cache parity", /* 3: CACHEPAR */
"-", /* 4 */
"read data substitute", /* 5: RDSUBST */
"microcode lost", /* 6: IBROMCHK */
"-", /* 7 */
"-", /* 8 */
"-", /* 9 */
"ib trans buffer parity", /* 10: IBTBUF */
"-", /* 11 */
"ib read data substitute", /* 12: IBRDSUB */
"ib read timeout", /* 13: IBTIMEOUT */
"-", /* 14 */
"ib cache parity", /* 15: IBCACHE */
};
66:
/*
 * Per-type bookkeeping, both indexed by (type & TYPE).
 * mckrec() bumps mchkcnt on every machine check, and stamps mchktime
 * with the current value of `time' when a check reaches the
 * recoverability test.
 */
int mchkcnt[NMCK]; /* number of mchecks of each type */
int mchktime[NMCK]; /* time of last mcheck */
69:
70: /*
71: * table of resumable instructions
72: * a table of bits, indexed by opcode
73: * this is taken straight from vms;
74: * don't ask me to justify choices
75: */
76:
/*
 * One bit per opcode: byte (op >> 3), bit (op & 07).
 * A set bit means mckrec() treats the instruction as resumable;
 * the per-line comments name the opcodes whose bits are clear.
 * The leading hex range on each line is the opcode span it covers.
 */
static char mrestab[256/8] = {
0x3b, /* 00-07: rei ldpctx svpctx */
0x3c, /* 08-0f: insque remque cvtps cvtsp */
0xff, /* 10-17 */
0xff, /* 18-1f */
0x0, /* 20-27: packed decimal stuff */
0xff, /* 28-2f */
0xff, /* 30-37 */
0xfe, /* 38-3f: editpc */
0xff, /* 40-47 */
0xff, /* 48-4f */
0x2f, /* 50-57: emodf cvtfd adawi */
0x0, /* 58-5f: more interlocked instructions */
0x0, /* 60-67: double precision floating point */
0x0f, /* 68-6f: more double stuff */
0x4a, /* 70-77: more double/quad */
0xc1, /* 78-7f: .. */
0xff, /* 80-87 */
0xff, /* 88-8f */
0xff, /* 90-97 */
0xff, /* 98-9f */
0xff, /* a0-a7 */
0xff, /* a8-af */
0xff, /* b0-b7 */
0xf3, /* b8-bf: pushr popr */
0xff, /* c0-c7 */
0xff, /* c8-cf */
0xff, /* d0-d7 */
0xf4, /* d8-df: adwc sbwc mfpr */
0x3f, /* e0-e7: bbssi bbcci */
0xff, /* e8-ef */
0xff, /* f0-f7 */
0x0, /* f8-ff: ashp cvtlp callg calls xfc reserved */
};
111:
112: /*
113: * bits in machine check type code
114: */
115:
/* masks applied to the `type' word of struct mframe */
#define TYPE 0xf /* the real type code */
#define IB 0x8 /* set if detected by instruction prefetch */
#define ABORT 0xf0 /* this was an abort */
#define TIMEOUT 0x100 /* timeout pending */

/*
 * types
 * (values of type & TYPE; each one also indexes macher[])
 */

#define CPTIMEOUT 0 /* read timeout */
#define CSPAR 1 /* control store parity */
#define TBUFPAR 2 /* translation buffer parity */
#define CACHEPAR 3 /* cache parity */
#define RDSUBST 5 /* read data substitute */
#define IBROMCHK 6 /* confused microcode */
#define IBTBUF 10 /* ib-detected translation buffer parity */
#define IBRDSUB 12 /* ib-detected read substitute */
#define IBTIMEOUT 13 /* ib-detected read timeout */
#define IBCACHE 15 /* ib-detected cache parity */
135:
136: /*
137: * bits in cache error register
138: */
139:
/* mckrec() compares (par & G0OK) with G0OK to decide which group to disable */
#define G0OK 0x3f8 /* `ok' bits for group 0 */

/*
 * bits in sbi maint register
 * (the values written to SBIMT by mckrec())
 */

#define G1REPL 0x2000 /* force replace in group 1 */
#define G0REPL 0x4000 /* force replace in group 0 */
#define G1MISS 0x8000 /* force miss in group 1 */
#define G0MISS 0x10000 /* force miss in group 0 */
#define SBIINV 0x200000 /* enable sbi invalidate */

/* written to SBIMT during cache parity recovery: both groups forced to miss */
#define CACHEOFF (G0REPL|G1MISS|G0MISS|SBIINV)
153:
/*
 * Private state.
 * NOTE(review): mchkkill and mchklock are never referenced by the code
 * visible in this file.
 * mchkcache is the value mckrec() writes to SBIMT on every path; it
 * starts as SBIINV only and picks up MISS/REPL bits when a cache
 * group is switched off.
 */
static int mchkkill; /* number of times we've recovered by killing */
static int mchklock; /* reentrancy lock */
static int mchkcache = SBIINV; /* current state of cache disable bits */
157:
158: /*
159: * sbi fault
160: * yell and hope for the best
161: */
162:
163: sbifault()
164: {
165: register int fs, er;
166:
167: fs = mfpr(SBIFS);
168: er = mfpr(SBIER);
169: machreset();
170: printf("sbi fault: fs %x er %x\n", fs, er);
171: }
172:
173: /*
174: * SBI write timeout
175: * no recovery;
176: * panic if kernel or if updating page table
177: * else just signal
178: */
179:
180: machwtmo()
181: {
182: register int fs, er, ta;
183:
184: fs = mfpr(SBIFS);
185: er = mfpr(SBIER);
186: ta = mfpr(SBITA);
187: machreset();
188: printf("sbi write timeout: fs %x er %x ta %x\n", fs, er, ta);
189: if ((ta & (TAMODE | TAPROT)) != (TAMODE | TAPROT))
190: panic("wtmo");
191: runrun++;
192: aston();
193: psignal(u.u_procp, SIGBUS);
194: }
195:
196: /*
197: * clear latches in sbi error registers
198: */
199:
200: machreset()
201: {
202: mtpr(SBIFS, (mfpr(SBIFS) &~ FSNEF)|FSIE);
203: mtpr(SBIER, mfpr(SBIER) | ERCLR);
204: }
205:
206: /*
207: * Machine check.
208: * If possible, recover and return;
209: * if not but in user mode, send a signal;
210: * if not and in kernel mode, panic.
211: */
212:
213: machinecheck(ps, f)
214: long ps;
215: struct mframe *f;
216: {
217: int ok;
218:
219: ok = mckrec(f);
220: machreset();
221: printf("\nMachine check, type %x\n", f->type);
222: if ((f->type & ABORT) == 0)
223: printf("%s fault\n", macher[f->type & TYPE]);
224: else
225: printf("%s abort\n", macher[f->type & TYPE]);
226: printf("pc %x psl %x\n", f->pc, f->psl);
227: printf("v/p addr %x/%x\n", f->vaddr, f->paddr<<2);
228: printf("ces %x sbi %x\n", f->ces, f->sbi);
229: if (ok)
230: return;
231: if (USERMODE(ps)) {
232: /*
233: * code stolen from setrun
234: */
235: runrun++;
236: aston();
237: psignal(u.u_procp, SIGBUS);
238: return;
239: }
240: panic("mchk");
241: }
242:
243: /*
244: * here to look at machine check type
245: * and arrange for recovery if possible
246: * cache is still turned off when we start
247: * return nonzero if we recovered
248: */
249:
/*
 * Classify a machine check and try to set up recovery.
 *   f - machine check frame
 * Returns nonzero if the interrupted instruction may be resumed,
 * zero if the caller has to signal or panic.
 *
 * Every path writes mchkcache to SBIMT before leaving the switch.
 * The order of the register writes and memory touches below is
 * deliberate; do not rearrange them.
 */
int
mckrec(f)
register struct mframe *f;
{
	register int x;

	/* count every check, recoverable or not */
	mchkcnt[f->type & TYPE]++;
	switch (f->type & TYPE) {
	case TBUFPAR: /* trans buffer parity */
	case IBTBUF:
		mtpr(TBIA, 0); /* clear the buffer */
		mtpr(SBIMT, mchkcache);
		break;

	case IBROMCHK: /* unexpected microcode confusion */
	case CSPAR: /* control store parity */
		mtpr(SBIMT, mchkcache);
		return (0); /* can never recover */

	case CACHEPAR: /* cache parity error */
	case IBCACHE:
		/*
		 * force bad stuff to be replaced
		 * in both cache groups
		 */
		/* touch the failing address, switch to CACHEOFF, touch it again */
		x = *(char *)f->vaddr;
		mtpr(SBIMT, CACHEOFF);
		x = 0; /* defeat optimizer */
		x = *(char *)f->vaddr;
		/*
		 * pick the group to leave disabled from the G0OK bits of par.
		 * NOTE(review): MISS bits are only ever ORed into mchkcache,
		 * so an error in each group in turn leaves both groups
		 * forced to miss.
		 */
		if ((f->par & G0OK) != G0OK) {
			printf("group 0 off\n");
			mchkcache |= G0MISS | G0REPL;
			mchkcache &=~ G1REPL; /* don't replace both groups */
		}
		else {
			printf("group 1 off\n");
			mchkcache |= G1MISS | G1REPL;
			mchkcache &=~ G0REPL;
		}
		mtpr(SBIMT, mchkcache);
		break;

	case CPTIMEOUT:
	case IBTIMEOUT:
		/* nothing to repair; go on to the recoverability test */
		mtpr(SBIMT, mchkcache);
		break;

	case RDSUBST: /* read data substitute == hard mem err */
	case IBRDSUB:
		mtpr(SBIMT, mchkcache);
		memerr(); /* defined elsewhere */
		return (0);

	default:
		/* type codes with no case above (the "-" entries of macher) */
		mtpr(SBIMT, mchkcache);
		panic("weird mcheck");
	}
	/*
	 * if we get here, it's potentially recoverable
	 * may recover if:
	 * haven't had one in the last second
	 * not an abort or timeout
	 * detected by lookahead, or resumable instruction
	 */
	/* `time' is declared elsewhere; presumably the clock in seconds -- TODO confirm */
	if (mchktime[f->type & TYPE] == time)
		return (0);
	mchktime[f->type & TYPE] = time;
	if (f->type & (ABORT | TIMEOUT))
		return (0);
	if (f->type & IB)
		return (1);
	/* fetch the opcode byte at the saved pc and look it up in mrestab */
	x = *(char *)f->pc & 0377;
	if (mrestab[x >> 3] & (1 << (x & 07)))
		return (1);
	return (0);
}
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.