|
|
1.1 ! root 1: #include <u.h> ! 2: #include <libc.h> ! 3: #include <ctype.h> ! 4: #ifdef plan9 ! 5: Dir mbuf; ! 6: #else ! 7: #define print printf ! 8: struct stat mbuf; ! 9: #define OREAD 0 ! 10: #endif ! 11: /* ! 12: * file - determine type of file ! 13: */ ! 14: ! 15: uchar buf[6000]; ! 16: short cfreq[140]; ! 17: short wfreq[50]; ! 18: int nbuf; ! 19: int flag; ! 20: int (*call[])(void); ! 21: ! 22: enum ! 23: { ! 24: Cword, ! 25: Fword, ! 26: Aword, ! 27: I1, ! 28: I2, ! 29: I3, ! 30: Clatin = 128, ! 31: Cbinary, ! 32: Cnull, ! 33: Ceascii, ! 34: }; ! 35: struct ! 36: { ! 37: char* word; ! 38: int flag; ! 39: } dict[] = ! 40: { ! 41: "TEXT", Aword, ! 42: "block", Fword, ! 43: "char", Cword, ! 44: "common", Fword, ! 45: "data", Fword, ! 46: "dimension", Fword, ! 47: "double", Cword, ! 48: "extern", Cword, ! 49: "fio", I2, ! 50: "float", Cword, ! 51: "function", Fword, ! 52: "h", I3, ! 53: "include", I1, ! 54: "int", Cword, ! 55: "integer", Fword, ! 56: "libc", I2, ! 57: "long", Cword, ! 58: "real", Fword, ! 59: "register", Cword, ! 60: "short", Cword, ! 61: "static", Cword, ! 62: "stdio", I2, ! 63: "struct", Cword, ! 64: "subroutine", Fword, ! 65: "u", I2, ! 66: "void", Cword, ! 67: }; ! 68: ! 69: enum ! 70: { ! 71: Short = 1<<0, /* size < 100 */ ! 72: Long = 1<<1, ! 73: ! 74: Fascii = 1<<2, /* printable ascii */ ! 75: Flatin = 1<<3, ! 76: Fbinary = 1<<4, ! 77: Feascii = 1<<5, /* including extended */ ! 78: Fnull = 1<<6, ! 79: }; ! 80: ! 81: void type(char*, int); ! 82: long lendian(uchar*); ! 83: ! 84: int ! 85: main(int argc, char *argv[]) ! 86: { ! 87: int i, l; ! 88: ! 89: l = 0; ! 90: for(i=1; i<argc; i++) ! 91: if(strlen(argv[i]) > l) ! 92: l = strlen(argv[i]); ! 93: for(i=1; i<argc; i++) ! 94: type(argv[i], l); ! 95: exit(0); ! 96: } ! 97: ! 98: void ! 99: type(char *file, int nlen) ! 100: { ! 101: int i, f, l, m, c; ! 102: char *p, *ep, word[20]; ! 103: ! 104: print("%s:%*s", file, nlen-strlen(file)+1, ""); ! 105: #ifdef plan9 ! 106: if(dirstat(file, &mbuf) < 0) { ! 107: print("cannot stat\n"); ! 108: return; ! 109: } ! 110: if(mbuf.mode & CHDIR) { ! 111: print("directory\n"); ! 112: return; ! 113: } ! 114: if(mbuf.type != 'M') { ! 115: print("special file #%c\n", mbuf.type); ! 116: return; ! 117: } ! 118: #else ! 119: if(stat(file, &mbuf) < 0) { ! 120: print("cannot stat\n"); ! 121: return; ! 122: } ! 123: switch(mbuf.st_mode&S_IFMT) { ! 124: case S_IFDIR: ! 125: print("directory\n"); ! 126: return; ! 127: case S_IFCHR: ! 128: print("character special file\n"); ! 129: return; ! 130: case S_IFBLK: ! 131: print("block special file\n"); ! 132: return; ! 133: } ! 134: #endif ! 135: ! 136: f = open(file, OREAD); ! 137: if(f < 0) { ! 138: print("cannot open\n"); ! 139: return; ! 140: } ! 141: nbuf = read(f, buf, sizeof(buf)); ! 142: close(f); ! 143: ! 144: if(nbuf < 0) { ! 145: print("cannot read\n"); ! 146: return; ! 147: } ! 148: if(nbuf == 0) { ! 149: print("empty\n"); ! 150: return; ! 151: } ! 152: ! 153: /* ! 154: * build histogram table ! 155: */ ! 156: memset(cfreq, 0, sizeof(cfreq)); ! 157: flag = 0; ! 158: if(nbuf > 100) ! 159: flag |= Long; ! 160: else ! 161: flag |= Short; ! 162: ! 163: for(i=0; i<nbuf; i++) { ! 164: f = buf[i] & 0xff; ! 165: if(f >= 128) { ! 166: if(f >= 128+32) ! 167: f = Clatin; /* latin */ ! 168: else ! 169: f = Cbinary; /* not latin */ ! 170: } else ! 171: if(!isprint(f) && !isspace(f)) ! 172: if(f == 0) ! 173: f = Cnull; ! 174: else ! 175: f = Ceascii; ! 176: cfreq[f]++; ! 177: } ! 178: ! 179: /* ! 180: * gross classify ! 181: */ ! 182: if(cfreq[Cbinary]) ! 183: flag |= Fbinary; ! 184: else ! 185: if(cfreq[Clatin]) ! 186: flag |= Flatin; ! 187: else ! 188: if(cfreq[Ceascii]) ! 189: flag |= Feascii; ! 190: else ! 191: if(cfreq[Cnull]) ! 192: flag |= Fnull; ! 193: else ! 194: flag |= Fascii; ! 195: ! 196: if(flag & Fnull) { ! 197: print("null\n"); ! 198: return; ! 199: } ! 200: ! 201: /* ! 202: * lookup dictionary words ! 203: */ ! 204: memset(wfreq, 0, sizeof(wfreq)); ! 205: if(flag & Fascii) { ! 206: ep = word+sizeof(word)-2; ! 207: for(i=0; i<nbuf; i++) { ! 208: f = buf[i]; ! 209: if(!isalpha(f)) ! 210: continue; ! 211: p = word; ! 212: for(; i<nbuf; i++) { ! 213: f = buf[i]; ! 214: if(!isalnum(f)) ! 215: break; ! 216: *p++ = f; ! 217: if(p >= ep) ! 218: break; ! 219: } ! 220: *p = 0; ! 221: f = 0; ! 222: l = sizeof(dict)/sizeof(dict[0]); ! 223: for(;;) { ! 224: if(f >= l) ! 225: break; ! 226: m = (f+l)/2; ! 227: c = strcmp(dict[m].word, word); ! 228: if(c == 0) { ! 229: wfreq[dict[m].flag]++; ! 230: break; ! 231: } ! 232: if(c < 0) ! 233: f = m+1; ! 234: else ! 235: l = m; ! 236: } ! 237: } ! 238: } ! 239: ! 240: /* ! 241: * call individual classify routines ! 242: */ ! 243: for(i=0; call[i]; i++) ! 244: if((*call[i])()) ! 245: return; ! 246: ! 247: /* ! 248: * if all else fails, ! 249: * print out gross classification ! 250: */ ! 251: if(flag & Short) ! 252: print("short "); ! 253: if(flag & Fascii) ! 254: print("ascii\n"); ! 255: else ! 256: if(flag & Feascii) ! 257: print("extended ascii\n"); ! 258: else ! 259: if(flag & Flatin) ! 260: print("latin ascii\n"); ! 261: else ! 262: print("binary\n"); ! 263: } ! 264: ! 265: long ! 266: lendian(uchar *p) ! 267: { ! 268: ! 269: return (p[0]) | ! 270: (p[1] << 8) | ! 271: (p[2] << 16) | ! 272: (p[3] << 24); ! 273: } ! 274: ! 275: int ! 276: long0(void) ! 277: { ! 278: ! 279: switch((unsigned)lendian(buf)) { ! 280: default: ! 281: return 0; ! 282: ! 283: case 0413: ! 284: print("demand paged "); ! 285: ! 286: case 0410: ! 287: print("pure "); ! 288: goto exec; ! 289: ! 290: case 0406: ! 291: print("mpx 68000 "); ! 292: goto exec; ! 293: ! 294: exec: ! 295: case 0407: ! 296: print("unix vax executable"); ! 297: if(lendian(buf+4) != 0) ! 298: print(" not stripped"); ! 299: print("\n"); ! 300: break; ! 301: ! 302: case 0411: ! 303: print("jfr 411 executable\n"); ! 304: break; ! 305: ! 306: case 0177555: ! 307: print("very old archive\n"); ! 308: break; ! 309: ! 310: case 0177545: ! 311: print("old archive\n"); ! 312: break; ! 313: ! 314: case 0135246: /* andrew/ehg */ ! 315: print("view2d input file\n"); ! 316: break; ! 317: ! 318: case 0135256: /* andrew */ ! 319: print("apl file\n"); ! 320: break; ! 321: ! 322: case 0164200: /* td */ ! 323: print("Lucasfilm picture\n"); ! 324: break; ! 325: ! 326: case 0600560: ! 327: print("mux downloadable file\n"); ! 328: break; ! 329: ! 330: case 0x07010000: ! 331: print("68020 plan9 executable\n"); ! 332: break; ! 333: ! 334: case 0x07040000: ! 335: print("mips plan9 executable\n"); ! 336: break; ! 337: ! 338: case 0x97010000: ! 339: print("hobbit plan9 executable\n"); ! 340: break; ! 341: ! 342: case 0xab020000: ! 343: print("sparc plan9 executable\n"); ! 344: break; ! 345: ! 346: case 0xeb010000: ! 347: print("386 plan9 executable\n"); ! 348: break; ! 349: case 0x0b1f1bdc: ! 350: print("daisy\n"); ! 351: break; ! 352: case 0x64205300: ! 353: print("S data object\n"); ! 354: break; ! 355: } ! 356: return 1; ! 357: } ! 358: ! 359: int ! 360: short0(void) ! 361: { ! 362: ! 363: switch(lendian(buf) & 0xffff) { ! 364: default: ! 365: return 0; ! 366: ! 367: case 070707: ! 368: print("cpio archive\n"); ! 369: break; ! 370: ! 371: case 0x02f7: ! 372: print("tex dvi\n"); ! 373: break; ! 374: ! 375: case 0405: ! 376: case 0407: ! 377: case 0410: ! 378: case 0411: ! 379: print("pdp-11 executable\n"); ! 380: break; ! 381: case 0x0000: ! 382: print("bitmap\n"); ! 383: break; ! 384: } ! 385: return 1; ! 386: } ! 387: ! 388: /* ! 389: * initial words to classify file ! 390: */ ! 391: char* iwords[] = ! 392: { ! 393: "!<arch>\n__.SYMDEF", ! 394: "archive random library", ! 395: "!<arch>\n", ! 396: "archive", ! 397: "070707", ! 398: "cpio archive - ascii header", ! 399: "#FIG", ! 400: "fig ouput", ! 401: "#!/bin/echo", ! 402: "cyntax object file", ! 403: "#!/bin/rc", ! 404: "rc executable file", ! 405: "#!/bin/sh", ! 406: "sh executable file", ! 407: "%!", ! 408: "postscript", ! 409: "@document(", ! 410: "imagen", ! 411: "x T i300", ! 412: "troff output for i300", ! 413: "x T im300", ! 414: "troff output for im300", ! 415: "x T post", ! 416: "troff output for post", ! 417: "x T opost", ! 418: "troff output for opost", ! 419: "x T Latin1", ! 420: "troff output for Latin1", ! 421: "x T 202", ! 422: "troff output for 202", ! 423: "x T aps", ! 424: "troff output for aps", ! 425: 0,0 ! 426: }; ! 427: ! 428: int ! 429: istring(void) ! 430: { ! 431: int i, n; ! 432: char *p; ! 433: ! 434: for(i=0; p=iwords[i]; i+=2) { ! 435: n = strlen(p); ! 436: if(nbuf >= n && !strncmp((char*)buf, p, n)) { ! 437: print("%s\n", iwords[i+1]); ! 438: return 1; ! 439: } ! 440: } ! 441: if(strncmp((char*)buf, "TYPE=", 5) == 0) { /* td */ ! 442: for(i=5; i<nbuf; i++) ! 443: if(buf[i] == '\n') ! 444: break; ! 445: print("%.*s picture\n", i-5, buf+5); ! 446: return 1; ! 447: } ! 448: return 0; ! 449: } ! 450: ! 451: /* ! 452: * low entropy means encrypted ! 453: */ ! 454: int ! 455: ismung(void) ! 456: { ! 457: int i, bucket[8]; ! 458: float cs; ! 459: ! 460: if(nbuf < 64) ! 461: return 0; ! 462: memset(bucket, 0, sizeof(bucket)); ! 463: for(i=0; i<64; i++) ! 464: bucket[(buf[i]>>5)&07] += 1; ! 465: ! 466: cs = 0.; ! 467: for(i=0; i<8; i++) ! 468: cs += (bucket[i]-8)*(bucket[i]-8); ! 469: cs /= 8.; ! 470: if(cs <= 24.322) { ! 471: if(buf[0]==037 && buf[1]==0235) ! 472: print("compressed\n"); ! 473: else ! 474: print("encrypted\n"); ! 475: return 1; ! 476: } ! 477: return 0; ! 478: } ! 479: ! 480: /* ! 481: * english by punctuation and frequencies ! 482: */ ! 483: int ! 484: isenglish(void) ! 485: { ! 486: int i, vow, comm, rare, badpun, punct; ! 487: char *p; ! 488: ! 489: if(!(flag & (Fascii|Feascii))) ! 490: return 0; ! 491: badpun = 0; ! 492: punct = 0; ! 493: for(i=0; i<nbuf-1; i++) ! 494: switch(buf[i]) { ! 495: case '.': ! 496: case ',': ! 497: case ')': ! 498: case '%': ! 499: case ';': ! 500: case ':': ! 501: case '?': ! 502: punct++; ! 503: if(buf[i+1] != ' ' && buf[i+1] != '\n') ! 504: badpun++; ! 505: } ! 506: if(badpun*5 > punct) ! 507: return 0; ! 508: if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e']) /* shell file test */ ! 509: return 0; ! 510: if(2*cfreq[';'] > cfreq['e']) ! 511: return 0; ! 512: ! 513: vow = 0; ! 514: for(p="AEIOU"; *p; p++) { ! 515: vow += cfreq[*p]; ! 516: vow += cfreq[tolower(*p)]; ! 517: } ! 518: comm = 0; ! 519: for(p="ETAION"; *p; p++) { ! 520: comm += cfreq[*p]; ! 521: comm += cfreq[tolower(*p)]; ! 522: } ! 523: rare = 0; ! 524: for(p="VJKQXZ"; *p; p++) { ! 525: rare += cfreq[*p]; ! 526: rare += cfreq[tolower(*p)]; ! 527: } ! 528: if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) { ! 529: print("English text\n"); ! 530: return 1; ! 531: } ! 532: return 0; ! 533: } ! 534: ! 535: int ! 536: isc(void) ! 537: { ! 538: int n; ! 539: ! 540: n = wfreq[I1]; ! 541: /* ! 542: * includes ! 543: */ ! 544: if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n) ! 545: goto yes; ! 546: /* ! 547: * declarations ! 548: */ ! 549: if(wfreq[Cword] >= 5 && cfreq[';'] >= 5) ! 550: goto yes; ! 551: /* ! 552: * assignments ! 553: */ ! 554: if(cfreq[';'] >= 10 && cfreq['='] >= 10) ! 555: goto yes; ! 556: return 0; ! 557: ! 558: yes: ! 559: print("c program text\n"); ! 560: return 1; ! 561: } ! 562: ! 563: int ! 564: isas(void) ! 565: { ! 566: ! 567: /* ! 568: * includes ! 569: */ ! 570: if(wfreq[Aword] >= 2) ! 571: goto yes; ! 572: return 0; ! 573: ! 574: yes: ! 575: print("assembler program text\n"); ! 576: return 1; ! 577: } ! 578: ! 579: int ! 580: iscint(void) ! 581: { ! 582: ! 583: if(buf[0] == 0x3a) /* as = ANAME */ ! 584: if(buf[1] == 0x11) /* type = D_FILE */ ! 585: if(buf[2] == 1) /* sym */ ! 586: if(buf[3] == '<') { /* name of file */ ! 587: print("mips .v intermediate\n"); ! 588: return 1; ! 589: } ! 590: ! 591: if(buf[0] == 0x4d) /* aslo = ANAME */ ! 592: if(buf[1] == 0x01) /* ashi = ANAME */ ! 593: if(buf[2] == 0x32) /* type = D_FILE */ ! 594: if(buf[3] == 1) /* sym */ ! 595: if(buf[4] == '<') { /* name of file */ ! 596: print("68020 .2 intermediate\n"); ! 597: return 1; ! 598: } ! 599: ! 600: if(buf[0] == 0x43) /* as = ANAME */ ! 601: if(buf[1] == 0x0d) /* type */ ! 602: if(buf[2] == 1) /* sym */ ! 603: if(buf[3] == '<') { /* name of file */ ! 604: print("hobbit .z intermediate\n"); ! 605: return 1; ! 606: } ! 607: ! 608: if(buf[0] == 0x74) /* as = ANAME */ ! 609: if(buf[1] == 0x10) /* type */ ! 610: if(buf[2] == 1) /* sym */ ! 611: if(buf[3] == '<') { /* name of file */ ! 612: print("sparc .k intermediate\n"); ! 613: return 1; ! 614: } ! 615: ! 616: if(buf[0] == 0x7e) /* aslo = ANAME */ ! 617: if(buf[1] == 0x00) /* ashi = ANAME */ ! 618: if(buf[2] == 0x45) /* type = D_FILE */ ! 619: if(buf[3] == 1) /* sym */ ! 620: if(buf[4] == '<') { /* name of file */ ! 621: print("386 .8 intermediate\n"); ! 622: return 1; ! 623: } ! 624: ! 625: return 0; ! 626: } ! 627: ! 628: /* ! 629: * pick up a number with ! 630: * syntax _*[0-9]+_ ! 631: */ ! 632: #define P9BITLEN 12 ! 633: int ! 634: p9bitnum(uchar *bp) ! 635: { ! 636: int n, c, len; ! 637: ! 638: len = P9BITLEN; ! 639: while(*bp == ' ') { ! 640: bp++; ! 641: len--; ! 642: if(len <= 0) ! 643: return -1; ! 644: } ! 645: n = 0; ! 646: while(len > 1) { ! 647: c = *bp++; ! 648: if(!isdigit(c)) ! 649: return -1; ! 650: n = n*10 + c-'0'; ! 651: len--; ! 652: } ! 653: if(*bp != ' ') ! 654: return -1; ! 655: return n; ! 656: } ! 657: ! 658: int ! 659: isp9bit(void) ! 660: { ! 661: int ldep, lox, loy, hix, hiy; ! 662: long len; ! 663: ! 664: ldep = p9bitnum(buf + 0*P9BITLEN); ! 665: lox = p9bitnum(buf + 1*P9BITLEN); ! 666: loy = p9bitnum(buf + 2*P9BITLEN); ! 667: hix = p9bitnum(buf + 3*P9BITLEN); ! 668: hiy = p9bitnum(buf + 4*P9BITLEN); ! 669: ! 670: if(ldep < 0 || lox < 0 || loy < 0 || hix < 0 || hiy < 0) ! 671: return 0; ! 672: ! 673: len = (hix-lox) * (1<<ldep); /* row length */ ! 674: len = (len + 7) / 8; /* rounded to bytes */ ! 675: len *= (hiy-loy); /* col length */ ! 676: len += 60; /* size of initial ascii */ ! 677: ! 678: /* ! 679: * for regular file length is non-zero and must match calculation above ! 680: * for /dev/window and /dev/screen the length is always zero ! 681: */ ! 682: #ifdef plan9 ! 683: if(mbuf.length != len && mbuf.length != 0) ! 684: #else ! 685: if(mbuf.st_size != len && mbuf.st_size != 0) ! 686: #endif ! 687: return 0; ! 688: print("plan 9 bitmap\n"); ! 689: return 1; ! 690: } ! 691: ! 692: int (*call[])(void) = ! 693: { ! 694: long0, /* recognizable by first 4 bytes */ ! 695: short0, /* recognizable by first 2 bytes */ ! 696: istring, /* recognizable by first string */ ! 697: iscint, /* c intermediate */ ! 698: isc, /* c compiler key words */ ! 699: isas, /* assembler key words */ ! 700: ismung, /* entropy compressed/encrypted */ ! 701: isenglish, /* char frequency English */ ! 702: isp9bit, /* plan 9 bitmap (as from /dev/window) */ ! 703: 0 ! 704: };
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.