|
|
/* File-name wildcard pattern matching for GNU.
   Copyright (C) 1985, 1988, 1989, 1991 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */

/* To whomever it may concern: I have never seen the code which most
   Unix programs use to perform this function.  I wrote this from scratch
   based on specifications for the pattern matching.  --RMS.  */

#if defined (SHELL)
#  include <config.h>
#endif

#if defined (USG) && !defined (Xenix)
#  if !defined (USGr3) && !defined (USGr4)
#    define USGr3
#  endif /* USGr3 */
#endif /* USG && !Xenix */

#include <sys/types.h>

#if defined (__STDC__)
/* ANSI C hosts declare sprintf, the allocator and the string routines
   in the standard headers; use those instead of hand-written externs.  */
#  include <stdio.h>
#  include <stdlib.h>
#  include <string.h>
#endif /* __STDC__ */

#include <errno.h>
#if !defined (errno)
extern int errno;
#endif

#if defined (_POSIX_VERSION) || defined (USGr3) || defined (USGr4) || defined (DIRENT)
#  include <dirent.h>
#  define direct dirent
#  define D_NAMLEN(d) strlen((d)->d_name)
#else
#  define D_NAMLEN(d) ((d)->d_namlen)
#  if defined (Xenix)
#    include <sys/ndir.h>
#  else
#    if defined (USG)
#      include "ndir.h"
#    else
#      include <sys/dir.h>
#    endif
#  endif
#endif /* USGr3 || DIRENT.  */

#if defined (_POSIX_SOURCE)
/* Posix does not require that the d_ino field be present, and some
   systems do not provide it.  */
#  define REAL_DIR_ENTRY(dp) 1
#else
#  define REAL_DIR_ENTRY(dp) ((dp)->d_ino != 0)
#endif /* _POSIX_SOURCE */

#if defined (NeXT)
#  include <string.h>
#else
#  if defined (USG)
#    if !defined (isc386)
#      include <memory.h>
#    endif
#    include <string.h>
#    if defined (RISC6000)
extern void bcopy ();
#    else /* RISC6000 */
#      define bcopy(s, d, n) ((void) memcpy ((d), (s), (n)))
#    endif /* RISC6000 */
#    define rindex strrchr

#  else /* !USG */
#    include <strings.h>

extern void bcopy ();
#  endif /* !USG */
#endif /* !NeXT */

/* If the opendir () on your system lets you open non-directory files,
   then we consider that not robust.  Define OPENDIR_NOT_ROBUST in the
   SYSDEP_CFLAGS for your machines entry in machines.h.  */
#if defined (OPENDIR_NOT_ROBUST)
#  if defined (SHELL)
#    include "posixstat.h"
#  else
#    include <sys/stat.h>
#  endif /* SHELL */
#endif /* OPENDIR_NOT_ROBUST */

#if !defined (__STDC__)
extern char *malloc (), *realloc ();
extern void free ();
#endif /* !__STDC__ */

#ifndef NULL
#  define NULL 0
#endif

/* Global variable which controls whether or not * matches .*.
   Non-zero means don't match .*.  */
int noglob_dot_filenames = 1;

static int glob_match_after_star ();

/* Return nonzero if PATTERN has any special globbing chars in it.
   `?' and `*' always count.  A `[' only counts when a later `]' closes
   it.  A backslash quotes the character that follows it.  */
int
glob_pattern_p (pattern)
     char *pattern;
{
  register char *p = pattern;
  register char c;
  int open = 0;

  while ((c = *p++) != '\0')
    switch (c)
      {
      case '?':
      case '*':
	return 1;

      case '[':		/* Only accept an open brace if there is a close */
	open++;		/* brace to match it.  Bracket expressions must be */
	continue;	/* complete, according to Posix.2 */
      case ']':
	if (open)
	  return 1;
	continue;

      case '\\':
	/* Skip the quoted character; a trailing backslash ends the scan.  */
	if (*p++ == '\0')
	  return 0;
      }

  return 0;
}

/* Match the pattern PATTERN against the string TEXT;
   return 1 if it matches, 0 otherwise.

   A match means the entire string TEXT is used up in matching.

   In the pattern string, `*' matches any sequence of characters,
   `?' matches any character, [SET] matches any character in the specified set,
   [!SET] matches any character not in the specified set.

   A set is composed of characters or ranges; a range looks like
   character hyphen character (as in 0-9 or A-Z).
   [0-9a-zA-Z_] is the set of characters allowed in C identifiers.
   Any other character in the pattern must be matched exactly.

   To suppress the special syntactic significance of any of `[]*?!-\',
   and match the character exactly, precede it with a `\'.
   A backslash with nothing after it quotes nothing and never matches.

   If DOT_SPECIAL is nonzero,
   `*' and `?' do not match `.' at the beginning of TEXT.  */
int
glob_match (pattern, text, dot_special)
     char *pattern, *text;
     int dot_special;
{
  register char *p = pattern, *t = text;
  register char c;

  while ((c = *p++) != '\0')
    switch (c)
      {
      case '?':
	if (*t == '\0' || (dot_special && t == text && *t == '.'))
	  return 0;
	else
	  ++t;
	break;

      case '\\':
	/* Refuse a trailing backslash before touching *p++, so that P
	   can never step past the pattern's terminating null.  */
	if (*p == '\0' || *p++ != *t++)
	  return 0;
	break;

      case '*':
	if (dot_special && t == text && *t == '.')
	  return 0;
	return glob_match_after_star (p, t);

      case '[':
	{
	  register char c1 = *t++;
	  int invert;

	  if (!c1)
	    return (0);

	  invert = ((*p == '!') || (*p == '^'));
	  if (invert)
	    p++;

	  c = *p++;
	  while (1)
	    {
	      register char cstart = c, cend = c;

	      if (c == '\\')
		{
		  cstart = *p++;
		  /* The pattern ended right after the backslash.  */
		  if (cstart == '\0')
		    return 0;
		  cend = cstart;
		}

	      if (c == '\0')
		return 0;

	      c = *p++;
	      if (c == '-' && *p != ']')
		{
		  cend = *p++;
		  if (cend == '\\')
		    cend = *p++;
		  if (cend == '\0')
		    return 0;
		  c = *p++;
		}
	      if (c1 >= cstart && c1 <= cend)
		goto match;
	      if (c == ']')
		break;
	    }
	  if (!invert)
	    return 0;
	  break;

	match:
	  /* Skip the rest of the [...] construct that already matched.
	     C holds the first character not yet examined.  A quoted
	     character is stepped over, so an escaped `]' does not end
	     the set, and the set must be closed before the pattern ends.  */
	  while (c != ']')
	    {
	      if (c == '\0')
		return 0;
	      if (c == '\\')
		{
		  if (*p == '\0')
		    return 0;
		  ++p;
		}
	      c = *p++;
	    }
	  if (invert)
	    return 0;
	  break;
	}

      default:
	if (c != *t++)
	  return 0;
      }

  return *t == '\0';
}

/* Like glob_match, but match PATTERN against any final segment of TEXT.  */

static int
glob_match_after_star (pattern, text)
     char *pattern, *text;
{
  register char *p = pattern, *t = text;
  register char c, c1;

  /* Each `?' following the star consumes one character of TEXT;
     further stars add nothing.  */
  while ((c = *p++) == '?' || c == '*')
    if (c == '?' && *t++ == '\0')
      return 0;

  /* Nothing follows the star(s): whatever is left of TEXT matches.  */
  if (c == '\0')
    return 1;

  /* C1 is the literal character the remaining pattern starts with,
     used to skip positions of TEXT that cannot begin a match.  */
  if (c == '\\')
    c1 = *p;
  else
    c1 = c;

  while (1)
    {
      if ((c == '[' || *t == c1) && glob_match (p - 1, t, 0))
	return 1;
      if (*t++ == '\0')
	return 0;
    }
}

/* Return a vector of names of files in directory DIR
   whose names match glob pattern PAT.
   The names are not in any particular order.
   Wildcards at the beginning of PAT do not match an initial period.

   The vector is terminated by an element that is a null pointer.

   To free the space allocated, first free the vector's elements,
   then free the vector.

   Return 0 if cannot get enough memory to hold the pointer
   and the names.

   Return -1 if cannot access directory DIR.
   Look in errno for more information.  */

char **
glob_vector (pat, dir)
     char *pat;
     char *dir;
{
  struct globval
    {
      struct globval *next;
      char *name;
    };

  DIR *d;
  register struct direct *dp;
  struct globval *lastlink;
  register struct globval *nextlink;
  register char *nextname;
  unsigned int count;
  unsigned int namelen;
  int lose;
  register char **name_vector;
  register unsigned int i;
#if defined (OPENDIR_NOT_ROBUST)
  struct stat finfo;

  if (stat (dir, &finfo) < 0)
    return ((char **)-1);

  if (!S_ISDIR (finfo.st_mode))
    return ((char **)-1);
#endif /* OPENDIR_NOT_ROBUST */

  d = opendir (dir);
  if (d == NULL)
    return (char **) -1;

  lastlink = 0;
  count = 0;
  lose = 0;
  name_vector = NULL;

  /* Scan the directory, finding all names that match.
     For each name that matches, allocate a struct globval
     on the heap and store the name in it.  (The links used to come
     from alloca, which grows the stack without bound on a large
     directory.)
     Chain those structs together; lastlink is the front of the chain.  */
  while (1)
    {
#if defined (SHELL)
      /* Make globbing interruptible in the bash shell.  */
      extern int interrupt_state;

      if (interrupt_state)
	{
	  lose = 1;
	  break;
	}
#endif /* SHELL */

      dp = readdir (d);
      if (dp == NULL)
	break;

      /* If this directory entry is not to be used, try again.  */
      if (!REAL_DIR_ENTRY (dp))
	continue;

      /* If a dot must be explicitly matched, check to see if they do.  */
      if (noglob_dot_filenames && dp->d_name[0] == '.' && pat[0] != '.')
	continue;

      if (glob_match (pat, dp->d_name, noglob_dot_filenames))
	{
	  nextlink = (struct globval *) malloc (sizeof (struct globval));
	  if (nextlink == NULL)
	    {
	      lose = 1;
	      break;
	    }
	  namelen = strlen (dp->d_name) + 1;
	  nextname = (char *) malloc (namelen);
	  if (nextname == NULL)
	    {
	      free ((char *) nextlink);
	      lose = 1;
	      break;
	    }
	  bcopy (dp->d_name, nextname, namelen);
	  nextlink->name = nextname;
	  nextlink->next = lastlink;
	  lastlink = nextlink;
	  ++count;
	}
    }
  (void) closedir (d);

  if (!lose)
    {
      name_vector = (char **) malloc ((count + 1) * sizeof (char *));
      lose |= name_vector == NULL;
    }

  /* Have we run out of memory (or been interrupted)?  */
  if (lose)
    {
      /* Here free the strings we have got, and the links holding them.  */
      while (lastlink)
	{
	  nextlink = lastlink->next;
	  free (lastlink->name);
	  free ((char *) lastlink);
	  lastlink = nextlink;
	}
      return NULL;
    }

  /* Move the name pointers from the linked list into the vector,
     releasing each link once its name has been handed over.  */
  for (i = 0; i < count; ++i)
    {
      nextlink = lastlink->next;
      name_vector[i] = lastlink->name;
      free ((char *) lastlink);
      lastlink = nextlink;
    }

  name_vector[count] = NULL;
  return name_vector;
}

/* Free every string in the null-terminated vector VECTOR,
   then VECTOR itself.  VECTOR must not be a null pointer.  */

static void
free_name_vector (vector)
     char **vector;
{
  register unsigned int i;

  for (i = 0; vector[i] != NULL; ++i)
    free (vector[i]);
  free ((char *) vector);
}

/* Free the (element-less) vector VECTOR and the string BUFFER, either
   of which may be a null pointer, leaving errno as it was so that a
   caller about to return -1 still reports the original failure.  */

static void
glob_release_scratch (vector, buffer)
     char **vector;
     char *buffer;
{
  int saved_errno = errno;

  if (vector != NULL)
    free ((char *) vector);
  if (buffer != NULL)
    free (buffer);
  errno = saved_errno;
}

/* Return a new array which is the concatenation
   of each string in ARRAY to DIR.

   If DIR is empty, ARRAY itself is returned.  Otherwise, on success
   ARRAY and its strings are freed and the new array is returned.
   If memory runs out, everything built so far is freed, ARRAY is left
   untouched (the caller still owns it) and NULL is returned.  */

static char **
glob_dir_to_array (dir, array)
     char *dir, **array;
{
  register unsigned int i, l;
  int add_slash;
  char **result;

  l = strlen (dir);
  if (l == 0)
    return array;

  add_slash = dir[l - 1] != '/';

  i = 0;
  while (array[i] != NULL)
    ++i;

  result = (char **) malloc ((i + 1) * sizeof (char *));
  if (result == NULL)
    return NULL;

  for (i = 0; array[i] != NULL; i++)
    {
      result[i] = (char *) malloc (l + (add_slash ? 1 : 0)
				   + strlen (array[i]) + 1);
      if (result[i] == NULL)
	{
	  /* result[i] is null, so it terminates the partial vector.  */
	  free_name_vector (result);
	  return NULL;
	}
      sprintf (result[i], "%s%s%s", dir, add_slash ? "/" : "", array[i]);
    }
  result[i] = NULL;

  /* Free the input array.  */
  free_name_vector (array);

  return result;
}

/* Do globbing on PATHNAME.  Return an array of pathnames that match,
   marking the end of the array with a null-pointer as an element.
   If no pathnames match, then the array is empty (first element is null).
   If there isn't enough memory, then return NULL.
   If a file system error occurs, return -1; `errno' has the error code.  */
char **
glob_filename (pathname)
     char *pathname;
{
  char **result;
  unsigned int result_size;
  char *directory_name, *filename;
  char *directory_buf;	/* Heap copy of the directory part, if any.  */
  unsigned int directory_len;

  result = (char **) malloc (sizeof (char *));
  result_size = 1;
  if (result == NULL)
    return NULL;

  result[0] = NULL;
  directory_buf = NULL;

  /* Find the filename.  */
  filename = rindex (pathname, '/');
  if (filename == NULL)
    {
      filename = pathname;
      directory_name = "";
      directory_len = 0;
    }
  else
    {
      /* Copy everything up to and including the last slash.  */
      directory_len = (filename - pathname) + 1;
      directory_buf = (char *) malloc (directory_len + 1);
      if (directory_buf == NULL)
	goto memory_error;

      bcopy (pathname, directory_buf, directory_len);
      directory_buf[directory_len] = '\0';
      directory_name = directory_buf;
      ++filename;
    }

  /* If directory_name contains globbing characters, then we
     have to expand the previous levels.  Just recurse.  */
  if (glob_pattern_p (directory_name))
    {
      char **directories;
      register unsigned int i;

      if (directory_name[directory_len - 1] == '/')
	directory_name[directory_len - 1] = '\0';

      directories = glob_filename (directory_name);

      if (directories == NULL)
	goto memory_error;
      else if (directories == (char **) -1)
	{
	  glob_release_scratch (result, directory_buf);
	  return (char **) -1;
	}
      else if (*directories == NULL)
	{
	  free ((char *) directories);
	  glob_release_scratch (result, directory_buf);
	  return (char **) -1;
	}

      /* We have successfully globbed the preceding directory name.
	 For each name in DIRECTORIES, call glob_vector on it and
	 FILENAME.  Concatenate the results together.  */
      for (i = 0; directories[i] != NULL; ++i)
	{
	  char **temp_results = glob_vector (filename, directories[i]);

	  /* Handle error cases.  */
	  if (temp_results == NULL)
	    {
	      free_name_vector (directories);
	      goto memory_error;
	    }
	  else if (temp_results == (char **) -1)
	    /* This filename is probably not a directory.  Ignore it.  */
	    ;
	  else
	    {
	      char **array = glob_dir_to_array (directories[i], temp_results);
	      char **bigger;
	      register unsigned int l;

	      if (array == NULL)
		{
		  /* On failure TEMP_RESULTS still belongs to us.  */
		  free_name_vector (temp_results);
		  free_name_vector (directories);
		  goto memory_error;
		}

	      l = 0;
	      while (array[l] != NULL)
		++l;

	      /* Keep RESULT intact if realloc fails, so that the
		 names collected so far can still be freed.  */
	      bigger = (char **) realloc ((char *) result,
					  (result_size + l) * sizeof (char *));
	      if (bigger == NULL)
		{
		  free_name_vector (array);
		  free_name_vector (directories);
		  goto memory_error;
		}
	      result = bigger;

	      for (l = 0; array[l] != NULL; ++l)
		result[result_size++ - 1] = array[l];

	      result[result_size - 1] = NULL;

	      /* Note that the elements of ARRAY are not freed.  */
	      free ((char *) array);
	    }
	}
      /* Free the directories.  */
      free_name_vector (directories);
      glob_release_scratch ((char **) NULL, directory_buf);

      return result;
    }

  /* If there is only a directory name, return it.  */
  if (*filename == '\0')
    {
      char **bigger = (char **) realloc ((char *) result,
					 2 * sizeof (char *));

      if (bigger == NULL)
	goto memory_error;
      result = bigger;
      result[0] = (char *) malloc (directory_len + 1);
      if (result[0] == NULL)
	goto memory_error;
      bcopy (directory_name, result[0], directory_len + 1);
      result[1] = NULL;
      glob_release_scratch ((char **) NULL, directory_buf);
      return result;
    }
  else
    {
      /* Otherwise, just return what glob_vector
	 returns appended to the directory name.  */
      char **retval;
      char **temp_results = glob_vector (filename,
					 (directory_len == 0
					  ? "." : directory_name));

      if (temp_results == NULL || temp_results == (char **) -1)
	retval = temp_results;
      else
	{
	  retval = glob_dir_to_array (directory_name, temp_results);
	  if (retval == NULL)
	    free_name_vector (temp_results);
	}

      /* The placeholder vector is not part of the answer.  */
      glob_release_scratch (result, directory_buf);
      return retval;
    }

  /* We get to memory error if the program has run out of memory, or
     if this is the shell, and we have been interrupted.  */
 memory_error:
  if (result != NULL)
    free_name_vector (result);
  if (directory_buf != NULL)
    free (directory_buf);
#if defined (SHELL)
  {
    extern int interrupt_state;

    if (interrupt_state)
      throw_to_top_level ();
  }
#endif /* SHELL */
  return NULL;
}

#ifdef TEST

/* Glob each command-line argument and print the matches, one per line.  */
int
main (argc, argv)
     int argc;
     char **argv;
{
  int i;

  for (i = 1; i < argc; ++i)
    {
      char **value = glob_filename (argv[i]);
      unsigned int j;

      if (value == NULL)
	puts ("Out of memory.");
      else if (value == (char **) -1)
	perror (argv[i]);
      else
	/* A separate index keeps the argument counter I intact.  */
	for (j = 0; value[j] != NULL; j++)
	  puts (value[j]);
    }

  exit (0);
}
#endif /* TEST.  */
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.