|
|
1.1 root 1: /* File-name wildcard pattern matching for GNU.
2: Copyright (C) 1985, 1988, 1989, 1991 Free Software Foundation, Inc.
3:
4: This program is free software; you can redistribute it and/or modify
5: it under the terms of the GNU General Public License as published by
6: the Free Software Foundation; either version 2 of the License, or
7: (at your option) any later version.
8:
9: This program is distributed in the hope that it will be useful,
10: but WITHOUT ANY WARRANTY; without even the implied warranty of
11: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12: GNU General Public License for more details.
13:
14: You should have received a copy of the GNU General Public License
15: along with this program; if not, write to the Free Software
16: Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
17:
18:
19: /* To whomever it may concern: I have never seen the code which most
20: Unix programs use to perform this function. I wrote this from scratch
21: based on specifications for the pattern matching. --RMS. */
22:
23: #if defined (SHELL)
24: # include <config.h>
25: #endif
26:
27: #if defined (USG) && !defined (Xenix)
28: # if !defined (USGr3) && ! defined (USGr4)
29: # define USGr3
30: #endif /* USGr3 */
31: #endif /* USG && !Xenix */
32:
33: #include <sys/types.h>
34:
35: #if defined (_POSIX_VERSION) || defined (USGr3) || defined (USGr4) || defined (DIRENT)
36: # include <dirent.h>
37: # define direct dirent
38: # define D_NAMLEN(d) strlen((d)->d_name)
39: #else
40: # define D_NAMLEN(d) ((d)->d_namlen)
41: # if defined (Xenix)
42: # include <sys/ndir.h>
43: # else
44: # if defined (USG)
45: # include "ndir.h"
46: # else
47: # include <sys/dir.h>
48: # endif
49: # endif
50: #endif /* USGr3 || DIRENT. */
51:
52: #if defined (_POSIX_SOURCE)
53: /* Posix does not require that the d_ino field be present, and some
54: systems do not provide it. */
55: #define REAL_DIR_ENTRY(dp) 1
56: #else
57: #define REAL_DIR_ENTRY(dp) (dp->d_ino != 0)
58: #endif /* _POSIX_SOURCE */
59:
60:
61: #if defined (NeXT)
62: #include <string.h>
63: #else
64: #if defined (USG)
65: #if !defined (isc386)
66: # include <memory.h>
67: #endif
68: #include <string.h>
69: #if defined (RISC6000)
70: extern void bcopy ();
71: #else /* RISC6000 */
72: #define bcopy(s, d, n) ((void) memcpy ((d), (s), (n)))
73: #endif /* RISC6000 */
74: #define rindex strrchr
75:
76: #else /* !USG */
77: #include <strings.h>
78:
79: extern void bcopy ();
80: #endif /* !USG */
81: #endif /* !NeXT */
82:
83: /* If the opendir () on your system lets you open non-directory files,
84: then we consider that not robust. Define OPENDIR_NOT_ROBUST in the
85: SYSDEP_CFLAGS for your machine's entry in machines.h. */
86: #if defined (OPENDIR_NOT_ROBUST)
87: #if defined (SHELL)
88: # include "posixstat.h"
89: #else
90: # include <sys/stat.h>
91: #endif /* SHELL */
92: #endif /* OPENDIR_NOT_ROBUST */
93:
94: extern char *malloc (), *realloc ();
95: extern void free ();
96:
97: #ifndef NULL
98: #define NULL 0
99: #endif
100:
101: /* Global variable which controls whether or not * matches .*.
102: Non-zero means don't match .*. */
103: int noglob_dot_filenames = 1;
104:
105:
106: static int glob_match_after_star ();
107:
/* Report whether PATTERN contains an active globbing construct.

   The answer is 1 as soon as an unquoted `?' or `*' is seen, or an
   unquoted `]' that was preceded by at least one unquoted `['.
   A backslash quotes (hides) the character that follows it; a
   backslash that is the last character of PATTERN ends the scan.
   Otherwise the answer is 0. */
int
glob_pattern_p (pattern)
     char *pattern;
{
  register char *scan;
  int brackets_opened = 0;

  for (scan = pattern; *scan != '\0'; scan++)
    {
      if (*scan == '?' || *scan == '*')
        return 1;
      else if (*scan == '[')
        {
          /* A `[' only counts once a `]' shows up later on. */
          brackets_opened++;
        }
      else if (*scan == ']')
        {
          if (brackets_opened)
            return 1;
        }
      else if (*scan == '\\')
        {
          /* Step onto the quoted character so the loop increment
             moves past it; nothing follows a trailing backslash. */
          if (*++scan == '\0')
            return 0;
        }
    }

  return 0;
}
139:
/* glob_match and glob_match_after_star call each other.  Declaring the
   static helper again here is harmless and keeps the pair readable as
   a unit. */
static int glob_match_after_star ();

/* Match the pattern PATTERN against the string TEXT;
   return 1 if it matches, 0 otherwise.

   A match means the entire string TEXT is used up in matching.

   In the pattern string, `*' matches any sequence of characters,
   `?' matches any character, [SET] matches any character in the
   specified set, [!SET] or [^SET] matches any character not in the
   specified set.

   A set is composed of characters or ranges; a range looks like
   character hyphen character (as in 0-9 or A-Z).
   [0-9a-zA-Z_] is the set of characters allowed in C identifiers.
   Any other character in the pattern must be matched exactly.

   To suppress the special syntactic significance of any of `[]*?!-\',
   and match the character exactly, precede it with a `\'.

   A malformed pattern never matches: a `\' with nothing after it, or a
   `[' whose set is not closed by `]', makes the result 0.

   If DOT_SPECIAL is nonzero,
   `*' and `?' do not match `.' at the beginning of TEXT. */
int
glob_match (pattern, text, dot_special)
     char *pattern, *text;
     int dot_special;
{
  register char *p = pattern, *t = text;
  register char c;

  while ((c = *p++) != '\0')
    switch (c)
      {
      case '?':
        if (*t == '\0' || (dot_special && t == text && *t == '.'))
          return 0;
        ++t;
        break;

      case '\\':
        /* A backslash at the very end of PATTERN quotes nothing.
           Reject it here: comparing it with TEXT's terminator would
           succeed and carry both pointers past the ends of their
           strings. */
        if (*p == '\0')
          return 0;
        if (*p++ != *t++)
          return 0;
        break;

      case '*':
        if (dot_special && t == text && *t == '.')
          return 0;
        return glob_match_after_star (p, t);

      case '[':
        {
          register char c1 = *t++;      /* the text character to classify */
          int invert;
          int matched = 0;

          if (c1 == '\0')
            return 0;

          invert = (*p == '!' || *p == '^');
          if (invert)
            p++;

          /* Walk every element of the set up to the closing `]', even
             after one has matched, so that quoting and ranges in the
             remainder are parsed by the same rules.  C always holds
             the character that starts the next element; the first
             element may therefore be a literal `]'. */
          c = *p++;
          do
            {
              register char cstart, cend;

              if (c == '\0')
                return 0;               /* set was never closed */

              cstart = c;
              if (c == '\\')
                {
                  cstart = *p++;
                  if (cstart == '\0')
                    return 0;           /* `\' quotes nothing */
                }
              cend = cstart;

              c = *p++;
              if (c == '-' && *p != ']')
                {
                  /* A range.  A `-' directly before `]' is literal. */
                  cend = *p++;
                  if (cend == '\\')
                    cend = *p++;
                  if (cend == '\0')
                    return 0;
                  c = *p++;
                }

              if (c1 >= cstart && c1 <= cend)
                matched = 1;
            }
          while (c != ']');

          /* Plain set: need a match.  Inverted set: need no match. */
          if (matched == invert)
            return 0;
          break;
        }

      default:
        if (c != *t++)
          return 0;
      }

  return *t == '\0';
}

/* Like glob_match, but match PATTERN against any final segment of TEXT.
   PATTERN is the part of the pattern that follows a `*'.  Returns 1 on
   a match and 0 otherwise. */

static int
glob_match_after_star (pattern, text)
     char *pattern, *text;
{
  register char *p = pattern, *t = text;
  register char c, c1;

  /* Absorb any further wildcards: each `?' consumes one character of
     TEXT, and additional `*'s add nothing. */
  while ((c = *p++) == '?' || c == '*')
    if (c == '?' && *t++ == '\0')
      return 0;

  /* Nothing but wildcards remained, so whatever is left of TEXT matches. */
  if (c == '\0')
    return 1;

  /* C1 is the literal character the rest of the pattern must start
     with; it lets most positions be rejected without a full match. */
  c1 = (c == '\\') ? *p : c;

  for (;; ++t)
    {
      /* A bracket set has no single first character, so always try it. */
      if ((c == '[' || *t == c1) && glob_match (p - 1, t, 0))
        return 1;
      if (*t == '\0')
        return 0;
    }
}
286:
287: /* Return a vector of names of files in directory DIR
288: whose names match glob pattern PAT.
289: The names are not in any particular order.
290: Wildcards at the beginning of PAT do not match an initial period.
291:
292: The vector is terminated by an element that is a null pointer.
293:
294: To free the space allocated, first free the vector's elements,
295: then free the vector.
296:
297: Return 0 if cannot get enough memory to hold the pointer
298: and the names.
299:
300: Return -1 if cannot access directory DIR.
301: Look in errno for more information. */
302:
303: char **
304: glob_vector (pat, dir)
305: char *pat;
306: char *dir;
307: {
308: struct globval
309: {
310: struct globval *next;
311: char *name;
312: };
313:
314: DIR *d;
315: register struct direct *dp;
316: struct globval *lastlink;
317: register struct globval *nextlink;
318: register char *nextname;
319: unsigned int count;
320: int lose;
321: register char **name_vector;
322: register unsigned int i;
323: #if defined (OPENDIR_NOT_ROBUST)
324: struct stat finfo;
325:
326: if (stat (dir, &finfo) < 0)
327: return ((char **)-1);
328:
329: if (!S_ISDIR (finfo.st_mode))
330: return ((char **)-1);
331: #endif /* OPENDIR_NOT_ROBUST */
332:
333: d = opendir (dir);
334: if (d == NULL)
335: return (char **) -1;
336:
337: lastlink = 0;
338: count = 0;
339: lose = 0;
340:
341: /* Scan the directory, finding all names that match.
342: For each name that matches, allocate a struct globval
343: on the stack and store the name in it.
344: Chain those structs together; lastlink is the front of the chain. */
345: while (1)
346: {
347: #if defined (SHELL)
348: /* Make globbing interruptible in the bash shell. */
349: extern int interrupt_state;
350:
351: if (interrupt_state)
352: {
353: closedir (d);
354: lose = 1;
355: goto lost;
356: }
357: #endif /* SHELL */
358:
359: dp = readdir (d);
360: if (dp == NULL)
361: break;
362:
363: /* If this directory entry is not to be used, try again. */
364: if (!REAL_DIR_ENTRY (dp))
365: continue;
366:
367: /* If a dot must be explicity matched, check to see if they do. */
368: if (noglob_dot_filenames && dp->d_name[0] == '.' && pat[0] != '.')
369: continue;
370:
371: if (glob_match (pat, dp->d_name, noglob_dot_filenames))
372: {
373: nextlink = (struct globval *) alloca (sizeof (struct globval));
374: nextlink->next = lastlink;
375: nextname = (char *) malloc (strlen(dp->d_name) + 1);
376: if (nextname == NULL)
377: {
378: lose = 1;
379: break;
380: }
381: lastlink = nextlink;
382: nextlink->name = nextname;
383: bcopy (dp->d_name, nextname, strlen(dp->d_name) + 1);
384: ++count;
385: }
386: }
387: (void) closedir (d);
388:
389: if (!lose)
390: {
391: name_vector = (char **) malloc ((count + 1) * sizeof (char *));
392: lose |= name_vector == NULL;
393: }
394:
395: /* Have we run out of memory? */
396: lost:
397: if (lose)
398: {
399: /* Here free the strings we have got. */
400: while (lastlink)
401: {
402: free (lastlink->name);
403: lastlink = lastlink->next;
404: }
405: return NULL;
406: }
407:
408: /* Copy the name pointers from the linked list into the vector. */
409: for (i = 0; i < count; ++i)
410: {
411: name_vector[i] = lastlink->name;
412: lastlink = lastlink->next;
413: }
414:
415: name_vector[count] = NULL;
416: return name_vector;
417: }
418:
/* Return a new array which is the concatenation
   of each string in ARRAY to DIR.

   ARRAY is a null-terminated vector of malloc'd strings.  A `/' is
   inserted between DIR and each name unless DIR already ends in one.

   If DIR is the empty string, ARRAY itself is returned untouched.
   Otherwise, on success ARRAY and its strings are freed and a newly
   allocated null-terminated vector is returned.  If memory runs out,
   everything allocated here is released, ARRAY is left intact for the
   caller to dispose of, and NULL is returned. */

static char **
glob_dir_to_array (dir, array)
     char *dir, **array;
{
  register unsigned int i, l;
  unsigned int namelen;
  int add_slash;
  char **result;

  l = strlen (dir);
  if (l == 0)
    return array;

  add_slash = dir[l - 1] != '/';        /* 0 or 1; also used as a length */

  i = 0;
  while (array[i] != NULL)
    ++i;

  result = (char **) malloc ((i + 1) * sizeof (char *));
  if (result == NULL)
    return NULL;

  for (i = 0; array[i] != NULL; i++)
    {
      namelen = strlen (array[i]);
      result[i] = (char *) malloc (l + add_slash + namelen + 1);
      if (result[i] == NULL)
        {
          /* Give back the strings built so far instead of leaking them. */
          while (i > 0)
            free (result[--i]);
          free ((char *) result);
          return NULL;
        }

      /* The buffer is sized exactly for DIR, the optional slash, the
         name and the terminator, so plain copies cannot overrun it. */
      strcpy (result[i], dir);
      if (add_slash)
        result[i][l] = '/';
      strcpy (result[i] + l + add_slash, array[i]);
    }
  result[i] = NULL;

  /* Free the input array. */
  for (i = 0; array[i] != NULL; i++)
    free (array[i]);
  free ((char *) array);

  return result;
}
461:
/* Free every string in the null-terminated vector VECTOR, then the
   vector itself. */
static void
glob_free_vector (vector)
     char **vector;
{
  register unsigned int i;

  for (i = 0; vector[i] != NULL; ++i)
    free (vector[i]);
  free ((char *) vector);
}

/* Do globbing on PATHNAME.  Return an array of pathnames that match,
   marking the end of the array with a null-pointer as an element.
   If no pathnames match, then the array is empty (first element is null).
   If there isn't enough memory, then return NULL.
   If a file system error occurs, return -1; `errno' has the error code.

   The caller owns the returned array and every string in it. */
char **
glob_filename (pathname)
     char *pathname;
{
  char **result = NULL;
  unsigned int result_size;
  char *directory_name, *filename;
  unsigned int directory_len;

  /* Find the filename. */
  filename = rindex (pathname, '/');
  if (filename == NULL)
    {
      filename = pathname;
      directory_name = "";
      directory_len = 0;
    }
  else
    {
      /* Keep the trailing slash as part of the directory name. */
      directory_len = (filename - pathname) + 1;
      directory_name = (char *) alloca (directory_len + 1);

      bcopy (pathname, directory_name, directory_len);
      directory_name[directory_len] = '\0';
      ++filename;
    }

  /* If directory_name contains globbing characters, then we
     have to expand the previous levels.  Just recurse. */
  if (glob_pattern_p (directory_name))
    {
      char **directories;
      register unsigned int i;

      if (directory_name[directory_len - 1] == '/')
        directory_name[directory_len - 1] = '\0';

      directories = glob_filename (directory_name);

      if (directories == NULL)
        goto memory_error;
      else if (directories == (char **) -1)
        return (char **) -1;
      else if (*directories == NULL)
        {
          free ((char *) directories);
          return (char **) -1;
        }

      /* Start from an empty vector; RESULT_SIZE counts its slots,
         including the one holding the terminating null pointer. */
      result = (char **) malloc (sizeof (char *));
      if (result == NULL)
        {
          glob_free_vector (directories);
          return NULL;
        }
      result[0] = NULL;
      result_size = 1;

      /* We have successfully globbed the preceding directory name.
         For each name in DIRECTORIES, call glob_vector on it and
         FILENAME.  Concatenate the results together. */
      for (i = 0; directories[i] != NULL; ++i)
        {
          char **temp_results = glob_vector (filename, directories[i]);
          char **array, **bigger;
          register unsigned int l;

          /* Handle error cases. */
          if (temp_results == NULL)
            {
              glob_free_vector (directories);
              goto memory_error;
            }
          if (temp_results == (char **) -1)
            /* This filename is probably not a directory.  Ignore it. */
            continue;

          array = glob_dir_to_array (directories[i], temp_results);
          if (array == NULL)
            {
              /* glob_dir_to_array leaves its input alone when it fails. */
              glob_free_vector (temp_results);
              glob_free_vector (directories);
              goto memory_error;
            }

          l = 0;
          while (array[l] != NULL)
            ++l;

          /* Grow through a temporary so RESULT survives a failure and
             can still be released below. */
          bigger = (char **) realloc ((char *) result,
                                      (result_size + l) * sizeof (char *));
          if (bigger == NULL)
            {
              glob_free_vector (array);
              glob_free_vector (directories);
              goto memory_error;
            }
          result = bigger;

          for (l = 0; array[l] != NULL; ++l)
            result[result_size++ - 1] = array[l];

          result[result_size - 1] = NULL;

          /* Note that the elements of ARRAY are not freed; RESULT
             owns them now. */
          free ((char *) array);
        }

      /* Free the directories. */
      glob_free_vector (directories);

      return result;
    }

  /* If there is only a directory name, return it. */
  if (*filename == '\0')
    {
      result = (char **) malloc (2 * sizeof (char *));
      if (result == NULL)
        return NULL;
      result[0] = (char *) malloc (directory_len + 1);
      if (result[0] == NULL)
        goto memory_error;
      bcopy (directory_name, result[0], directory_len + 1);
      result[1] = NULL;
      return result;
    }
  else
    {
      /* Otherwise, just return what glob_vector
         returns appended to the directory name. */
      char **temp_results = glob_vector (filename,
                                         (directory_len == 0
                                          ? "." : directory_name));
      char **array;

      if (temp_results == NULL || temp_results == (char **) -1)
        return temp_results;

      array = glob_dir_to_array (directory_name, temp_results);
      if (array == NULL)
        /* Out of memory; the unconverted names are still ours to free. */
        glob_free_vector (temp_results);
      return array;
    }

  /* We get to memory error if the program has run out of memory, or
     if this is the shell, and we have been interrupted. */
 memory_error:
  if (result != NULL)
    glob_free_vector (result);
#if defined (SHELL)
  {
    extern int interrupt_state;

    if (interrupt_state)
      throw_to_top_level ();
  }
#endif /* SHELL */
  return NULL;
}
616:
#ifdef TEST

/* Test driver: glob each command-line argument in turn and print the
   matching pathnames one per line.  Prints "Out of memory." when
   glob_filename returns NULL, and reports the argument through perror
   when it returns -1. */
int
main (argc, argv)
     int argc;
     char **argv;
{
  int arg;
  unsigned int i;

  for (arg = 1; arg < argc; ++arg)
    {
      char **value = glob_filename (argv[arg]);

      if (value == NULL)
        puts ("Out of memory.");
      else if (value == (char **) -1)
        perror (argv[arg]);
      else
        {
          /* The argument loop and this loop need separate counters;
             sharing one would lose track of which argument is next. */
          for (i = 0; value[i] != NULL; i++)
            {
              puts (value[i]);
              free (value[i]);
            }
          free ((char *) value);
        }
    }

  exit (0);
}
#endif /* TEST. */
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.