/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory | Revision Log


Revision 75 - (show annotations) (download)
Sat Feb 24 21:40:37 2007 UTC (7 years, 2 months ago) by nigel
File MIME type: text/plain
File size: 17061 byte(s)
Load pcre-5.0 into code/trunk.

1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2004 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #include <ctype.h>
41 #include <stdio.h>
42 #include <string.h>
43 #include <stdlib.h>
44 #include <errno.h>
45 #include "config.h"
46 #include "pcre.h"
47
48 #define FALSE 0
49 #define TRUE 1
50
51 typedef int BOOL;
52
53 #define VERSION "3.0 14-Jan-2003"
54 #define MAX_PATTERN_COUNT 100
55
56
57 /*************************************************
58 * Global variables *
59 *************************************************/
60
61 static char *pattern_filename = NULL;
62 static int pattern_count = 0;
63 static pcre **pattern_list;
64 static pcre_extra **hints_list;
65
66 static BOOL count_only = FALSE;
67 static BOOL filenames = TRUE;
68 static BOOL filenames_only = FALSE;
69 static BOOL invert = FALSE;
70 static BOOL number = FALSE;
71 static BOOL recurse = FALSE;
72 static BOOL silent = FALSE;
73 static BOOL whole_lines = FALSE;
74
/* One entry of the option table: the single-character form (-1 for an option
that has only a long name, 0 for the end-of-table marker), the long name as
written after "--", and the text shown for it by help(). */

struct option_item {
  int one_char;
  const char *long_name;
  const char *help_text;
};

typedef struct option_item option_item;

/* The table itself. It is scanned linearly until the entry whose one_char is
zero, so that terminator must stay last. */

static option_item optionlist[] = {
  { -1,  "help",               "display this help and exit" },
  { 'c', "count",              "print only a count of matching lines per FILE" },
  { 'h', "no-filename",        "suppress the prefixing filename on output" },
  { 'i', "ignore-case",        "ignore case distinctions" },
  { 'l', "files-with-matches", "print only FILE names containing matches" },
  { 'n', "line-number",        "print line number with output lines" },
  { 'r', "recursive",          "recursively scan sub-directories" },
  { 's', "no-messages",        "suppress error messages" },
  { 'u', "utf-8",              "use UTF-8 mode" },
  { 'V', "version",            "print version information and exit" },
  { 'v', "invert-match",       "select non-matching lines" },
  { 'x', "line-regex",         "force PATTERN to match only whole lines" },
  { 'x', "line-regexp",        "force PATTERN to match only whole lines" },
  { 0,   NULL,                 NULL }
};
99
100
101 /*************************************************
102 * Functions for directory scanning *
103 *************************************************/
104
105 /* These functions are defined so that they can be made system specific,
106 although at present the only ones are for Unix, Win32, and for "no directory
107 recursion support". */
108
109
110 /************* Directory scanning in Unix ***********/
111
112 #if IS_UNIX
113 #include <sys/types.h>
114 #include <sys/stat.h>
115 #include <dirent.h>
116
117 typedef DIR directory_type;
118
/* Test whether a path names a directory (Unix version).

Arguments:
  filename    the path to examine

Returns:      '/' (the separator to use when building paths inside it) if
              stat() succeeds and reports a directory; 0 otherwise, including
              when stat() fails. In the failure case the caller goes on to
              open the path as a file, which is expected to fail and produce
              the error message.
*/

static int
isdirectory(char *filename)
{
  struct stat statbuf;

  if (stat(filename, &statbuf) < 0)
    return 0;

  /* S_ISDIR() is the base POSIX test; the S_IFMT/S_IFDIR masks are XSI
  extensions that are not visible under strict ISO C feature settings. */

  return S_ISDIR(statbuf.st_mode)? '/' : 0;
}
127
128 static directory_type *
129 opendirectory(char *filename)
130 {
131 return opendir(filename);
132 }
133
134 static char *
135 readdirectory(directory_type *dir)
136 {
137 for (;;)
138 {
139 struct dirent *dent = readdir(dir);
140 if (dent == NULL) return NULL;
141 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
142 return dent->d_name;
143 }
144 return NULL; /* Keep compiler happy; never executed */
145 }
146
147 static void
148 closedirectory(directory_type *dir)
149 {
150 closedir(dir);
151 }
152
153
154 /************* Directory scanning in Win32 ***********/
155
156 /* I (Philip Hazel) have no means of testing this code. It was contributed by
157 Lionel Fourquaux. */
158
159
160 #elif HAVE_WIN32API
161
162 #ifndef STRICT
163 # define STRICT
164 #endif
165 #ifndef WIN32_LEAN_AND_MEAN
166 # define WIN32_LEAN_AND_MEAN
167 #endif
168 #include <windows.h>
169
170 typedef struct directory_type
171 {
172 HANDLE handle;
173 BOOL first;
174 WIN32_FIND_DATA data;
175 } directory_type;
176
177 int
178 isdirectory(char *filename)
179 {
180 DWORD attr = GetFileAttributes(filename);
181 if (attr == INVALID_FILE_ATTRIBUTES)
182 return 0;
183 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
184 }
185
186 directory_type *
187 opendirectory(char *filename)
188 {
189 size_t len;
190 char *pattern;
191 directory_type *dir;
192 DWORD err;
193 len = strlen(filename);
194 pattern = (char *) malloc(len + 3);
195 dir = (directory_type *) malloc(sizeof(*dir));
196 if ((pattern == NULL) || (dir == NULL))
197 {
198 fprintf(stderr, "pcregrep: malloc failed\n");
199 exit(2);
200 }
201 memcpy(pattern, filename, len);
202 memcpy(&(pattern[len]), "\\*", 3);
203 dir->handle = FindFirstFile(pattern, &(dir->data));
204 if (dir->handle != INVALID_HANDLE_VALUE)
205 {
206 free(pattern);
207 dir->first = TRUE;
208 return dir;
209 }
210 err = GetLastError();
211 free(pattern);
212 free(dir);
213 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
214 return NULL;
215 }
216
217 char *
218 readdirectory(directory_type *dir)
219 {
220 for (;;)
221 {
222 if (!dir->first)
223 {
224 if (!FindNextFile(dir->handle, &(dir->data)))
225 return NULL;
226 }
227 else
228 {
229 dir->first = FALSE;
230 }
231 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
232 return dir->data.cFileName;
233 }
234 #ifndef _MSC_VER
235 return NULL; /* Keep compiler happy; never executed */
236 #endif
237 }
238
239 void
240 closedirectory(directory_type *dir)
241 {
242 FindClose(dir->handle);
243 free(dir);
244 }
245
246
247 /************* Directory scanning when we can't do it ***********/
248
249 /* The type is void, and apart from isdirectory(), the functions do nothing. */
250
251 #else
252
typedef void directory_type;

/* Stubs for systems with no directory scanning support. isdirectory() always
answers "not a directory", so grep_or_recurse() never reaches the other three
functions. They nevertheless return well-defined values: a non-void function
that falls off its end yields an indeterminate result if the caller uses it. */

int isdirectory(char *filename) { (void)filename; return 0; }
directory_type * opendirectory(char *filename) { (void)filename; return NULL; }
char *readdirectory(directory_type *dir) { (void)dir; return NULL; }
void closedirectory(directory_type *dir) { (void)dir; }
259
260 #endif
261
262
263
264 #if ! HAVE_STRERROR
265 /*************************************************
266 * Provide strerror() for non-ANSI libraries *
267 *************************************************/
268
269 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
270 in their libraries, but can provide the same facility by this simple
271 alternative function. */
272
/* The C library's table of error messages and its size */

extern int sys_nerr;
extern char *sys_errlist[];

/* Map an error number to its message by indexing the library's table;
numbers outside the table get a fixed fallback text. */

char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
282 #endif /* HAVE_STRERROR */
283
284
285
286 /*************************************************
287 * Grep an individual file *
288 *************************************************/
289
290 static int
291 pcregrep(FILE *in, char *name)
292 {
293 int rc = 1;
294 int linenumber = 0;
295 int count = 0;
296 int offsets[99];
297 char buffer[BUFSIZ];
298
299 while (fgets(buffer, sizeof(buffer), in) != NULL)
300 {
301 BOOL match = FALSE;
302 int i;
303 int length = (int)strlen(buffer);
304 if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;
305 linenumber++;
306
307 for (i = 0; !match && i < pattern_count; i++)
308 {
309 match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,
310 offsets, 99) >= 0;
311 if (match && whole_lines && offsets[1] != length) match = FALSE;
312 }
313
314 if (match != invert)
315 {
316 if (count_only) count++;
317
318 else if (filenames_only)
319 {
320 fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);
321 return 0;
322 }
323
324 else if (silent) return 0;
325
326 else
327 {
328 if (name != NULL) fprintf(stdout, "%s:", name);
329 if (number) fprintf(stdout, "%d:", linenumber);
330 fprintf(stdout, "%s\n", buffer);
331 }
332
333 rc = 0;
334 }
335 }
336
337 if (count_only)
338 {
339 if (name != NULL) fprintf(stdout, "%s:", name);
340 fprintf(stdout, "%d\n", count);
341 }
342
343 return rc;
344 }
345
346
347
348
349 /*************************************************
350 * Grep a file or recurse into a directory *
351 *************************************************/
352
353 static int
354 grep_or_recurse(char *filename, BOOL dir_recurse, BOOL show_filenames,
355 BOOL only_one_at_top)
356 {
357 int rc = 1;
358 int sep;
359 FILE *in;
360
361 /* If the file is a directory and we are recursing, scan each file within it.
362 The scanning code is localized so it can be made system-specific. */
363
364 if ((sep = isdirectory(filename)) != 0 && dir_recurse)
365 {
366 char buffer[1024];
367 char *nextfile;
368 directory_type *dir = opendirectory(filename);
369
370 if (dir == NULL)
371 {
372 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,
373 strerror(errno));
374 return 2;
375 }
376
377 while ((nextfile = readdirectory(dir)) != NULL)
378 {
379 int frc;
380 sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);
381 frc = grep_or_recurse(buffer, dir_recurse, TRUE, FALSE);
382 if (frc == 0 && rc == 1) rc = 0;
383 }
384
385 closedirectory(dir);
386 return rc;
387 }
388
389 /* If the file is not a directory, or we are not recursing, scan it. If this is
390 the first and only argument at top level, we don't show the file name (unless
391 we are only showing the file name). Otherwise, control is via the
392 show_filenames variable. */
393
394 in = fopen(filename, "r");
395 if (in == NULL)
396 {
397 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));
398 return 2;
399 }
400
401 rc = pcregrep(in, (filenames_only || (show_filenames && !only_one_at_top))?
402 filename : NULL);
403 fclose(in);
404 return rc;
405 }
406
407
408
409
410 /*************************************************
411 * Usage function *
412 *************************************************/
413
/* Write the two-line usage summary to stderr.

Argument:   rc   a value that is handed straight back, so that callers can
                 write "return usage(2)" or "exit(usage(2))"
Returns:    rc, unchanged
*/

static int
usage(int rc)
{
  fputs("Usage: pcregrep [-Vcfhilnrsvx] [long-options] [pattern] [file1 file2 ...]\n",
    stderr);
  fputs("Type `pcregrep --help' for more information.\n", stderr);
  return rc;
}
421
422
423
424
425 /*************************************************
426 * Help function *
427 *************************************************/
428
429 static void
430 help(void)
431 {
432 option_item *op;
433
434 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
435 printf("Search for PATTERN in each FILE or standard input.\n");
436 printf("PATTERN must be present if -f is not used.\n");
437 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
438
439 printf("Options:\n");
440
441 for (op = optionlist; op->one_char != 0; op++)
442 {
443 int n;
444 char s[4];
445 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
446 printf(" %s --%s%n", s, op->long_name, &n);
447 n = 30 - n;
448 if (n < 1) n = 1;
449 printf("%.*s%s\n", n, " ", op->help_text);
450 }
451
452 printf("\n -f<filename> or --file=<filename>\n");
453 printf(" Read patterns from <filename> instead of using a command line option.\n");
454 printf(" Trailing white space is removed; blanks lines are ignored.\n");
455 printf(" There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
456
457 printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");
458 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
459 }
460
461
462
463
464 /*************************************************
465 * Handle an option *
466 *************************************************/
467
468 static int
469 handle_option(int letter, int options)
470 {
471 switch(letter)
472 {
473 case -1: help(); exit(0);
474 case 'c': count_only = TRUE; break;
475 case 'h': filenames = FALSE; break;
476 case 'i': options |= PCRE_CASELESS; break;
477 case 'l': filenames_only = TRUE;
478 case 'n': number = TRUE; break;
479 case 'r': recurse = TRUE; break;
480 case 's': silent = TRUE; break;
481 case 'u': options |= PCRE_UTF8; break;
482 case 'v': invert = TRUE; break;
483 case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;
484
485 case 'V':
486 fprintf(stderr, "pcregrep version %s using ", VERSION);
487 fprintf(stderr, "PCRE version %s\n", pcre_version());
488 exit(0);
489 break;
490
491 default:
492 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
493 exit(usage(2));
494 }
495
496 return options;
497 }
498
499
500
501
502 /*************************************************
503 * Main program *
504 *************************************************/
505
506 int
507 main(int argc, char **argv)
508 {
509 int i, j;
510 int rc = 1;
511 int options = 0;
512 int errptr;
513 const char *error;
514 BOOL only_one_at_top;
515
516 /* Process the options */
517
518 for (i = 1; i < argc; i++)
519 {
520 if (argv[i][0] != '-') break;
521
522 /* Missing options */
523
524 if (argv[i][1] == 0) exit(usage(2));
525
526 /* Long name options */
527
528 if (argv[i][1] == '-')
529 {
530 option_item *op;
531
532 if (strncmp(argv[i]+2, "file=", 5) == 0)
533 {
534 pattern_filename = argv[i] + 7;
535 continue;
536 }
537
538 for (op = optionlist; op->one_char != 0; op++)
539 {
540 if (strcmp(argv[i]+2, op->long_name) == 0)
541 {
542 options = handle_option(op->one_char, options);
543 break;
544 }
545 }
546 if (op->one_char == 0)
547 {
548 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
549 exit(usage(2));
550 }
551 }
552
553 /* One-char options */
554
555 else
556 {
557 char *s = argv[i] + 1;
558 while (*s != 0)
559 {
560 if (*s == 'f')
561 {
562 pattern_filename = s + 1;
563 if (pattern_filename[0] == 0)
564 {
565 if (i >= argc - 1)
566 {
567 fprintf(stderr, "pcregrep: File name missing after -f\n");
568 exit(usage(2));
569 }
570 pattern_filename = argv[++i];
571 }
572 break;
573 }
574 else options = handle_option(*s++, options);
575 }
576 }
577 }
578
579 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
580 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
581
582 if (pattern_list == NULL || hints_list == NULL)
583 {
584 fprintf(stderr, "pcregrep: malloc failed\n");
585 return 2;
586 }
587
588 /* Compile the regular expression(s). */
589
590 if (pattern_filename != NULL)
591 {
592 FILE *f = fopen(pattern_filename, "r");
593 char buffer[BUFSIZ];
594 if (f == NULL)
595 {
596 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
597 strerror(errno));
598 return 2;
599 }
600 while (fgets(buffer, sizeof(buffer), f) != NULL)
601 {
602 char *s = buffer + (int)strlen(buffer);
603 if (pattern_count >= MAX_PATTERN_COUNT)
604 {
605 fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",
606 MAX_PATTERN_COUNT);
607 return 2;
608 }
609 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
610 if (s == buffer) continue;
611 *s = 0;
612 pattern_list[pattern_count] = pcre_compile(buffer, options, &error,
613 &errptr, NULL);
614 if (pattern_list[pattern_count++] == NULL)
615 {
616 fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",
617 pattern_count, errptr, error);
618 return 2;
619 }
620 }
621 fclose(f);
622 }
623
624 /* If no file name, a single regex must be given inline */
625
626 else
627 {
628 if (i >= argc) return usage(2);
629 pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);
630 if (pattern_list[0] == NULL)
631 {
632 fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,
633 error);
634 return 2;
635 }
636 pattern_count++;
637 }
638
639 /* Study the regular expressions, as we will be running them may times */
640
641 for (j = 0; j < pattern_count; j++)
642 {
643 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
644 if (error != NULL)
645 {
646 char s[16];
647 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
648 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
649 return 2;
650 }
651 }
652
653 /* If there are no further arguments, do the business on stdin and exit */
654
655 if (i >= argc) return pcregrep(stdin, NULL);
656
657 /* Otherwise, work through the remaining arguments as files or directories.
658 Pass in the fact that there is only one argument at top level - this suppresses
659 the file name if the argument is not a directory. */
660
661 only_one_at_top = (i == argc - 1);
662 if (filenames_only) filenames = TRUE;
663
664 for (; i < argc; i++)
665 {
666 int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);
667 if (frc == 0 && rc == 1) rc = 0;
668 }
669
670 return rc;
671 }
672
673 /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12