| 6 |
its pattern matching. On a Unix or Win32 system it can recurse into |
its pattern matching. On a Unix or Win32 system it can recurse into |
| 7 |
directories. |
directories. |
| 8 |
|
|
| 9 |
Copyright (c) 1997-2005 University of Cambridge |
Copyright (c) 1997-2006 University of Cambridge |
| 10 |
|
|
| 11 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
| 12 |
Redistribution and use in source and binary forms, with or without |
Redistribution and use in source and binary forms, with or without |
| 38 |
*/ |
*/ |
| 39 |
|
|
| 40 |
#include <ctype.h> |
#include <ctype.h> |
| 41 |
|
#include <locale.h> |
| 42 |
#include <stdio.h> |
#include <stdio.h> |
| 43 |
#include <string.h> |
#include <string.h> |
| 44 |
#include <stdlib.h> |
#include <stdlib.h> |
| 56 |
|
|
| 57 |
typedef int BOOL; |
typedef int BOOL; |
| 58 |
|
|
| 59 |
#define VERSION "4.1 05-Sep-2005" |
#define VERSION "4.2 09-Jan-2006" |
| 60 |
#define MAX_PATTERN_COUNT 100 |
#define MAX_PATTERN_COUNT 100 |
| 61 |
|
|
| 62 |
#if BUFSIZ > 8192 |
#if BUFSIZ > 8192 |
| 66 |
#endif |
#endif |
| 67 |
|
|
| 68 |
|
|
| 69 |
|
/* Values for the "filenames" variable, which specifies options for file name |
| 70 |
|
output. The order is important; it is assumed that a file name is wanted for |
| 71 |
|
all values greater than FN_DEFAULT. */ |
| 72 |
|
|
| 73 |
|
enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE }; |
| 74 |
|
|
| 75 |
|
/* Actions for the -d and -D options */ |
| 76 |
|
|
| 77 |
|
enum { dee_READ, dee_SKIP, dee_RECURSE }; |
| 78 |
|
enum { DEE_READ, DEE_SKIP }; |
| 79 |
|
|
| 80 |
|
/* Actions for special processing options (flag bits) */ |
| 81 |
|
|
| 82 |
|
#define PO_WORD_MATCH 0x0001 |
| 83 |
|
#define PO_LINE_MATCH 0x0002 |
| 84 |
|
#define PO_FIXED_STRINGS 0x0004 |
| 85 |
|
|
| 86 |
|
|
| 87 |
|
|
| 88 |
/************************************************* |
/************************************************* |
| 89 |
* Global variables * |
* Global variables * |
| 90 |
*************************************************/ |
*************************************************/ |
| 91 |
|
|
| 92 |
|
/* Jeffrey Friedl has some debugging requirements that are not part of the |
| 93 |
|
regular code. */ |
| 94 |
|
|
| 95 |
|
#ifdef JFRIEDL_DEBUG |
| 96 |
|
static int S_arg = -1; |
| 97 |
|
#endif |
| 98 |
|
|
| 99 |
|
static char *colour_string = (char *)"1;31"; |
| 100 |
|
static char *colour_option = NULL; |
| 101 |
|
static char *dee_option = NULL; |
| 102 |
|
static char *DEE_option = NULL; |
| 103 |
static char *pattern_filename = NULL; |
static char *pattern_filename = NULL; |
| 104 |
static char *stdin_name = (char *)"(standard input)"; |
static char *stdin_name = (char *)"(standard input)"; |
| 105 |
|
static char *locale = NULL; |
| 106 |
|
|
| 107 |
|
static const unsigned char *pcretables = NULL; |
| 108 |
|
|
| 109 |
static int pattern_count = 0; |
static int pattern_count = 0; |
| 110 |
static pcre **pattern_list; |
static pcre **pattern_list; |
| 111 |
static pcre_extra **hints_list; |
static pcre_extra **hints_list; |
| 119 |
static int after_context = 0; |
static int after_context = 0; |
| 120 |
static int before_context = 0; |
static int before_context = 0; |
| 121 |
static int both_context = 0; |
static int both_context = 0; |
| 122 |
|
static int dee_action = dee_READ; |
| 123 |
|
static int DEE_action = DEE_READ; |
| 124 |
|
static int error_count = 0; |
| 125 |
|
static int filenames = FN_DEFAULT; |
| 126 |
|
static int process_options = 0; |
| 127 |
|
|
| 128 |
static BOOL count_only = FALSE; |
static BOOL count_only = FALSE; |
| 129 |
static BOOL filenames = TRUE; |
static BOOL do_colour = FALSE; |
|
static BOOL filenames_only = FALSE; |
|
|
static BOOL filenames_nomatch_only = FALSE; |
|
| 130 |
static BOOL hyphenpending = FALSE; |
static BOOL hyphenpending = FALSE; |
| 131 |
static BOOL invert = FALSE; |
static BOOL invert = FALSE; |
| 132 |
static BOOL multiline = FALSE; |
static BOOL multiline = FALSE; |
| 133 |
static BOOL number = FALSE; |
static BOOL number = FALSE; |
| 134 |
|
static BOOL only_matching = FALSE; |
| 135 |
static BOOL quiet = FALSE; |
static BOOL quiet = FALSE; |
|
static BOOL recurse = FALSE; |
|
| 136 |
static BOOL silent = FALSE; |
static BOOL silent = FALSE; |
|
static BOOL whole_lines = FALSE; |
|
|
static BOOL word_match = FALSE; |
|
| 137 |
|
|
| 138 |
/* Structure for options and list of them */ |
/* Structure for options and list of them */ |
| 139 |
|
|
| 140 |
enum { OP_NODATA, OP_STRING, OP_NUMBER }; |
enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER, |
| 141 |
|
OP_PATLIST }; |
| 142 |
|
|
| 143 |
typedef struct option_item { |
typedef struct option_item { |
| 144 |
int type; |
int type; |
| 148 |
const char *help_text; |
const char *help_text; |
| 149 |
} option_item; |
} option_item; |
| 150 |
|
|
| 151 |
|
/* Options without a single-letter equivalent get a negative value. This can be |
| 152 |
|
used to identify them. */ |
| 153 |
|
|
| 154 |
|
#define N_COLOUR (-1) |
| 155 |
|
#define N_EXCLUDE (-2) |
| 156 |
|
#define N_HELP (-3) |
| 157 |
|
#define N_INCLUDE (-4) |
| 158 |
|
#define N_LABEL (-5) |
| 159 |
|
#define N_LOCALE (-6) |
| 160 |
|
#define N_NULL (-7) |
| 161 |
|
|
| 162 |
static option_item optionlist[] = { |
static option_item optionlist[] = { |
| 163 |
{ OP_NODATA, -1, NULL, "", " terminate options" }, |
{ OP_NODATA, N_NULL, NULL, "", " terminate options" }, |
| 164 |
{ OP_NODATA, -1, NULL, "help", "display this help and exit" }, |
{ OP_NODATA, N_HELP, NULL, "help", "display this help and exit" }, |
| 165 |
{ OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" }, |
{ OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" }, |
| 166 |
{ OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" }, |
{ OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" }, |
| 167 |
{ OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" }, |
{ OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" }, |
| 168 |
{ OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" }, |
{ OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" }, |
| 169 |
{ OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" }, |
{ OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" }, |
| 170 |
{ OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" }, |
{ OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" }, |
| 171 |
{ OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" }, |
{ OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" }, |
| 172 |
{ OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" }, |
{ OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" }, |
| 173 |
{ OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" }, |
{ OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" }, |
| 174 |
{ OP_STRING, -1, &stdin_name, "label=name", "set name for standard input" }, |
{ OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" }, |
| 175 |
{ OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" }, |
{ OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" }, |
| 176 |
{ OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" }, |
{ OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" }, |
| 177 |
{ OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" }, |
{ OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" }, |
| 178 |
{ OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" }, |
{ OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" }, |
| 179 |
{ OP_STRING, -1, &exclude_pattern, "exclude=pattern","exclude matching files when recursing" }, |
{ OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" }, |
| 180 |
{ OP_STRING, -1, &include_pattern, "include=pattern","include matching files when recursing" }, |
{ OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" }, |
| 181 |
{ OP_NODATA, 's', NULL, "no-messages", "suppress error messages" }, |
{ OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" }, |
| 182 |
{ OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" }, |
{ OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" }, |
| 183 |
{ OP_NODATA, 'V', NULL, "version", "print version information and exit" }, |
{ OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" }, |
| 184 |
{ OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" }, |
{ OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" }, |
| 185 |
{ OP_NODATA, 'w', NULL, "word-regex(p)", "force PATTERN to match only as a word" }, |
{ OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" }, |
| 186 |
{ OP_NODATA, 'x', NULL, "line-regex(p)", "force PATTERN to match only whole lines" }, |
{ OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" }, |
| 187 |
{ OP_NODATA, 0, NULL, NULL, NULL } |
{ OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" }, |
| 188 |
|
{ OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" }, |
| 189 |
|
{ OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" }, |
| 190 |
|
#ifdef JFRIEDL_DEBUG |
| 191 |
|
{ OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" }, |
| 192 |
|
#endif |
| 193 |
|
{ OP_NODATA, 's', NULL, "no-messages", "suppress error messages" }, |
| 194 |
|
{ OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" }, |
| 195 |
|
{ OP_NODATA, 'V', NULL, "version", "print version information and exit" }, |
| 196 |
|
{ OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" }, |
| 197 |
|
{ OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" }, |
| 198 |
|
{ OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" }, |
| 199 |
|
{ OP_NODATA, 0, NULL, NULL, NULL } |
| 200 |
}; |
}; |
| 201 |
|
|
| 202 |
|
/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F |
| 203 |
|
options. These set the 1, 2, and 4 bits in process_options, respectively. Note |
| 204 |
|
that the combination of -w and -x has the same effect as -x on its own, so we |
| 205 |
|
can treat them as the same. */ |
| 206 |
|
|
| 207 |
|
/* Text placed in front of each pattern, indexed by the process_options value
(0x1 = -w word match, 0x2 = -x line match, 0x4 = -F fixed strings). Wherever
the -x bit is set the entry is the same with or without the -w bit. */

static const char *prefix[] = {
  "",            /* 0: no special processing */
  "\\b",         /* 1: -w */
  "^(?:",        /* 2: -x */
  "^(?:",        /* 3: -w -x */
  "\\Q",         /* 4: -F */
  "\\b\\Q",      /* 5: -F -w */
  "^(?:\\Q",     /* 6: -F -x */
  "^(?:\\Q"      /* 7: -F -w -x */
};
| 209 |
|
|
| 210 |
|
/* Text placed after each pattern, indexed by the process_options value
(0x1 = -w word match, 0x2 = -x line match, 0x4 = -F fixed strings). Wherever
the -x bit is set the entry is the same with or without the -w bit. */

static const char *suffix[] = {
  "",            /* 0: no special processing */
  "\\b",         /* 1: -w */
  ")$",          /* 2: -x */
  ")$",          /* 3: -w -x */
  "\\E",         /* 4: -F */
  "\\E\\b",      /* 5: -F -w */
  "\\E)$",       /* 6: -F -x */
  "\\E)$"        /* 7: -F -w -x */
};
| 212 |
|
|
| 213 |
|
|
| 214 |
|
|
| 215 |
/************************************************* |
/************************************************* |
| 216 |
* Functions for directory scanning * |
* OS-specific functions * |
| 217 |
*************************************************/ |
*************************************************/ |
| 218 |
|
|
| 219 |
/* These functions are defined so that they can be made system specific, |
/* These functions are defined so that they can be made system specific, |
| 220 |
although at present the only ones are for Unix, Win32, and for "no directory |
although at present the only ones are for Unix, Win32, and for "no support". */ |
|
recursion support". */ |
|
| 221 |
|
|
| 222 |
|
|
| 223 |
/************* Directory scanning in Unix ***********/ |
/************* Directory scanning in Unix ***********/ |
| 264 |
} |
} |
| 265 |
|
|
| 266 |
|
|
| 267 |
|
/************* Test for regular file in Unix **********/ |
| 268 |
|
|
| 269 |
|
/* Report whether a path names a regular file (Unix version).

Argument:  filename   the path to examine
Returns:   1 if stat() fails (in the expectation that opening the path as a
             file will then fail too); otherwise 1 for a regular file and 0
             for anything else */

static int
isregfile(char *filename)
{
struct stat info;

if (stat(filename, &info) >= 0)
  return S_ISREG(info.st_mode) != 0;

/* stat() failed: answer "regular" and leave the error to the open attempt. */

return 1;
}
| 277 |
|
|
| 278 |
|
|
| 279 |
|
/************* Test stdout for being a terminal in Unix **********/ |
| 280 |
|
|
| 281 |
|
static BOOL |
| 282 |
|
is_stdout_tty(void) |
| 283 |
|
{ |
| 284 |
|
return isatty(fileno(stdout)); |
| 285 |
|
} |
| 286 |
|
|
| 287 |
|
|
| 288 |
/************* Directory scanning in Win32 ***********/ |
/************* Directory scanning in Win32 ***********/ |
| 289 |
|
|
| 290 |
/* I (Philip Hazel) have no means of testing this code. It was contributed by |
/* I (Philip Hazel) have no means of testing this code. It was contributed by |
| 383 |
} |
} |
| 384 |
|
|
| 385 |
|
|
| 386 |
|
/************* Test for regular file in Win32 **********/ |
| 387 |
|
|
| 388 |
|
/* I don't know how to do this, or if it can be done; assume all paths are |
| 389 |
|
regular if they are not directories. */ |
| 390 |
|
|
| 391 |
|
/* Decide whether a path should be treated as a regular file (Win32 version).

No real regular-file test is implemented for this platform, so every path that
isdirectory() does not report as a directory is treated as regular.

Argument:  filename   the path to examine
Returns:   1 if the path is not a directory, otherwise 0 */

int isregfile(char *filename)
{
return !isdirectory(filename);   /* terminating semicolon was missing */
}
| 395 |
|
|
| 396 |
|
|
| 397 |
|
/************* Test stdout for being a terminal in Win32 **********/ |
| 398 |
|
|
| 399 |
|
/* I don't know how to do this; assume never */ |
| 400 |
|
|
| 401 |
|
static BOOL |
| 402 |
|
is_stdout_tty(void) |
| 403 |
|
{ |
| 404 |
|
FALSE; |
| 405 |
|
} |
| 406 |
|
|
| 407 |
|
|
| 408 |
/************* Directory scanning when we can't do it ***********/ |
/************* Directory scanning when we can't do it ***********/ |
| 409 |
|
|
| 410 |
/* The type is void, and apart from isdirectory(), the functions do nothing. */ |
/* The type is void, and apart from isdirectory(), the functions do nothing. */ |
| 413 |
|
|
| 414 |
typedef void directory_type; |
typedef void directory_type; |
| 415 |
|
|
| 416 |
/* Stub for systems without directory support: no path is ever reported as a
directory.

Argument:  filename   ignored
Returns:   0 */

int isdirectory(char *filename)
{
(void)filename;
return 0;
}
| 417 |
/* Stub for systems without directory support. The body used to be empty, so
a pointer-returning function fell off its end; it now returns NULL
explicitly, meaning "could not open".

Argument:  filename   ignored
Returns:   NULL */

directory_type * opendirectory(char *filename)
{
(void)filename;
return NULL;
}
| 418 |
/* Stub for systems without directory support. The body used to be empty, so
a pointer-returning function fell off its end; it now returns NULL
explicitly, meaning "no more entries".

Argument:  dir   ignored
Returns:   NULL */

char *readdirectory(directory_type *dir)
{
(void)dir;
return NULL;
}
| 419 |
/* Stub for systems without directory support: there is nothing to close.

Argument:  dir   ignored
Returns:   nothing */

void closedirectory(directory_type *dir)
{
(void)dir;
}
| 420 |
|
|
| 421 |
|
|
| 422 |
|
/************* Test for regular when we can't do it **********/ |
| 423 |
|
|
| 424 |
|
/* Assume all files are regular. */ |
| 425 |
|
|
| 426 |
|
/* Stub for systems where the file type cannot be tested: every path is
assumed to be a regular file.

Argument:  filename   ignored
Returns:   1 */

int isregfile(char *filename)
{
(void)filename;
return 1;
}
| 427 |
|
|
| 428 |
|
|
| 429 |
|
/************* Test stdout for being a terminal when we can't do it **********/ |
| 430 |
|
|
| 431 |
|
static BOOL |
| 432 |
|
is_stdout_tty(void) |
| 433 |
|
{ |
| 434 |
|
return FALSE; |
| 435 |
|
} |
| 436 |
|
|
| 437 |
|
|
| 438 |
#endif |
#endif |
| 439 |
|
|
| 440 |
|
|
| 466 |
*************************************************/ |
*************************************************/ |
| 467 |
|
|
| 468 |
/* This is called if we are about to lose said lines because of buffer filling, |
/* This is called if we are about to lose said lines because of buffer filling, |
| 469 |
and at the end of the file. |
and at the end of the file. The data in the line is written using fwrite() so |
| 470 |
|
that a binary zero does not terminate it. |
| 471 |
|
|
| 472 |
Arguments: |
Arguments: |
| 473 |
lastmatchnumber the number of the last matching line, plus one |
lastmatchnumber the number of the last matching line, plus one |
| 490 |
if (printname != NULL) fprintf(stdout, "%s-", printname); |
if (printname != NULL) fprintf(stdout, "%s-", printname); |
| 491 |
if (number) fprintf(stdout, "%d-", lastmatchnumber++); |
if (number) fprintf(stdout, "%d-", lastmatchnumber++); |
| 492 |
while (*pp != '\n') pp++; |
while (*pp != '\n') pp++; |
| 493 |
fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart); |
fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout); |
| 494 |
lastmatchrestart = pp + 1; |
lastmatchrestart = pp + 1; |
| 495 |
} |
} |
| 496 |
hyphenpending = TRUE; |
hyphenpending = TRUE; |
| 549 |
while (ptr < endptr) |
while (ptr < endptr) |
| 550 |
{ |
{ |
| 551 |
int i; |
int i; |
| 552 |
|
int mrc = 0; |
| 553 |
BOOL match = FALSE; |
BOOL match = FALSE; |
| 554 |
char *t = ptr; |
char *t = ptr; |
| 555 |
size_t length, linelength; |
size_t length, linelength; |
| 568 |
/* Run through all the patterns until one matches. Note that we don't include |
/* Run through all the patterns until one matches. Note that we don't include |
| 569 |
the final newline in the subject string. */ |
the final newline in the subject string. */ |
| 570 |
|
|
| 571 |
for (i = 0; !match && i < pattern_count; i++) |
for (i = 0; i < pattern_count; i++) |
| 572 |
{ |
{ |
| 573 |
match = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0, |
mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0, |
| 574 |
offsets, 99) >= 0; |
offsets, 99); |
| 575 |
|
if (mrc >= 0) { match = TRUE; break; } |
| 576 |
|
if (mrc != PCRE_ERROR_NOMATCH) |
| 577 |
|
{ |
| 578 |
|
fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc); |
| 579 |
|
if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1); |
| 580 |
|
fprintf(stderr, "this line:\n"); |
| 581 |
|
fwrite(ptr, 1, linelength, stderr); /* In case binary zero included */ |
| 582 |
|
fprintf(stderr, "\n"); |
| 583 |
|
if (error_count == 0 && |
| 584 |
|
(mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT)) |
| 585 |
|
{ |
| 586 |
|
fprintf(stderr, "pcregrep: error %d means that a resource limit " |
| 587 |
|
"was exceeded\n", mrc); |
| 588 |
|
fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n"); |
| 589 |
|
} |
| 590 |
|
if (error_count++ > 20) |
| 591 |
|
{ |
| 592 |
|
fprintf(stderr, "pcregrep: too many errors - abandoned\n"); |
| 593 |
|
exit(2); |
| 594 |
|
} |
| 595 |
|
match = invert; /* No more matching; don't show the line again */ |
| 596 |
|
break; |
| 597 |
|
} |
| 598 |
} |
} |
| 599 |
|
|
| 600 |
/* If it's a match or a not-match (as required), print what's wanted. */ |
/* If it's a match or a not-match (as required), do what's wanted. */ |
| 601 |
|
|
| 602 |
if (match != invert) |
if (match != invert) |
| 603 |
{ |
{ |
| 604 |
BOOL hyphenprinted = FALSE; |
BOOL hyphenprinted = FALSE; |
| 605 |
|
|
| 606 |
if (filenames_nomatch_only) return 1; |
/* We've failed if we want a file that doesn't have any matches. */ |
| 607 |
|
|
| 608 |
|
if (filenames == FN_NOMATCH_ONLY) return 1; |
| 609 |
|
|
| 610 |
|
/* Just count if just counting is wanted. */ |
| 611 |
|
|
| 612 |
if (count_only) count++; |
if (count_only) count++; |
| 613 |
|
|
| 614 |
else if (filenames_only) |
/* If all we want is a file name, there is no need to scan any more lines |
| 615 |
|
in the file. */ |
| 616 |
|
|
| 617 |
|
else if (filenames == FN_ONLY) |
| 618 |
{ |
{ |
| 619 |
fprintf(stdout, "%s\n", printname); |
fprintf(stdout, "%s\n", printname); |
| 620 |
return 0; |
return 0; |
| 621 |
} |
} |
| 622 |
|
|
| 623 |
|
/* Likewise, if all we want is a yes/no answer. */ |
| 624 |
|
|
| 625 |
else if (quiet) return 0; |
else if (quiet) return 0; |
| 626 |
|
|
| 627 |
|
/* The --only-matching option prints just the substring that matched, and |
| 628 |
|
does not print any context. */ |
| 629 |
|
|
| 630 |
|
else if (only_matching) |
| 631 |
|
{ |
| 632 |
|
if (printname != NULL) fprintf(stdout, "%s:", printname); |
| 633 |
|
if (number) fprintf(stdout, "%d:", linenumber); |
| 634 |
|
fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout); |
| 635 |
|
fprintf(stdout, "\n"); |
| 636 |
|
} |
| 637 |
|
|
| 638 |
|
/* This is the default case when none of the above options is set. We print |
| 639 |
|
the matching line(s), possibly preceded and/or followed by other lines of |
| 640 |
|
context. */ |
| 641 |
|
|
| 642 |
else |
else |
| 643 |
{ |
{ |
| 644 |
/* See if there is a requirement to print some "after" lines from a |
/* See if there is a requirement to print some "after" lines from a |
| 657 |
} |
} |
| 658 |
|
|
| 659 |
/* It is important to advance lastmatchrestart during this printing so |
/* It is important to advance lastmatchrestart during this printing so |
| 660 |
that it interacts correctly with any "before" printing below. */ |
that it interacts correctly with any "before" printing below. Print |
| 661 |
|
each line's data using fwrite() in case there are binary zeroes. */ |
| 662 |
|
|
| 663 |
while (lastmatchrestart < p) |
while (lastmatchrestart < p) |
| 664 |
{ |
{ |
| 666 |
if (printname != NULL) fprintf(stdout, "%s-", printname); |
if (printname != NULL) fprintf(stdout, "%s-", printname); |
| 667 |
if (number) fprintf(stdout, "%d-", lastmatchnumber++); |
if (number) fprintf(stdout, "%d-", lastmatchnumber++); |
| 668 |
while (*pp != '\n') pp++; |
while (*pp != '\n') pp++; |
| 669 |
fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart); |
fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout); |
| 670 |
lastmatchrestart = pp + 1; |
lastmatchrestart = pp + 1; |
| 671 |
} |
} |
| 672 |
if (lastmatchrestart != ptr) hyphenpending = TRUE; |
if (lastmatchrestart != ptr) hyphenpending = TRUE; |
| 690 |
char *p = ptr; |
char *p = ptr; |
| 691 |
|
|
| 692 |
while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) && |
while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) && |
| 693 |
linecount++ < before_context) |
linecount < before_context) |
| 694 |
{ |
{ |
| 695 |
|
linecount++; |
| 696 |
p--; |
p--; |
| 697 |
while (p > buffer && p[-1] != '\n') p--; |
while (p > buffer && p[-1] != '\n') p--; |
| 698 |
} |
} |
| 706 |
if (printname != NULL) fprintf(stdout, "%s-", printname); |
if (printname != NULL) fprintf(stdout, "%s-", printname); |
| 707 |
if (number) fprintf(stdout, "%d-", linenumber - linecount--); |
if (number) fprintf(stdout, "%d-", linenumber - linecount--); |
| 708 |
while (*pp != '\n') pp++; |
while (*pp != '\n') pp++; |
| 709 |
fprintf(stdout, "%.*s", pp - p + 1, p); |
fwrite(p, 1, pp - p + 1, stdout); /* In case binary zero */ |
| 710 |
p = pp + 1; |
p = pp + 1; |
| 711 |
} |
} |
| 712 |
} |
} |
| 734 |
linelength = endmatch - ptr; |
linelength = endmatch - ptr; |
| 735 |
} |
} |
| 736 |
|
|
| 737 |
fprintf(stdout, "%.*s\n", linelength, ptr); |
/*** NOTE: Use only fwrite() to output the data line, so that binary |
| 738 |
|
zeroes are treated as just another data character. */ |
| 739 |
|
|
| 740 |
|
/* This extra option, for Jeffrey Friedl's debugging requirements, |
| 741 |
|
replaces the matched string, or a specific captured string if it exists, |
| 742 |
|
with X. When this happens, colouring is ignored. */ |
| 743 |
|
|
| 744 |
|
#ifdef JFRIEDL_DEBUG |
| 745 |
|
if (S_arg >= 0 && S_arg < mrc) |
| 746 |
|
{ |
| 747 |
|
int first = S_arg * 2; |
| 748 |
|
int last = first + 1; |
| 749 |
|
fwrite(ptr, 1, offsets[first], stdout); |
| 750 |
|
fprintf(stdout, "X"); |
| 751 |
|
fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout); |
| 752 |
|
} |
| 753 |
|
else |
| 754 |
|
#endif |
| 755 |
|
|
| 756 |
|
/* We have to split the line(s) up if colouring. */ |
| 757 |
|
|
| 758 |
|
if (do_colour) |
| 759 |
|
{ |
| 760 |
|
fwrite(ptr, 1, offsets[0], stdout); |
| 761 |
|
fprintf(stdout, "%c[%sm", 0x1b, colour_string); |
| 762 |
|
fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout); |
| 763 |
|
fprintf(stdout, "%c[00m", 0x1b); |
| 764 |
|
fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout); |
| 765 |
|
} |
| 766 |
|
else fwrite(ptr, 1, linelength, stdout); |
| 767 |
|
|
| 768 |
|
fprintf(stdout, "\n"); |
| 769 |
} |
} |
| 770 |
|
|
| 771 |
|
/* End of doing what has to be done for a match */ |
| 772 |
|
|
| 773 |
rc = 0; /* Had some success */ |
rc = 0; /* Had some success */ |
| 774 |
|
|
| 775 |
/* Remember where the last match happened for after_context. We remember |
/* Remember where the last match happened for after_context. We remember |
| 815 |
/* End of file; print final "after" lines if wanted; do_after_lines sets |
/* End of file; print final "after" lines if wanted; do_after_lines sets |
| 816 |
hyphenpending if it prints something. */ |
hyphenpending if it prints something. */ |
| 817 |
|
|
| 818 |
do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname); |
if (!only_matching && !count_only) |
| 819 |
hyphenpending |= endhyphenpending; |
{ |
| 820 |
|
do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname); |
| 821 |
|
hyphenpending |= endhyphenpending; |
| 822 |
|
} |
| 823 |
|
|
| 824 |
/* Print the file name if we are looking for those without matches and there |
/* Print the file name if we are looking for those without matches and there |
| 825 |
were none. If we found a match, we won't have got this far. */ |
were none. If we found a match, we won't have got this far. */ |
| 826 |
|
|
| 827 |
if (filenames_nomatch_only) |
if (filenames == FN_NOMATCH_ONLY) |
| 828 |
{ |
{ |
| 829 |
fprintf(stdout, "%s\n", printname); |
fprintf(stdout, "%s\n", printname); |
| 830 |
return 0; |
return 0; |
| 852 |
|
|
| 853 |
Arguments: |
Arguments: |
| 854 |
pathname the path to investigate |
pathname the path to investigate |
| 855 |
dir_recurse TRUE if recursing is wanted (-r) |
dir_recurse TRUE if recursing is wanted (-r or -drecurse) |
|
show_filenames TRUE if file names are wanted for multiple files, except |
|
|
for the only file at top level when not filenames_only |
|
| 856 |
only_one_at_top TRUE if the path is the only one at toplevel |
only_one_at_top TRUE if the path is the only one at toplevel |
| 857 |
|
|
| 858 |
Returns: 0 if there was at least one match |
Returns: 0 if there was at least one match |
| 863 |
*/ |
*/ |
| 864 |
|
|
| 865 |
static int |
static int |
| 866 |
grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL show_filenames, |
grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top) |
|
BOOL only_one_at_top) |
|
| 867 |
{ |
{ |
| 868 |
int rc = 1; |
int rc = 1; |
| 869 |
int sep; |
int sep; |
| 870 |
FILE *in; |
FILE *in; |
|
char *printname; |
|
| 871 |
|
|
| 872 |
/* If the file name is "-" we scan stdin */ |
/* If the file name is "-" we scan stdin */ |
| 873 |
|
|
| 874 |
if (strcmp(pathname, "-") == 0) |
if (strcmp(pathname, "-") == 0) |
| 875 |
{ |
{ |
| 876 |
return pcregrep(stdin, |
return pcregrep(stdin, |
| 877 |
(filenames_only || filenames_nomatch_only || |
(filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))? |
|
(show_filenames && !only_one_at_top))? |
|
| 878 |
stdin_name : NULL); |
stdin_name : NULL); |
| 879 |
} |
} |
| 880 |
|
|
|
/* If the file is a directory and we are recursing, scan each file within it, |
|
|
subject to any include or exclude patterns that were set. The scanning code is |
|
|
localized so it can be made system-specific. */ |
|
| 881 |
|
|
| 882 |
if ((sep = isdirectory(pathname)) != 0 && dir_recurse) |
/* If the file is a directory, skip if skipping or if we are recursing, scan |
| 883 |
{ |
each file within it, subject to any include or exclude patterns that were set. |
| 884 |
char buffer[1024]; |
The scanning code is localized so it can be made system-specific. */ |
|
char *nextfile; |
|
|
directory_type *dir = opendirectory(pathname); |
|
| 885 |
|
|
| 886 |
if (dir == NULL) |
if ((sep = isdirectory(pathname)) != 0) |
| 887 |
{ |
{ |
| 888 |
if (!silent) |
if (dee_action == dee_SKIP) return 1; |
| 889 |
fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname, |
if (dee_action == dee_RECURSE) |
|
strerror(errno)); |
|
|
return 2; |
|
|
} |
|
|
|
|
|
while ((nextfile = readdirectory(dir)) != NULL) |
|
| 890 |
{ |
{ |
| 891 |
int frc, blen; |
char buffer[1024]; |
| 892 |
sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile); |
char *nextfile; |
| 893 |
blen = strlen(buffer); |
directory_type *dir = opendirectory(pathname); |
| 894 |
|
|
| 895 |
if (exclude_compiled != NULL && |
if (dir == NULL) |
| 896 |
pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0) |
{ |
| 897 |
continue; |
if (!silent) |
| 898 |
|
fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname, |
| 899 |
|
strerror(errno)); |
| 900 |
|
return 2; |
| 901 |
|
} |
| 902 |
|
|
| 903 |
if (include_compiled != NULL && |
while ((nextfile = readdirectory(dir)) != NULL) |
| 904 |
pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0) |
{ |
| 905 |
continue; |
int frc, blen; |
| 906 |
|
sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile); |
| 907 |
|
blen = strlen(buffer); |
| 908 |
|
|
| 909 |
|
if (exclude_compiled != NULL && |
| 910 |
|
pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0) |
| 911 |
|
continue; |
| 912 |
|
|
| 913 |
|
if (include_compiled != NULL && |
| 914 |
|
pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0) |
| 915 |
|
continue; |
| 916 |
|
|
| 917 |
|
frc = grep_or_recurse(buffer, dir_recurse, FALSE); |
| 918 |
|
if (frc > 1) rc = frc; |
| 919 |
|
else if (frc == 0 && rc == 1) rc = 0; |
| 920 |
|
} |
| 921 |
|
|
| 922 |
frc = grep_or_recurse(buffer, dir_recurse, TRUE, FALSE); |
closedirectory(dir); |
| 923 |
if (frc > 1) rc = frc; |
return rc; |
|
else if (frc == 0 && rc == 1) rc = 0; |
|
| 924 |
} |
} |
|
|
|
|
closedirectory(dir); |
|
|
return rc; |
|
| 925 |
} |
} |
| 926 |
|
|
| 927 |
/* If the file is not a directory, or we are not recursing, scan it. If this is |
/* If the file is not a directory and not a regular file, skip it if that's |
| 928 |
the first and only argument at top level, we don't show the file name (unless |
been requested. */ |
| 929 |
we are only showing the file name). Otherwise, control is via the |
|
| 930 |
show_filenames variable. */ |
else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1; |
| 931 |
|
|
| 932 |
|
/* Control reaches here if we have a regular file, or if we have a directory |
| 933 |
|
and recursion or skipping was not requested, or if we have anything else and |
| 934 |
|
skipping was not requested. The scan proceeds. If this is the first and only |
| 935 |
|
argument at top level, we don't show the file name, unless we are only showing |
| 936 |
|
the file name, or the filename was forced (-H). */ |
| 937 |
|
|
| 938 |
in = fopen(pathname, "r"); |
in = fopen(pathname, "r"); |
| 939 |
if (in == NULL) |
if (in == NULL) |
| 944 |
return 2; |
return 2; |
| 945 |
} |
} |
| 946 |
|
|
| 947 |
printname = (filenames_only || filenames_nomatch_only || |
rc = pcregrep(in, (filenames > FN_DEFAULT || |
| 948 |
(show_filenames && !only_one_at_top))? pathname : NULL; |
(filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL); |
|
|
|
|
rc = pcregrep(in, printname); |
|
| 949 |
|
|
| 950 |
fclose(in); |
fclose(in); |
| 951 |
return rc; |
return rc; |
| 961 |
static int |
static int |
| 962 |
usage(int rc) |
usage(int rc) |
| 963 |
{ |
{ |
| 964 |
fprintf(stderr, "Usage: pcregrep [-LMVcfhilnqrsvwx] [long-options] [pattern] [file1 file2 ...]\n"); |
option_item *op; |
| 965 |
|
fprintf(stderr, "Usage: pcregrep [-"); |
| 966 |
|
for (op = optionlist; op->one_char != 0; op++) |
| 967 |
|
{ |
| 968 |
|
if (op->one_char > 0) fprintf(stderr, "%c", op->one_char); |
| 969 |
|
} |
| 970 |
|
fprintf(stderr, "] [long options] [pattern] [files]\n"); |
| 971 |
fprintf(stderr, "Type `pcregrep --help' for more information.\n"); |
fprintf(stderr, "Type `pcregrep --help' for more information.\n"); |
| 972 |
return rc; |
return rc; |
| 973 |
} |
} |
| 986 |
|
|
| 987 |
printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n"); |
printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n"); |
| 988 |
printf("Search for PATTERN in each FILE or standard input.\n"); |
printf("Search for PATTERN in each FILE or standard input.\n"); |
| 989 |
printf("PATTERN must be present if -f is not used.\n"); |
printf("PATTERN must be present if neither -e nor -f is used.\n"); |
| 990 |
printf("\"-\" can be used as a file name to mean STDIN.\n"); |
printf("\"-\" can be used as a file name to mean STDIN.\n\n"); |
| 991 |
printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n"); |
printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n"); |
| 992 |
|
|
| 993 |
printf("Options:\n"); |
printf("Options:\n"); |
| 1023 |
{ |
{ |
| 1024 |
switch(letter) |
switch(letter) |
| 1025 |
{ |
{ |
| 1026 |
case -1: help(); exit(0); |
case N_HELP: help(); exit(0); |
| 1027 |
case 'c': count_only = TRUE; break; |
case 'c': count_only = TRUE; break; |
| 1028 |
case 'h': filenames = FALSE; break; |
case 'F': process_options |= PO_FIXED_STRINGS; break; |
| 1029 |
|
case 'H': filenames = FN_FORCE; break; |
| 1030 |
|
case 'h': filenames = FN_NONE; break; |
| 1031 |
case 'i': options |= PCRE_CASELESS; break; |
case 'i': options |= PCRE_CASELESS; break; |
| 1032 |
case 'l': filenames_only = TRUE; break; |
case 'l': filenames = FN_ONLY; break; |
| 1033 |
case 'L': filenames_nomatch_only = TRUE; break; |
case 'L': filenames = FN_NOMATCH_ONLY; break; |
| 1034 |
case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break; |
case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break; |
| 1035 |
case 'n': number = TRUE; break; |
case 'n': number = TRUE; break; |
| 1036 |
|
case 'o': only_matching = TRUE; break; |
| 1037 |
case 'q': quiet = TRUE; break; |
case 'q': quiet = TRUE; break; |
| 1038 |
case 'r': recurse = TRUE; break; |
case 'r': dee_action = dee_RECURSE; break; |
| 1039 |
case 's': silent = TRUE; break; |
case 's': silent = TRUE; break; |
| 1040 |
case 'u': options |= PCRE_UTF8; break; |
case 'u': options |= PCRE_UTF8; break; |
| 1041 |
case 'v': invert = TRUE; break; |
case 'v': invert = TRUE; break; |
| 1042 |
case 'w': word_match = TRUE; break; |
case 'w': process_options |= PO_WORD_MATCH; break; |
| 1043 |
case 'x': whole_lines = TRUE; break; |
case 'x': process_options |= PO_LINE_MATCH; break; |
| 1044 |
|
|
| 1045 |
case 'V': |
case 'V': |
| 1046 |
fprintf(stderr, "pcregrep version %s using ", VERSION); |
fprintf(stderr, "pcregrep version %s using ", VERSION); |
| 1060 |
|
|
| 1061 |
|
|
| 1062 |
/************************************************* |
/************************************************* |
| 1063 |
|
* Construct printed ordinal * |
| 1064 |
|
*************************************************/ |
| 1065 |
|
|
| 1066 |
|
/* This turns a number into "1st", "3rd", etc. */ |
| 1067 |
|
|
| 1068 |
|
static char *
ordin(int n)
{
/* Large enough for any int rendered in decimal (sign included) plus the
two-letter suffix and the terminating zero. The buffer is static, so the
result is overwritten by the next call. */

static char buffer[32];
const char *ending = "th";
int last_two = n % 100;

/* 11, 12 and 13 (and 111, 112, ...) take "th" even though their final digit
is 1, 2 or 3. Everything else is decided by the final digit alone. Negative
values yield negative remainders and so fall through to "th". */

if (last_two < 11 || last_two > 13)
  {
  switch (n % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
| 1084 |
|
|
| 1085 |
|
|
| 1086 |
|
|
| 1087 |
|
/************************************************* |
| 1088 |
|
* Compile a single pattern * |
| 1089 |
|
*************************************************/ |
| 1090 |
|
|
| 1091 |
|
/* When the -F option has been used, this is called for each substring. |
| 1092 |
|
Otherwise it's called for each supplied pattern. |
| 1093 |
|
|
| 1094 |
|
Arguments: |
| 1095 |
|
pattern the pattern string |
| 1096 |
|
options the PCRE options |
| 1097 |
|
filename the file name, or NULL for a command-line pattern |
| 1098 |
|
count 0 if this is the only command line pattern, or |
| 1099 |
|
number of the command line pattern, or |
| 1100 |
|
linenumber for a pattern from a file |
| 1101 |
|
|
| 1102 |
|
Returns: TRUE on success, FALSE after an error |
| 1103 |
|
*/ |
| 1104 |
|
|
| 1105 |
|
static BOOL |
| 1106 |
|
compile_single_pattern(char *pattern, int options, char *filename, int count) |
| 1107 |
|
{ |
| 1108 |
|
char buffer[MBUFTHIRD + 16]; |
| 1109 |
|
const char *error; |
| 1110 |
|
int errptr; |
| 1111 |
|
|
| 1112 |
|
if (pattern_count >= MAX_PATTERN_COUNT) |
| 1113 |
|
{ |
| 1114 |
|
fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n", |
| 1115 |
|
(filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT); |
| 1116 |
|
return FALSE; |
| 1117 |
|
} |
| 1118 |
|
|
| 1119 |
|
sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern, |
| 1120 |
|
suffix[process_options]); |
| 1121 |
|
pattern_list[pattern_count] = |
| 1122 |
|
pcre_compile(buffer, options, &error, &errptr, pcretables); |
| 1123 |
|
if (pattern_list[pattern_count++] != NULL) return TRUE; |
| 1124 |
|
|
| 1125 |
|
/* Handle compile errors */ |
| 1126 |
|
|
| 1127 |
|
errptr -= (int)strlen(prefix[process_options]); |
| 1128 |
|
if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern); |
| 1129 |
|
|
| 1130 |
|
if (filename == NULL) |
| 1131 |
|
{ |
| 1132 |
|
if (count == 0) |
| 1133 |
|
fprintf(stderr, "pcregrep: Error in command-line regex " |
| 1134 |
|
"at offset %d: %s\n", errptr, error); |
| 1135 |
|
else |
| 1136 |
|
fprintf(stderr, "pcregrep: Error in %s command-line regex " |
| 1137 |
|
"at offset %d: %s\n", ordin(count), errptr, error); |
| 1138 |
|
} |
| 1139 |
|
else |
| 1140 |
|
{ |
| 1141 |
|
fprintf(stderr, "pcregrep: Error in regex in line %d of %s " |
| 1142 |
|
"at offset %d: %s\n", count, filename, errptr, error); |
| 1143 |
|
} |
| 1144 |
|
|
| 1145 |
|
return FALSE; |
| 1146 |
|
} |
| 1147 |
|
|
| 1148 |
|
|
| 1149 |
|
|
| 1150 |
|
/************************************************* |
| 1151 |
|
* Compile one supplied pattern * |
| 1152 |
|
*************************************************/ |
| 1153 |
|
|
| 1154 |
|
/* When the -F option has been used, each string may be a list of strings, |
| 1155 |
|
separated by newlines. They will be matched literally. |
| 1156 |
|
|
| 1157 |
|
Arguments: |
| 1158 |
|
pattern the pattern string |
| 1159 |
|
options the PCRE options |
| 1160 |
|
filename the file name, or NULL for a command-line pattern |
| 1161 |
|
count 0 if this is the only command line pattern, or |
| 1162 |
|
number of the command line pattern, or |
| 1163 |
|
linenumber for a pattern from a file |
| 1164 |
|
|
| 1165 |
|
Returns: TRUE on success, FALSE after an error |
| 1166 |
|
*/ |
| 1167 |
|
|
| 1168 |
|
static BOOL |
| 1169 |
|
compile_pattern(char *pattern, int options, char *filename, int count) |
| 1170 |
|
{ |
| 1171 |
|
if ((process_options & PO_FIXED_STRINGS) != 0) |
| 1172 |
|
{ |
| 1173 |
|
char buffer[MBUFTHIRD]; |
| 1174 |
|
for(;;) |
| 1175 |
|
{ |
| 1176 |
|
char *p = strchr(pattern, '\n'); |
| 1177 |
|
if (p == NULL) |
| 1178 |
|
return compile_single_pattern(pattern, options, filename, count); |
| 1179 |
|
sprintf(buffer, "%.*s", p - pattern, pattern); |
| 1180 |
|
pattern = p + 1; |
| 1181 |
|
if (!compile_single_pattern(buffer, options, filename, count)) |
| 1182 |
|
return FALSE; |
| 1183 |
|
} |
| 1184 |
|
} |
| 1185 |
|
else return compile_single_pattern(pattern, options, filename, count); |
| 1186 |
|
} |
| 1187 |
|
|
| 1188 |
|
|
| 1189 |
|
|
| 1190 |
|
/************************************************* |
| 1191 |
* Main program * |
* Main program * |
| 1192 |
*************************************************/ |
*************************************************/ |
| 1193 |
|
|
| 1198 |
{ |
{ |
| 1199 |
int i, j; |
int i, j; |
| 1200 |
int rc = 1; |
int rc = 1; |
| 1201 |
int options = 0; |
int pcre_options = 0; |
| 1202 |
|
int cmd_pattern_count = 0; |
| 1203 |
int errptr; |
int errptr; |
|
const char *error; |
|
| 1204 |
BOOL only_one_at_top; |
BOOL only_one_at_top; |
| 1205 |
|
char *patterns[MAX_PATTERN_COUNT]; |
| 1206 |
|
const char *locale_from = "--locale"; |
| 1207 |
|
const char *error; |
| 1208 |
|
|
| 1209 |
/* Process the options */ |
/* Process the options */ |
| 1210 |
|
|
| 1218 |
if (argv[i][0] != '-') break; |
if (argv[i][0] != '-') break; |
| 1219 |
|
|
| 1220 |
/* If we hit an argument that is just "-", it may be a reference to STDIN, |
/* If we hit an argument that is just "-", it may be a reference to STDIN, |
| 1221 |
but only if we have previously had -f to define the patterns. */ |
but only if we have previously had -e or -f to define the patterns. */ |
| 1222 |
|
|
| 1223 |
if (argv[i][1] == 0) |
if (argv[i][1] == 0) |
| 1224 |
{ |
{ |
| 1225 |
if (pattern_filename != NULL) break; |
if (pattern_filename != NULL || pattern_count > 0) break; |
| 1226 |
else exit(usage(2)); |
else exit(usage(2)); |
| 1227 |
} |
} |
| 1228 |
|
|
| 1244 |
/* Some long options have data that follows after =, for example file=name. |
/* Some long options have data that follows after =, for example file=name. |
| 1245 |
Some options have variations in the long name spelling: specifically, we |
Some options have variations in the long name spelling: specifically, we |
| 1246 |
allow "regexp" because GNU grep allows it, though I personally go along |
allow "regexp" because GNU grep allows it, though I personally go along |
| 1247 |
with Jeff Friedl in preferring "regex" without the "p". These options are |
with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p". |
| 1248 |
entered in the table as "regex(p)". No option is in both these categories, |
These options are entered in the table as "regex(p)". No option is in both |
| 1249 |
fortunately. */ |
these categories, fortunately. */ |
| 1250 |
|
|
| 1251 |
for (op = optionlist; op->one_char != 0; op++) |
for (op = optionlist; op->one_char != 0; op++) |
| 1252 |
{ |
{ |
| 1316 |
option_data = s+1; |
option_data = s+1; |
| 1317 |
break; |
break; |
| 1318 |
} |
} |
| 1319 |
options = handle_option(*s++, options); |
pcre_options = handle_option(*s++, pcre_options); |
| 1320 |
} |
} |
| 1321 |
} |
} |
| 1322 |
|
|
| 1323 |
/* At this point we should have op pointing to a matched option */ |
/* At this point we should have op pointing to a matched option. If the type |
| 1324 |
|
is NO_DATA, it means that there is no data, and the option might set |
| 1325 |
|
something in the PCRE options. */ |
| 1326 |
|
|
| 1327 |
if (op->type == OP_NODATA) |
if (op->type == OP_NODATA) |
|
options = handle_option(op->one_char, options); |
|
|
else |
|
| 1328 |
{ |
{ |
| 1329 |
if (*option_data == 0) |
pcre_options = handle_option(op->one_char, pcre_options); |
| 1330 |
|
continue; |
| 1331 |
|
} |
| 1332 |
|
|
| 1333 |
|
/* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that |
| 1334 |
|
either has a value or defaults to something. It cannot have data in a |
| 1335 |
|
separate item. At the moment, the only such options are "colo(u)r" and |
| 1336 |
|
Jeffrey Friedl's special debugging option. */ |
| 1337 |
|
|
| 1338 |
|
if (*option_data == 0 && |
| 1339 |
|
(op->type == OP_OP_STRING || op->type == OP_OP_NUMBER)) |
| 1340 |
|
{ |
| 1341 |
|
switch (op->one_char) |
| 1342 |
{ |
{ |
| 1343 |
if (i >= argc - 1 || longopwasequals) |
case N_COLOUR: |
| 1344 |
{ |
colour_option = (char *)"auto"; |
| 1345 |
fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]); |
break; |
| 1346 |
exit(usage(2)); |
#ifdef JFRIEDL_DEBUG |
| 1347 |
} |
case 'S': |
| 1348 |
option_data = argv[++i]; |
S_arg = 0; |
| 1349 |
|
break; |
| 1350 |
|
#endif |
| 1351 |
} |
} |
| 1352 |
|
continue; |
| 1353 |
|
} |
| 1354 |
|
|
| 1355 |
|
/* Otherwise, find the data string for the option. */ |
| 1356 |
|
|
| 1357 |
if (op->type == OP_STRING) *((char **)op->dataptr) = option_data; else |
if (*option_data == 0) |
| 1358 |
|
{ |
| 1359 |
|
if (i >= argc - 1 || longopwasequals) |
| 1360 |
{ |
{ |
| 1361 |
char *endptr; |
fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]); |
| 1362 |
int n = strtoul(option_data, &endptr, 10); |
exit(usage(2)); |
| 1363 |
if (*endptr != 0) |
} |
| 1364 |
|
option_data = argv[++i]; |
| 1365 |
|
} |
| 1366 |
|
|
| 1367 |
|
/* If the option type is OP_PATLIST, it's the -e option, which can be called |
| 1368 |
|
multiple times to create a list of patterns. */ |
| 1369 |
|
|
| 1370 |
|
if (op->type == OP_PATLIST) |
| 1371 |
|
{ |
| 1372 |
|
if (cmd_pattern_count >= MAX_PATTERN_COUNT) |
| 1373 |
|
{ |
| 1374 |
|
fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n", |
| 1375 |
|
MAX_PATTERN_COUNT); |
| 1376 |
|
return 2; |
| 1377 |
|
} |
| 1378 |
|
patterns[cmd_pattern_count++] = option_data; |
| 1379 |
|
} |
| 1380 |
|
|
| 1381 |
|
/* Otherwise, deal with single string or numeric data values. */ |
| 1382 |
|
|
| 1383 |
|
else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER) |
| 1384 |
|
{ |
| 1385 |
|
*((char **)op->dataptr) = option_data; |
| 1386 |
|
} |
| 1387 |
|
else |
| 1388 |
|
{ |
| 1389 |
|
char *endptr; |
| 1390 |
|
int n = strtoul(option_data, &endptr, 10); |
| 1391 |
|
if (*endptr != 0) |
| 1392 |
|
{ |
| 1393 |
|
if (longop) |
| 1394 |
{ |
{ |
| 1395 |
if (longop) |
char *equals = strchr(op->long_name, '='); |
| 1396 |
fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%s\n", |
int nlen = (equals == NULL)? (int)strlen(op->long_name) : |
| 1397 |
option_data, op->long_name); |
equals - op->long_name; |
| 1398 |
else |
fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n", |
| 1399 |
fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n", |
option_data, nlen, op->long_name); |
|
option_data, op->one_char); |
|
|
exit(usage(2)); |
|
| 1400 |
} |
} |
| 1401 |
*((int *)op->dataptr) = n; |
else |
| 1402 |
|
fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n", |
| 1403 |
|
option_data, op->one_char); |
| 1404 |
|
exit(usage(2)); |
| 1405 |
} |
} |
| 1406 |
|
*((int *)op->dataptr) = n; |
| 1407 |
} |
} |
| 1408 |
} |
} |
| 1409 |
|
|
| 1416 |
if (before_context == 0) before_context = both_context; |
if (before_context == 0) before_context = both_context; |
| 1417 |
} |
} |
| 1418 |
|
|
| 1419 |
pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *)); |
/* If a locale has not been provided as an option, see if the LC_CTYPE or |
| 1420 |
hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *)); |
LC_ALL environment variable is set, and if so, use it. */ |
| 1421 |
|
|
| 1422 |
if (pattern_list == NULL || hints_list == NULL) |
if (locale == NULL) |
| 1423 |
{ |
{ |
| 1424 |
fprintf(stderr, "pcregrep: malloc failed\n"); |
locale = getenv("LC_ALL"); |
| 1425 |
return 2; |
locale_from = "LCC_ALL"; |
| 1426 |
} |
} |
| 1427 |
|
|
| 1428 |
/* Compile the regular expression(s). */ |
if (locale == NULL) |
|
|
|
|
if (pattern_filename != NULL) |
|
| 1429 |
{ |
{ |
| 1430 |
FILE *f = fopen(pattern_filename, "r"); |
locale = getenv("LC_CTYPE"); |
| 1431 |
char buffer[MBUFTHIRD + 16]; |
locale_from = "LC_CTYPE"; |
| 1432 |
char *rdstart; |
} |
|
int adjust = 0; |
|
| 1433 |
|
|
| 1434 |
if (f == NULL) |
/* If a locale has been provided, set it, and generate the tables the PCRE |
| 1435 |
|
needs. Otherwise, pcretables==NULL, which causes the use of default tables. */ |
| 1436 |
|
|
| 1437 |
|
if (locale != NULL) |
| 1438 |
|
{ |
| 1439 |
|
if (setlocale(LC_CTYPE, locale) == NULL) |
| 1440 |
{ |
{ |
| 1441 |
fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename, |
fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n", |
| 1442 |
strerror(errno)); |
locale, locale_from); |
| 1443 |
return 2; |
return 2; |
| 1444 |
} |
} |
| 1445 |
|
pcretables = pcre_maketables(); |
| 1446 |
|
} |
| 1447 |
|
|
| 1448 |
if (whole_lines) |
/* Sort out colouring */ |
| 1449 |
|
|
| 1450 |
|
if (colour_option != NULL && strcmp(colour_option, "never") != 0) |
| 1451 |
|
{ |
| 1452 |
|
if (strcmp(colour_option, "always") == 0) do_colour = TRUE; |
| 1453 |
|
else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty(); |
| 1454 |
|
else |
| 1455 |
{ |
{ |
| 1456 |
strcpy(buffer, "^(?:"); |
fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n", |
| 1457 |
adjust = 4; |
colour_option); |
| 1458 |
|
return 2; |
| 1459 |
} |
} |
| 1460 |
else if (word_match) |
if (do_colour) |
| 1461 |
{ |
{ |
| 1462 |
strcpy(buffer, "\\b"); |
char *cs = getenv("PCREGREP_COLOUR"); |
| 1463 |
adjust = 2; |
if (cs == NULL) cs = getenv("PCREGREP_COLOR"); |
| 1464 |
|
if (cs != NULL) colour_string = cs; |
| 1465 |
} |
} |
| 1466 |
|
} |
| 1467 |
|
|
| 1468 |
|
/* Interpret the text values for -d and -D */ |
| 1469 |
|
|
| 1470 |
rdstart = buffer + adjust; |
if (dee_option != NULL) |
| 1471 |
while (fgets(rdstart, MBUFTHIRD, f) != NULL) |
{ |
| 1472 |
|
if (strcmp(dee_option, "read") == 0) dee_action = dee_READ; |
| 1473 |
|
else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE; |
| 1474 |
|
else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP; |
| 1475 |
|
else |
| 1476 |
{ |
{ |
| 1477 |
char *s = rdstart + (int)strlen(rdstart); |
fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option); |
| 1478 |
if (pattern_count >= MAX_PATTERN_COUNT) |
return 2; |
| 1479 |
{ |
} |
| 1480 |
fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n", |
} |
| 1481 |
MAX_PATTERN_COUNT); |
|
| 1482 |
return 2; |
if (DEE_option != NULL) |
| 1483 |
} |
{ |
| 1484 |
while (s > rdstart && isspace((unsigned char)(s[-1]))) s--; |
if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ; |
| 1485 |
if (s == rdstart) continue; |
else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP; |
| 1486 |
if (whole_lines) strcpy(s, ")$"); |
else |
| 1487 |
else if (word_match)strcpy(s, "\\b"); |
{ |
| 1488 |
else *s = 0; |
fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option); |
| 1489 |
pattern_list[pattern_count] = pcre_compile(buffer, options, &error, |
return 2; |
|
&errptr, NULL); |
|
|
if (pattern_list[pattern_count++] == NULL) |
|
|
{ |
|
|
fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n", |
|
|
pattern_count, errptr - adjust, error); |
|
|
return 2; |
|
|
} |
|
| 1490 |
} |
} |
|
fclose(f); |
|
| 1491 |
} |
} |
| 1492 |
|
|
| 1493 |
/* If no file name, a single regex must be given inline. */ |
/* Check the value for Jeff Friedl's debugging option. */ |
| 1494 |
|
|
| 1495 |
else |
#ifdef JFRIEDL_DEBUG |
| 1496 |
|
if (S_arg > 9) |
| 1497 |
|
{ |
| 1498 |
|
fprintf(stderr, "pcregrep: bad value for -S option\n"); |
| 1499 |
|
return 2; |
| 1500 |
|
} |
| 1501 |
|
#endif |
| 1502 |
|
|
| 1503 |
|
/* Get memory to store the pattern and hints lists. */ |
| 1504 |
|
|
| 1505 |
|
pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *)); |
| 1506 |
|
hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *)); |
| 1507 |
|
|
| 1508 |
|
if (pattern_list == NULL || hints_list == NULL) |
| 1509 |
{ |
{ |
| 1510 |
char buffer[MBUFTHIRD + 16]; |
fprintf(stderr, "pcregrep: malloc failed\n"); |
| 1511 |
char *pat; |
return 2; |
| 1512 |
int adjust = 0; |
} |
| 1513 |
|
|
| 1514 |
|
/* If no patterns were provided by -e, and there is no file provided by -f, |
| 1515 |
|
the first argument is the one and only pattern, and it must exist. */ |
| 1516 |
|
|
| 1517 |
|
if (cmd_pattern_count == 0 && pattern_filename == NULL) |
| 1518 |
|
{ |
| 1519 |
if (i >= argc) return usage(2); |
if (i >= argc) return usage(2); |
| 1520 |
|
patterns[cmd_pattern_count++] = argv[i++]; |
| 1521 |
|
} |
| 1522 |
|
|
| 1523 |
|
/* Compile the patterns that were provided on the command line, either by |
| 1524 |
|
multiple uses of -e or as a single unkeyed pattern. */ |
| 1525 |
|
|
| 1526 |
if (whole_lines) |
for (j = 0; j < cmd_pattern_count; j++) |
| 1527 |
|
{ |
| 1528 |
|
if (!compile_pattern(patterns[j], pcre_options, NULL, |
| 1529 |
|
(j == 0 && cmd_pattern_count == 1)? 0 : j + 1)) |
| 1530 |
|
return 2; |
| 1531 |
|
} |
| 1532 |
|
|
| 1533 |
|
/* Compile the regular expressions that are provided in a file. */ |
| 1534 |
|
|
| 1535 |
|
if (pattern_filename != NULL) |
| 1536 |
|
{ |
| 1537 |
|
int linenumber = 0; |
| 1538 |
|
FILE *f; |
| 1539 |
|
char *filename; |
| 1540 |
|
char buffer[MBUFTHIRD]; |
| 1541 |
|
|
| 1542 |
|
if (strcmp(pattern_filename, "-") == 0) |
| 1543 |
{ |
{ |
| 1544 |
sprintf(buffer, "^(?:%.*s)$", MBUFTHIRD, argv[i++]); |
f = stdin; |
| 1545 |
pat = buffer; |
filename = stdin_name; |
|
adjust = 4; |
|
| 1546 |
} |
} |
| 1547 |
else if (word_match) |
else |
| 1548 |
{ |
{ |
| 1549 |
sprintf(buffer, "\\b%.*s\\b", MBUFTHIRD, argv[i++]); |
f = fopen(pattern_filename, "r"); |
| 1550 |
pat = buffer; |
if (f == NULL) |
| 1551 |
adjust = 2; |
{ |
| 1552 |
|
fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename, |
| 1553 |
|
strerror(errno)); |
| 1554 |
|
return 2; |
| 1555 |
|
} |
| 1556 |
|
filename = pattern_filename; |
| 1557 |
} |
} |
|
else pat = argv[i++]; |
|
|
|
|
|
pattern_list[0] = pcre_compile(pat, options, &error, &errptr, NULL); |
|
| 1558 |
|
|
| 1559 |
if (pattern_list[0] == NULL) |
while (fgets(buffer, MBUFTHIRD, f) != NULL) |
| 1560 |
{ |
{ |
| 1561 |
fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", |
char *s = buffer + (int)strlen(buffer); |
| 1562 |
errptr - adjust, error); |
while (s > buffer && isspace((unsigned char)(s[-1]))) s--; |
| 1563 |
return 2; |
*s = 0; |
| 1564 |
|
linenumber++; |
| 1565 |
|
if (buffer[0] == 0) continue; /* Skip blank lines */ |
| 1566 |
|
if (!compile_pattern(buffer, pcre_options, filename, linenumber)) |
| 1567 |
|
return 2; |
| 1568 |
} |
} |
| 1569 |
pattern_count++; |
|
| 1570 |
|
if (f != stdin) fclose(f); |
| 1571 |
} |
} |
| 1572 |
|
|
| 1573 |
/* Study the regular expressions, as we will be running them many times */ |
/* Study the regular expressions, as we will be running them many times */ |
| 1588 |
|
|
| 1589 |
if (exclude_pattern != NULL) |
if (exclude_pattern != NULL) |
| 1590 |
{ |
{ |
| 1591 |
exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr, NULL); |
exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr, |
| 1592 |
|
pcretables); |
| 1593 |
if (exclude_compiled == NULL) |
if (exclude_compiled == NULL) |
| 1594 |
{ |
{ |
| 1595 |
fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n", |
fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n", |
| 1600 |
|
|
| 1601 |
if (include_pattern != NULL) |
if (include_pattern != NULL) |
| 1602 |
{ |
{ |
| 1603 |
include_compiled = pcre_compile(include_pattern, 0, &error, &errptr, NULL); |
include_compiled = pcre_compile(include_pattern, 0, &error, &errptr, |
| 1604 |
|
pcretables); |
| 1605 |
if (include_compiled == NULL) |
if (include_compiled == NULL) |
| 1606 |
{ |
{ |
| 1607 |
fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n", |
fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n", |
| 1610 |
} |
} |
| 1611 |
} |
} |
| 1612 |
|
|
| 1613 |
/* If there are no further arguments, do the business on stdin and exit */ |
/* If there are no further arguments, do the business on stdin and exit. */ |
| 1614 |
|
|
| 1615 |
if (i >= argc) return pcregrep(stdin, |
if (i >= argc) |
| 1616 |
(filenames_only || filenames_nomatch_only)? stdin_name : NULL); |
return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL); |
| 1617 |
|
|
| 1618 |
/* Otherwise, work through the remaining arguments as files or directories. |
/* Otherwise, work through the remaining arguments as files or directories. |
| 1619 |
Pass in the fact that there is only one argument at top level - this suppresses |
Pass in the fact that there is only one argument at top level - this suppresses |
| 1620 |
the file name if the argument is not a directory and filenames_only is not set. |
the file name if the argument is not a directory and filenames are not |
| 1621 |
*/ |
otherwise forced. */ |
| 1622 |
|
|
| 1623 |
only_one_at_top = (i == argc - 1); |
only_one_at_top = i == argc - 1; /* Catch initial value of i */ |
| 1624 |
|
|
| 1625 |
for (; i < argc; i++) |
for (; i < argc; i++) |
| 1626 |
{ |
{ |
| 1627 |
int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top); |
int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE, |
| 1628 |
|
only_one_at_top); |
| 1629 |
if (frc > 1) rc = frc; |
if (frc > 1) rc = frc; |
| 1630 |
else if (frc == 0 && rc == 1) rc = 0; |
else if (frc == 0 && rc == 1) rc = 0; |
| 1631 |
} |
} |