| 1 |
/*************************************************
|
| 2 |
* pcregrep program *
|
| 3 |
*************************************************/
|
| 4 |
|
| 5 |
/* This is a grep program that uses the PCRE regular expression library to do
|
| 6 |
its pattern matching. On a Unix or Win32 system it can recurse into
|
| 7 |
directories.
|
| 8 |
|
| 9 |
Copyright (c) 1997-2005 University of Cambridge
|
| 10 |
|
| 11 |
-----------------------------------------------------------------------------
|
| 12 |
Redistribution and use in source and binary forms, with or without
|
| 13 |
modification, are permitted provided that the following conditions are met:
|
| 14 |
|
| 15 |
* Redistributions of source code must retain the above copyright notice,
|
| 16 |
this list of conditions and the following disclaimer.
|
| 17 |
|
| 18 |
* Redistributions in binary form must reproduce the above copyright
|
| 19 |
notice, this list of conditions and the following disclaimer in the
|
| 20 |
documentation and/or other materials provided with the distribution.
|
| 21 |
|
| 22 |
* Neither the name of the University of Cambridge nor the names of its
|
| 23 |
contributors may be used to endorse or promote products derived from
|
| 24 |
this software without specific prior written permission.
|
| 25 |
|
| 26 |
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
| 27 |
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
| 28 |
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
| 29 |
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
| 30 |
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
| 31 |
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
| 32 |
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
| 33 |
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
| 34 |
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
| 35 |
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
| 36 |
POSSIBILITY OF SUCH DAMAGE.
|
| 37 |
-----------------------------------------------------------------------------
|
| 38 |
*/
|
| 39 |
|
| 40 |
#include <ctype.h>
|
| 41 |
#include <stdio.h>
|
| 42 |
#include <string.h>
|
| 43 |
#include <stdlib.h>
|
| 44 |
#include <errno.h>
|
| 45 |
|
| 46 |
#include <sys/types.h>
|
| 47 |
#include <sys/stat.h>
|
| 48 |
#include <unistd.h>
|
| 49 |
|
| 50 |
#include "config.h"
|
| 51 |
#include "pcre.h"
|
| 52 |
|
| 53 |
/* Home-grown boolean type: an int-based BOOL with TRUE/FALSE constants. */

#define TRUE 1
#define FALSE 0

typedef int BOOL;

/* VERSION is the string printed by -V; MAX_PATTERN_COUNT is the limit on the
number of patterns that is quoted in the --help text. */

#define MAX_PATTERN_COUNT 100
#define VERSION "4.0 07-Jun-2005"

/* MBUFTHIRD is one third of the size of the scanning buffer used by
pcregrep(): BUFSIZ when that is larger than 8192, and 8192 otherwise. */

#if BUFSIZ <= 8192
#define MBUFTHIRD 8192
#else
#define MBUFTHIRD BUFSIZ
#endif
|
| 66 |
|
| 67 |
|
| 68 |
|
| 69 |
/*************************************************
|
| 70 |
* Global variables *
|
| 71 |
*************************************************/
|
| 72 |
|
| 73 |
static char *pattern_filename = NULL;
|
| 74 |
static char *stdin_name = (char *)"(standard input)";
|
| 75 |
static int pattern_count = 0;
|
| 76 |
static pcre **pattern_list;
|
| 77 |
static pcre_extra **hints_list;
|
| 78 |
|
| 79 |
static char *include_pattern = NULL;
|
| 80 |
static char *exclude_pattern = NULL;
|
| 81 |
|
| 82 |
static pcre *include_compiled = NULL;
|
| 83 |
static pcre *exclude_compiled = NULL;
|
| 84 |
|
| 85 |
static int after_context = 0;
|
| 86 |
static int before_context = 0;
|
| 87 |
static int both_context = 0;
|
| 88 |
|
| 89 |
static BOOL count_only = FALSE;
|
| 90 |
static BOOL filenames = TRUE;
|
| 91 |
static BOOL filenames_only = FALSE;
|
| 92 |
static BOOL filenames_nomatch_only = FALSE;
|
| 93 |
static BOOL hyphenpending = FALSE;
|
| 94 |
static BOOL invert = FALSE;
|
| 95 |
static BOOL multiline = FALSE;
|
| 96 |
static BOOL number = FALSE;
|
| 97 |
static BOOL quiet = FALSE;
|
| 98 |
static BOOL recurse = FALSE;
|
| 99 |
static BOOL silent = FALSE;
|
| 100 |
static BOOL whole_lines = FALSE;
|
| 101 |
static BOOL word_match = FALSE;
|
| 102 |
|
| 103 |
/* Structure for options and list of them */
|
| 104 |
|
| 105 |
enum { OP_NODATA, OP_STRING, OP_NUMBER };
|
| 106 |
|
| 107 |
typedef struct option_item {
|
| 108 |
int type;
|
| 109 |
int one_char;
|
| 110 |
void *dataptr;
|
| 111 |
const char *long_name;
|
| 112 |
const char *help_text;
|
| 113 |
} option_item;
|
| 114 |
|
| 115 |
static option_item optionlist[] = {
|
| 116 |
{ OP_NODATA, -1, NULL, "", " terminate options" },
|
| 117 |
{ OP_NODATA, -1, NULL, "help", "display this help and exit" },
|
| 118 |
{ OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
|
| 119 |
{ OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
|
| 120 |
{ OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
|
| 121 |
{ OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
|
| 122 |
{ OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
|
| 123 |
{ OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
|
| 124 |
{ OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
|
| 125 |
{ OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
|
| 126 |
{ OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
|
| 127 |
{ OP_STRING, -1, &stdin_name, "label=name", "set name for standard input" },
|
| 128 |
{ OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
|
| 129 |
{ OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
|
| 130 |
{ OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
|
| 131 |
{ OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
|
| 132 |
{ OP_STRING, -1, &exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
|
| 133 |
{ OP_STRING, -1, &include_pattern, "include=pattern","include matching files when recursing" },
|
| 134 |
{ OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
|
| 135 |
{ OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
|
| 136 |
{ OP_NODATA, 'V', NULL, "version", "print version information and exit" },
|
| 137 |
{ OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
|
| 138 |
{ OP_NODATA, 'w', NULL, "word-regex(p)", "force PATTERN to match only as a word" },
|
| 139 |
{ OP_NODATA, 'x', NULL, "line-regex(p)", "force PATTERN to match only whole lines" },
|
| 140 |
{ OP_NODATA, 0, NULL, NULL, NULL }
|
| 141 |
};
|
| 142 |
|
| 143 |
|
| 144 |
/*************************************************
|
| 145 |
* Functions for directory scanning *
|
| 146 |
*************************************************/
|
| 147 |
|
| 148 |
/* These functions are defined so that they can be made system specific,
|
| 149 |
although at present the only ones are for Unix, Win32, and for "no directory
|
| 150 |
recursion support". */
|
| 151 |
|
| 152 |
|
| 153 |
/************* Directory scanning in Unix ***********/
|
| 154 |
|
| 155 |
#if IS_UNIX
|
| 156 |
#include <sys/types.h>
|
| 157 |
#include <sys/stat.h>
|
| 158 |
#include <dirent.h>
|
| 159 |
|
| 160 |
typedef DIR directory_type;

/* Classify a path name. The result is '/' when stat() reports a directory;
grep_or_recurse() uses that value as the separator when it builds the names of
the directory's entries. The result is 0 when the path is not a directory or
cannot be stat'ed, in the expectation that opening it as a file will then
fail. */

static int
isdirectory(char *filename)
{
  struct stat info;

  if (stat(filename, &info) >= 0 && S_ISDIR(info.st_mode))
    return '/';
  return 0;
}

/* Start scanning a directory. Returns NULL, with errno set by opendir(), if
the directory cannot be opened. */

static directory_type *
opendirectory(char *filename)
{
  directory_type *handle = opendir(filename);
  return handle;
}

/* Return the name of the next entry in the directory, never "." or "..", or
NULL when readdir() has nothing more to give. The returned pointer refers to
the entry structure obtained from readdir(). */

static char *
readdirectory(directory_type *dir)
{
  struct dirent *entry;

  while ((entry = readdir(dir)) != NULL)
  {
    if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
      continue;
    return entry->d_name;
  }

  return NULL;
}

/* Finish scanning a directory. */

static void
closedirectory(directory_type *dir)
{
  closedir(dir);
}
|
| 195 |
|
| 196 |
|
| 197 |
/************* Directory scanning in Win32 ***********/
|
| 198 |
|
| 199 |
/* I (Philip Hazel) have no means of testing this code. It was contributed by
|
| 200 |
Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
|
| 201 |
when it did not exist. */
|
| 202 |
|
| 203 |
|
| 204 |
#elif HAVE_WIN32API
|
| 205 |
|
| 206 |
#ifndef STRICT
|
| 207 |
# define STRICT
|
| 208 |
#endif
|
| 209 |
#ifndef WIN32_LEAN_AND_MEAN
|
| 210 |
# define WIN32_LEAN_AND_MEAN
|
| 211 |
#endif
|
| 212 |
#ifndef INVALID_FILE_ATTRIBUTES
|
| 213 |
#define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
|
| 214 |
#endif
|
| 215 |
|
| 216 |
#include <windows.h>
|
| 217 |
|
| 218 |
typedef struct directory_type
|
| 219 |
{
|
| 220 |
HANDLE handle;
|
| 221 |
BOOL first;
|
| 222 |
WIN32_FIND_DATA data;
|
| 223 |
} directory_type;
|
| 224 |
|
| 225 |
int
|
| 226 |
isdirectory(char *filename)
|
| 227 |
{
|
| 228 |
DWORD attr = GetFileAttributes(filename);
|
| 229 |
if (attr == INVALID_FILE_ATTRIBUTES)
|
| 230 |
return 0;
|
| 231 |
return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
|
| 232 |
}
|
| 233 |
|
| 234 |
directory_type *
|
| 235 |
opendirectory(char *filename)
|
| 236 |
{
|
| 237 |
size_t len;
|
| 238 |
char *pattern;
|
| 239 |
directory_type *dir;
|
| 240 |
DWORD err;
|
| 241 |
len = strlen(filename);
|
| 242 |
pattern = (char *) malloc(len + 3);
|
| 243 |
dir = (directory_type *) malloc(sizeof(*dir));
|
| 244 |
if ((pattern == NULL) || (dir == NULL))
|
| 245 |
{
|
| 246 |
fprintf(stderr, "pcregrep: malloc failed\n");
|
| 247 |
exit(2);
|
| 248 |
}
|
| 249 |
memcpy(pattern, filename, len);
|
| 250 |
memcpy(&(pattern[len]), "\\*", 3);
|
| 251 |
dir->handle = FindFirstFile(pattern, &(dir->data));
|
| 252 |
if (dir->handle != INVALID_HANDLE_VALUE)
|
| 253 |
{
|
| 254 |
free(pattern);
|
| 255 |
dir->first = TRUE;
|
| 256 |
return dir;
|
| 257 |
}
|
| 258 |
err = GetLastError();
|
| 259 |
free(pattern);
|
| 260 |
free(dir);
|
| 261 |
errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
|
| 262 |
return NULL;
|
| 263 |
}
|
| 264 |
|
| 265 |
char *
|
| 266 |
readdirectory(directory_type *dir)
|
| 267 |
{
|
| 268 |
for (;;)
|
| 269 |
{
|
| 270 |
if (!dir->first)
|
| 271 |
{
|
| 272 |
if (!FindNextFile(dir->handle, &(dir->data)))
|
| 273 |
return NULL;
|
| 274 |
}
|
| 275 |
else
|
| 276 |
{
|
| 277 |
dir->first = FALSE;
|
| 278 |
}
|
| 279 |
if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
|
| 280 |
return dir->data.cFileName;
|
| 281 |
}
|
| 282 |
#ifndef _MSC_VER
|
| 283 |
return NULL; /* Keep compiler happy; never executed */
|
| 284 |
#endif
|
| 285 |
}
|
| 286 |
|
| 287 |
void
|
| 288 |
closedirectory(directory_type *dir)
|
| 289 |
{
|
| 290 |
FindClose(dir->handle);
|
| 291 |
free(dir);
|
| 292 |
}
|
| 293 |
|
| 294 |
|
| 295 |
/************* Directory scanning when we can't do it ***********/
|
| 296 |
|
| 297 |
/* The type is void, and apart from isdirectory(), the functions do nothing. */
|
| 298 |
|
| 299 |
#else
|
| 300 |
|
| 301 |
typedef void directory_type;

/* Without directory support nothing is ever reported as a directory, so
grep_or_recurse() always goes on to open the path as a plain file. */

int isdirectory(char *filename) { (void)filename; return 0; }

/* The remaining functions are not reached while isdirectory() returns 0, but
the non-void ones must still return a value: falling off the end of such a
function and then using the result is undefined behaviour. */

directory_type * opendirectory(char *filename) { (void)filename; return NULL; }
char *readdirectory(directory_type *dir) { (void)dir; return NULL; }
void closedirectory(directory_type *dir) { (void)dir; }
|
| 307 |
|
| 308 |
#endif
|
| 309 |
|
| 310 |
|
| 311 |
|
| 312 |
#if ! HAVE_STRERROR
|
| 313 |
/*************************************************
|
| 314 |
* Provide strerror() for non-ANSI libraries *
|
| 315 |
*************************************************/
|
| 316 |
|
| 317 |
/* Replacement for libraries that lack strerror() (SunOS4 is one example). The
message is looked up in the library's sys_errlist[] table; any number outside
the range 0 to sys_nerr-1 yields a fixed "unknown" text instead. */

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
|
| 330 |
#endif /* HAVE_STRERROR */
|
| 331 |
|
| 332 |
|
| 333 |
|
| 334 |
/*************************************************
|
| 335 |
* Print the previous "after" lines *
|
| 336 |
*************************************************/
|
| 337 |
|
| 338 |
/* This is called if we are about to lose said lines because of buffer filling,
|
| 339 |
and at the end of the file.
|
| 340 |
|
| 341 |
Arguments:
|
| 342 |
lastmatchnumber the number of the last matching line, plus one
|
| 343 |
lastmatchrestart where we restarted after the last match
|
| 344 |
endptr end of available data
|
| 345 |
printname filename for printing
|
| 346 |
|
| 347 |
Returns: nothing
|
| 348 |
*/
|
| 349 |
|
| 350 |
static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
|
| 351 |
char *endptr, char *printname)
|
| 352 |
{
|
| 353 |
if (after_context > 0 && lastmatchnumber > 0)
|
| 354 |
{
|
| 355 |
int count = 0;
|
| 356 |
while (lastmatchrestart < endptr && count++ < after_context)
|
| 357 |
{
|
| 358 |
char *pp = lastmatchrestart;
|
| 359 |
if (printname != NULL) fprintf(stdout, "%s-", printname);
|
| 360 |
if (number) fprintf(stdout, "%d-", lastmatchnumber++);
|
| 361 |
while (*pp != '\n') pp++;
|
| 362 |
fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);
|
| 363 |
lastmatchrestart = pp + 1;
|
| 364 |
}
|
| 365 |
hyphenpending = TRUE;
|
| 366 |
}
|
| 367 |
}
|
| 368 |
|
| 369 |
|
| 370 |
|
| 371 |
/*************************************************
|
| 372 |
* Grep an individual file *
|
| 373 |
*************************************************/
|
| 374 |
|
| 375 |
/* This is called from grep_or_recurse() below. It uses a buffer that is three
|
| 376 |
times the value of MBUFTHIRD. The matching point is never allowed to stray into
|
| 377 |
the top third of the buffer, thus keeping more of the file available for
|
| 378 |
context printing or for multiline scanning. For large files, the pointer will
|
| 379 |
be in the middle third most of the time, so the bottom third is available for
|
| 380 |
"before" context printing.
|
| 381 |
|
| 382 |
Arguments:
|
| 383 |
in the fopened FILE stream
|
| 384 |
printname the file name if it is to be printed for each match
|
| 385 |
or NULL if the file name is not to be printed
|
| 386 |
it cannot be NULL if filenames[_nomatch]_only is set
|
| 387 |
|
| 388 |
Returns: 0 if there was at least one match
|
| 389 |
1 otherwise (no matches)
|
| 390 |
*/
|
| 391 |
|
| 392 |
static int
|
| 393 |
pcregrep(FILE *in, char *printname)
|
| 394 |
{
|
| 395 |
int rc = 1;
|
| 396 |
int linenumber = 1;
|
| 397 |
int lastmatchnumber = 0;
|
| 398 |
int count = 0;
|
| 399 |
int offsets[99];
|
| 400 |
char *lastmatchrestart = NULL;
|
| 401 |
char buffer[3*MBUFTHIRD];
|
| 402 |
char *ptr = buffer;
|
| 403 |
char *endptr;
|
| 404 |
size_t bufflength;
|
| 405 |
BOOL endhyphenpending = FALSE;
|
| 406 |
|
| 407 |
/* Do the first read into the start of the buffer and set up the pointer to
|
| 408 |
end of what we have. */
|
| 409 |
|
| 410 |
bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
|
| 411 |
endptr = buffer + bufflength;
|
| 412 |
|
| 413 |
/* Loop while the current pointer is not at the end of the file. For large
|
| 414 |
files, endptr will be at the end of the buffer when we are in the middle of the
|
| 415 |
file, but ptr will never get there, because as soon as it gets over 2/3 of the
|
| 416 |
way, the buffer is shifted left and re-filled. */
|
| 417 |
|
| 418 |
while (ptr < endptr)
|
| 419 |
{
|
| 420 |
int i;
|
| 421 |
BOOL match = FALSE;
|
| 422 |
char *t = ptr;
|
| 423 |
size_t length, linelength;
|
| 424 |
|
| 425 |
/* At this point, ptr is at the start of a line. We need to find the length
|
| 426 |
of the subject string to pass to pcre_exec(). In multiline mode, it is the
|
| 427 |
length remainder of the data in the buffer. Otherwise, it is the length of
|
| 428 |
the next line. After matching, we always advance by the length of the next
|
| 429 |
line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
|
| 430 |
that any match is constrained to be in the first line. */
|
| 431 |
|
| 432 |
linelength = 0;
|
| 433 |
while (t < endptr && *t++ != '\n') linelength++;
|
| 434 |
length = multiline? endptr - ptr : linelength;
|
| 435 |
|
| 436 |
/* Run through all the patterns until one matches. Note that we don't include
|
| 437 |
the final newline in the subject string. */
|
| 438 |
|
| 439 |
for (i = 0; !match && i < pattern_count; i++)
|
| 440 |
{
|
| 441 |
match = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
|
| 442 |
offsets, 99) >= 0;
|
| 443 |
}
|
| 444 |
|
| 445 |
/* If it's a match or a not-match (as required), print what's wanted. */
|
| 446 |
|
| 447 |
if (match != invert)
|
| 448 |
{
|
| 449 |
BOOL hyphenprinted = FALSE;
|
| 450 |
|
| 451 |
if (filenames_nomatch_only) return 1;
|
| 452 |
|
| 453 |
if (count_only) count++;
|
| 454 |
|
| 455 |
else if (filenames_only)
|
| 456 |
{
|
| 457 |
fprintf(stdout, "%s\n", printname);
|
| 458 |
return 0;
|
| 459 |
}
|
| 460 |
|
| 461 |
else if (quiet) return 0;
|
| 462 |
|
| 463 |
else
|
| 464 |
{
|
| 465 |
/* See if there is a requirement to print some "after" lines from a
|
| 466 |
previous match. We never print any overlaps. */
|
| 467 |
|
| 468 |
if (after_context > 0 && lastmatchnumber > 0)
|
| 469 |
{
|
| 470 |
int linecount = 0;
|
| 471 |
char *p = lastmatchrestart;
|
| 472 |
|
| 473 |
while (p < ptr && linecount < after_context)
|
| 474 |
{
|
| 475 |
while (*p != '\n') p++;
|
| 476 |
p++;
|
| 477 |
linecount++;
|
| 478 |
}
|
| 479 |
|
| 480 |
/* It is important to advance lastmatchrestart during this printing so
|
| 481 |
that it interacts correctly with any "before" printing below. */
|
| 482 |
|
| 483 |
while (lastmatchrestart < p)
|
| 484 |
{
|
| 485 |
char *pp = lastmatchrestart;
|
| 486 |
if (printname != NULL) fprintf(stdout, "%s-", printname);
|
| 487 |
if (number) fprintf(stdout, "%d-", lastmatchnumber++);
|
| 488 |
while (*pp != '\n') pp++;
|
| 489 |
fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);
|
| 490 |
lastmatchrestart = pp + 1;
|
| 491 |
}
|
| 492 |
if (lastmatchrestart != ptr) hyphenpending = TRUE;
|
| 493 |
}
|
| 494 |
|
| 495 |
/* If there were non-contiguous lines printed above, insert hyphens. */
|
| 496 |
|
| 497 |
if (hyphenpending)
|
| 498 |
{
|
| 499 |
fprintf(stdout, "--\n");
|
| 500 |
hyphenpending = FALSE;
|
| 501 |
hyphenprinted = TRUE;
|
| 502 |
}
|
| 503 |
|
| 504 |
/* See if there is a requirement to print some "before" lines for this
|
| 505 |
match. Again, don't print overlaps. */
|
| 506 |
|
| 507 |
if (before_context > 0)
|
| 508 |
{
|
| 509 |
int linecount = 0;
|
| 510 |
char *p = ptr;
|
| 511 |
|
| 512 |
while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
|
| 513 |
linecount++ < before_context)
|
| 514 |
{
|
| 515 |
p--;
|
| 516 |
while (p > buffer && p[-1] != '\n') p--;
|
| 517 |
}
|
| 518 |
|
| 519 |
if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
|
| 520 |
fprintf(stdout, "--\n");
|
| 521 |
|
| 522 |
while (p < ptr)
|
| 523 |
{
|
| 524 |
char *pp = p;
|
| 525 |
if (printname != NULL) fprintf(stdout, "%s-", printname);
|
| 526 |
if (number) fprintf(stdout, "%d-", linenumber - linecount--);
|
| 527 |
while (*pp != '\n') pp++;
|
| 528 |
fprintf(stdout, "%.*s", pp - p + 1, p);
|
| 529 |
p = pp + 1;
|
| 530 |
}
|
| 531 |
}
|
| 532 |
|
| 533 |
/* Now print the matching line(s); ensure we set hyphenpending at the end
|
| 534 |
of the file. */
|
| 535 |
|
| 536 |
endhyphenpending = TRUE;
|
| 537 |
if (printname != NULL) fprintf(stdout, "%s:", printname);
|
| 538 |
if (number) fprintf(stdout, "%d:", linenumber);
|
| 539 |
|
| 540 |
/* In multiline mode, we want to print to the end of the line in which
|
| 541 |
the end of the matched string is found, so we adjust linelength and the
|
| 542 |
line number appropriately. Because the PCRE_FIRSTLINE option is set, the
|
| 543 |
start of the match will always be before the first \n character. */
|
| 544 |
|
| 545 |
if (multiline)
|
| 546 |
{
|
| 547 |
char *endmatch = ptr + offsets[1];
|
| 548 |
t = ptr;
|
| 549 |
while (t < endmatch) { if (*t++ == '\n') linenumber++; }
|
| 550 |
while (endmatch < endptr && *endmatch != '\n') endmatch++;
|
| 551 |
linelength = endmatch - ptr;
|
| 552 |
}
|
| 553 |
|
| 554 |
fprintf(stdout, "%.*s\n", linelength, ptr);
|
| 555 |
}
|
| 556 |
|
| 557 |
rc = 0; /* Had some success */
|
| 558 |
|
| 559 |
/* Remember where the last match happened for after_context. We remember
|
| 560 |
where we are about to restart, and that line's number. */
|
| 561 |
|
| 562 |
lastmatchrestart = ptr + linelength + 1;
|
| 563 |
lastmatchnumber = linenumber + 1;
|
| 564 |
}
|
| 565 |
|
| 566 |
/* Advance to after the newline and increment the line number. */
|
| 567 |
|
| 568 |
ptr += linelength + 1;
|
| 569 |
linenumber++;
|
| 570 |
|
| 571 |
/* If we haven't yet reached the end of the file (the buffer is full), and
|
| 572 |
the current point is in the top 1/3 of the buffer, slide the buffer down by
|
| 573 |
1/3 and refill it. Before we do this, if some unprinted "after" lines are
|
| 574 |
about to be lost, print them. */
|
| 575 |
|
| 576 |
if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
|
| 577 |
{
|
| 578 |
if (after_context > 0 &&
|
| 579 |
lastmatchnumber > 0 &&
|
| 580 |
lastmatchrestart < buffer + MBUFTHIRD)
|
| 581 |
{
|
| 582 |
do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
|
| 583 |
lastmatchnumber = 0;
|
| 584 |
}
|
| 585 |
|
| 586 |
/* Now do the shuffle */
|
| 587 |
|
| 588 |
memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
|
| 589 |
ptr -= MBUFTHIRD;
|
| 590 |
bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
|
| 591 |
endptr = buffer + bufflength;
|
| 592 |
|
| 593 |
/* Adjust any last match point */
|
| 594 |
|
| 595 |
if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
|
| 596 |
}
|
| 597 |
} /* Loop through the whole file */
|
| 598 |
|
| 599 |
/* End of file; print final "after" lines if wanted; do_after_lines sets
|
| 600 |
hyphenpending if it prints something. */
|
| 601 |
|
| 602 |
do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
|
| 603 |
hyphenpending |= endhyphenpending;
|
| 604 |
|
| 605 |
/* Print the file name if we are looking for those without matches and there
|
| 606 |
were none. If we found a match, we won't have got this far. */
|
| 607 |
|
| 608 |
if (filenames_nomatch_only)
|
| 609 |
{
|
| 610 |
fprintf(stdout, "%s\n", printname);
|
| 611 |
return 0;
|
| 612 |
}
|
| 613 |
|
| 614 |
/* Print the match count if wanted */
|
| 615 |
|
| 616 |
if (count_only)
|
| 617 |
{
|
| 618 |
if (printname != NULL) fprintf(stdout, "%s:", printname);
|
| 619 |
fprintf(stdout, "%d\n", count);
|
| 620 |
}
|
| 621 |
|
| 622 |
return rc;
|
| 623 |
}
|
| 624 |
|
| 625 |
|
| 626 |
|
| 627 |
/*************************************************
|
| 628 |
* Grep a file or recurse into a directory *
|
| 629 |
*************************************************/
|
| 630 |
|
| 631 |
/* Given a path name, if it's a directory, scan all the files if we are
|
| 632 |
recursing; if it's a file, grep it.
|
| 633 |
|
| 634 |
Arguments:
|
| 635 |
pathname the path to investigate
|
| 636 |
dir_recurse TRUE if recursing is wanted (-r)
|
| 637 |
show_filenames TRUE if file names are wanted for multiple files, except
|
| 638 |
for the only file at top level when not filenames_only
|
| 639 |
only_one_at_top TRUE if the path is the only one at toplevel
|
| 640 |
|
| 641 |
Returns: 0 if there was at least one match
|
| 642 |
1 if there were no matches
|
| 643 |
2 there was some kind of error
|
| 644 |
|
| 645 |
However, file opening failures are suppressed if "silent" is set.
|
| 646 |
*/
|
| 647 |
|
| 648 |
static int
|
| 649 |
grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL show_filenames,
|
| 650 |
BOOL only_one_at_top)
|
| 651 |
{
|
| 652 |
int rc = 1;
|
| 653 |
int sep;
|
| 654 |
FILE *in;
|
| 655 |
char *printname;
|
| 656 |
|
| 657 |
/* If the file name is "-" we scan stdin */
|
| 658 |
|
| 659 |
if (strcmp(pathname, "-") == 0)
|
| 660 |
{
|
| 661 |
return pcregrep(stdin,
|
| 662 |
(filenames_only || filenames_nomatch_only ||
|
| 663 |
(show_filenames && !only_one_at_top))?
|
| 664 |
stdin_name : NULL);
|
| 665 |
}
|
| 666 |
|
| 667 |
/* If the file is a directory and we are recursing, scan each file within it,
|
| 668 |
subject to any include or exclude patterns that were set. The scanning code is
|
| 669 |
localized so it can be made system-specific. */
|
| 670 |
|
| 671 |
if ((sep = isdirectory(pathname)) != 0 && dir_recurse)
|
| 672 |
{
|
| 673 |
char buffer[1024];
|
| 674 |
char *nextfile;
|
| 675 |
directory_type *dir = opendirectory(pathname);
|
| 676 |
|
| 677 |
if (dir == NULL)
|
| 678 |
{
|
| 679 |
if (!silent)
|
| 680 |
fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
|
| 681 |
strerror(errno));
|
| 682 |
return 2;
|
| 683 |
}
|
| 684 |
|
| 685 |
while ((nextfile = readdirectory(dir)) != NULL)
|
| 686 |
{
|
| 687 |
int frc, blen;
|
| 688 |
sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
|
| 689 |
blen = strlen(buffer);
|
| 690 |
|
| 691 |
if (exclude_compiled != NULL &&
|
| 692 |
pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
|
| 693 |
continue;
|
| 694 |
|
| 695 |
if (include_compiled != NULL &&
|
| 696 |
pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
|
| 697 |
continue;
|
| 698 |
|
| 699 |
frc = grep_or_recurse(buffer, dir_recurse, TRUE, FALSE);
|
| 700 |
if (frc > 1) rc = frc;
|
| 701 |
else if (frc == 0 && rc == 1) rc = 0;
|
| 702 |
}
|
| 703 |
|
| 704 |
closedirectory(dir);
|
| 705 |
return rc;
|
| 706 |
}
|
| 707 |
|
| 708 |
/* If the file is not a directory, or we are not recursing, scan it. If this is
|
| 709 |
the first and only argument at top level, we don't show the file name (unless
|
| 710 |
we are only showing the file name). Otherwise, control is via the
|
| 711 |
show_filenames variable. */
|
| 712 |
|
| 713 |
in = fopen(pathname, "r");
|
| 714 |
if (in == NULL)
|
| 715 |
{
|
| 716 |
if (!silent)
|
| 717 |
fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
|
| 718 |
strerror(errno));
|
| 719 |
return 2;
|
| 720 |
}
|
| 721 |
|
| 722 |
printname = (filenames_only || filenames_nomatch_only ||
|
| 723 |
(show_filenames && !only_one_at_top))? pathname : NULL;
|
| 724 |
|
| 725 |
rc = pcregrep(in, printname);
|
| 726 |
|
| 727 |
fclose(in);
|
| 728 |
return rc;
|
| 729 |
}
|
| 730 |
|
| 731 |
|
| 732 |
|
| 733 |
|
| 734 |
/*************************************************
*                Usage function                  *
*************************************************/

/* Write the short usage message to stderr.

Argument:   rc    the value to hand back
Returns:    rc, unchanged, so that a caller can write exit(usage(2))
*/

static int
usage(int rc)
{
  FILE *out = stderr;

  fputs("Usage: pcregrep [-LMVcfhilnqrsvwx] [long-options] [pattern] [file1 file2 ...]\n", out);
  fputs("Type `pcregrep --help' for more information.\n", out);
  return rc;
}
|
| 745 |
|
| 746 |
|
| 747 |
|
| 748 |
|
| 749 |
/*************************************************
|
| 750 |
* Help function *
|
| 751 |
*************************************************/
|
| 752 |
|
| 753 |
static void
|
| 754 |
help(void)
|
| 755 |
{
|
| 756 |
option_item *op;
|
| 757 |
|
| 758 |
printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
|
| 759 |
printf("Search for PATTERN in each FILE or standard input.\n");
|
| 760 |
printf("PATTERN must be present if -f is not used.\n");
|
| 761 |
printf("\"-\" can be used as a file name to mean STDIN.\n");
|
| 762 |
printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
|
| 763 |
|
| 764 |
printf("Options:\n");
|
| 765 |
|
| 766 |
for (op = optionlist; op->one_char != 0; op++)
|
| 767 |
{
|
| 768 |
int n;
|
| 769 |
char s[4];
|
| 770 |
if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
|
| 771 |
printf(" %s --%s%n", s, op->long_name, &n);
|
| 772 |
n = 30 - n;
|
| 773 |
if (n < 1) n = 1;
|
| 774 |
printf("%.*s%s\n", n, " ", op->help_text);
|
| 775 |
}
|
| 776 |
|
| 777 |
printf("\nWhen reading patterns from a file instead of using a command line option,\n");
|
| 778 |
printf("trailing white space is removed and blank lines are ignored.\n");
|
| 779 |
printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
|
| 780 |
|
| 781 |
printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
|
| 782 |
printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
|
| 783 |
}
|
| 784 |
|
| 785 |
|
| 786 |
|
| 787 |
|
| 788 |
/*************************************************
|
| 789 |
* Handle a single-letter, no data option *
|
| 790 |
*************************************************/
|
| 791 |
|
| 792 |
static int
|
| 793 |
handle_option(int letter, int options)
|
| 794 |
{
|
| 795 |
switch(letter)
|
| 796 |
{
|
| 797 |
case -1: help(); exit(0);
|
| 798 |
case 'c': count_only = TRUE; break;
|
| 799 |
case 'h': filenames = FALSE; break;
|
| 800 |
case 'i': options |= PCRE_CASELESS; break;
|
| 801 |
case 'l': filenames_only = TRUE; break;
|
| 802 |
case 'L': filenames_nomatch_only = TRUE; break;
|
| 803 |
case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
|
| 804 |
case 'n': number = TRUE; break;
|
| 805 |
case 'q': quiet = TRUE; break;
|
| 806 |
case 'r': recurse = TRUE; break;
|
| 807 |
case 's': silent = TRUE; break;
|
| 808 |
case 'u': options |= PCRE_UTF8; break;
|
| 809 |
case 'v': invert = TRUE; break;
|
| 810 |
case 'w': word_match = TRUE; break;
|
| 811 |
case 'x': whole_lines = TRUE; break;
|
| 812 |
|
| 813 |
case 'V':
|
| 814 |
fprintf(stderr, "pcregrep version %s using ", VERSION);
|
| 815 |
fprintf(stderr, "PCRE version %s\n", pcre_version());
|
| 816 |
exit(0);
|
| 817 |
break;
|
| 818 |
|
| 819 |
default:
|
| 820 |
fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
|
| 821 |
exit(usage(2));
|
| 822 |
}
|
| 823 |
|
| 824 |
return options;
|
| 825 |
}
|
| 826 |
|
| 827 |
|
| 828 |
|
| 829 |
|
| 830 |
/*************************************************
|
| 831 |
* Main program *
|
| 832 |
*************************************************/
|
| 833 |
|
| 834 |
/* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
|
| 835 |
|
| 836 |
int
|
| 837 |
main(int argc, char **argv)
|
| 838 |
{
|
| 839 |
int i, j;
|
| 840 |
int rc = 1;
|
| 841 |
int options = 0;
|
| 842 |
int errptr;
|
| 843 |
const char *error;
|
| 844 |
BOOL only_one_at_top;
|
| 845 |
|
| 846 |
/* Process the options */
|
| 847 |
|
| 848 |
for (i = 1; i < argc; i++)
|
| 849 |
{
|
| 850 |
option_item *op = NULL;
|
| 851 |
char *option_data = (char *)""; /* default to keep compiler happy */
|
| 852 |
BOOL longop;
|
| 853 |
BOOL longopwasequals = FALSE;
|
| 854 |
|
| 855 |
if (argv[i][0] != '-') break;
|
| 856 |
|
| 857 |
/* If we hit an argument that is just "-", it may be a reference to STDIN,
|
| 858 |
but only if we have previously had -f to define the patterns. */
|
| 859 |
|
| 860 |
if (argv[i][1] == 0)
|
| 861 |
{
|
| 862 |
if (pattern_filename != NULL) break;
|
| 863 |
else exit(usage(2));
|
| 864 |
}
|
| 865 |
|
| 866 |
/* Handle a long name option, or -- to terminate the options */
|
| 867 |
|
| 868 |
if (argv[i][1] == '-')
|
| 869 |
{
|
| 870 |
char *arg = argv[i] + 2;
|
| 871 |
char *argequals = strchr(arg, '=');
|
| 872 |
|
| 873 |
if (*arg == 0) /* -- terminates options */
|
| 874 |
{
|
| 875 |
i++;
|
| 876 |
break; /* out of the options-handling loop */
|
| 877 |
}
|
| 878 |
|
| 879 |
longop = TRUE;
|
| 880 |
|
| 881 |
/* Some long options have data that follows after =, for example file=name.
|
| 882 |
Some options have variations in the long name spelling: specifically, we
|
| 883 |
allow "regexp" because GNU grep allows it, though I personally go along
|
| 884 |
with Jeff Friedl in preferring "regex" without the "p". These options are
|
| 885 |
entered in the table as "regex(p)". No option is in both these categories,
|
| 886 |
fortunately. */
|
| 887 |
|
| 888 |
for (op = optionlist; op->one_char != 0; op++)
|
| 889 |
{
|
| 890 |
char *opbra = strchr(op->long_name, '(');
|
| 891 |
char *equals = strchr(op->long_name, '=');
|
| 892 |
if (opbra == NULL) /* Not a (p) case */
|
| 893 |
{
|
| 894 |
if (equals == NULL) /* Not thing=data case */
|
| 895 |
{
|
| 896 |
if (strcmp(arg, op->long_name) == 0) break;
|
| 897 |
}
|
| 898 |
else /* Special case xxx=data */
|
| 899 |
{
|
| 900 |
int oplen = equals - op->long_name;
|
| 901 |
int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
|
| 902 |
if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
|
| 903 |
{
|
| 904 |
option_data = arg + arglen;
|
| 905 |
if (*option_data == '=')
|
| 906 |
{
|
| 907 |
option_data++;
|
| 908 |
longopwasequals = TRUE;
|
| 909 |
}
|
| 910 |
break;
|
| 911 |
}
|
| 912 |
}
|
| 913 |
}
|
| 914 |
else /* Special case xxxx(p) */
|
| 915 |
{
|
| 916 |
char buff1[24];
|
| 917 |
char buff2[24];
|
| 918 |
int baselen = opbra - op->long_name;
|
| 919 |
sprintf(buff1, "%.*s", baselen, op->long_name);
|
| 920 |
sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,
|
| 921 |
opbra + 1);
|
| 922 |
if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
|
| 923 |
break;
|
| 924 |
}
|
| 925 |
}
|
| 926 |
|
| 927 |
if (op->one_char == 0)
|
| 928 |
{
|
| 929 |
fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
|
| 930 |
exit(usage(2));
|
| 931 |
}
|
| 932 |
}
|
| 933 |
|
| 934 |
/* One-char options; many that have no data may be in a single argument; we
|
| 935 |
continue till we hit the last one or one that needs data. */
|
| 936 |
|
| 937 |
else
|
| 938 |
{
|
| 939 |
char *s = argv[i] + 1;
|
| 940 |
longop = FALSE;
|
| 941 |
while (*s != 0)
|
| 942 |
{
|
| 943 |
for (op = optionlist; op->one_char != 0; op++)
|
| 944 |
{ if (*s == op->one_char) break; }
|
| 945 |
if (op->one_char == 0)
|
| 946 |
{
|
| 947 |
fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
|
| 948 |
*s, argv[i]);
|
| 949 |
exit(usage(2));
|
| 950 |
}
|
| 951 |
if (op->type != OP_NODATA || s[1] == 0)
|
| 952 |
{
|
| 953 |
option_data = s+1;
|
| 954 |
break;
|
| 955 |
}
|
| 956 |
options = handle_option(*s++, options);
|
| 957 |
}
|
| 958 |
}
|
| 959 |
|
| 960 |
/* At this point we should have op pointing to a matched option */
|
| 961 |
|
| 962 |
if (op->type == OP_NODATA)
|
| 963 |
options = handle_option(op->one_char, options);
|
| 964 |
else
|
| 965 |
{
|
| 966 |
if (*option_data == 0)
|
| 967 |
{
|
| 968 |
if (i >= argc - 1 || longopwasequals)
|
| 969 |
{
|
| 970 |
fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
|
| 971 |
exit(usage(2));
|
| 972 |
}
|
| 973 |
option_data = argv[++i];
|
| 974 |
}
|
| 975 |
|
| 976 |
if (op->type == OP_STRING) *((char **)op->dataptr) = option_data; else
|
| 977 |
{
|
| 978 |
char *endptr;
|
| 979 |
int n = strtoul(option_data, &endptr, 10);
|
| 980 |
if (*endptr != 0)
|
| 981 |
{
|
| 982 |
if (longop)
|
| 983 |
fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%s\n",
|
| 984 |
option_data, op->long_name);
|
| 985 |
else
|
| 986 |
fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
|
| 987 |
option_data, op->one_char);
|
| 988 |
exit(usage(2));
|
| 989 |
}
|
| 990 |
*((int *)op->dataptr) = n;
|
| 991 |
}
|
| 992 |
}
|
| 993 |
}
|
| 994 |
|
| 995 |
/* Options have been decoded. If -C was used, its value is used as a default
|
| 996 |
for -A and -B. */
|
| 997 |
|
| 998 |
if (both_context > 0)
|
| 999 |
{
|
| 1000 |
if (after_context == 0) after_context = both_context;
|
| 1001 |
if (before_context == 0) before_context = both_context;
|
| 1002 |
}
|
| 1003 |
|
| 1004 |
pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
|
| 1005 |
hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
|
| 1006 |
|
| 1007 |
if (pattern_list == NULL || hints_list == NULL)
|
| 1008 |
{
|
| 1009 |
fprintf(stderr, "pcregrep: malloc failed\n");
|
| 1010 |
return 2;
|
| 1011 |
}
|
| 1012 |
|
| 1013 |
/* Compile the regular expression(s). */
|
| 1014 |
|
| 1015 |
if (pattern_filename != NULL)
|
| 1016 |
{
|
| 1017 |
FILE *f = fopen(pattern_filename, "r");
|
| 1018 |
char buffer[MBUFTHIRD + 16];
|
| 1019 |
char *rdstart;
|
| 1020 |
int adjust = 0;
|
| 1021 |
|
| 1022 |
if (f == NULL)
|
| 1023 |
{
|
| 1024 |
fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
|
| 1025 |
strerror(errno));
|
| 1026 |
return 2;
|
| 1027 |
}
|
| 1028 |
|
| 1029 |
if (whole_lines)
|
| 1030 |
{
|
| 1031 |
strcpy(buffer, "^(?:");
|
| 1032 |
adjust = 4;
|
| 1033 |
}
|
| 1034 |
else if (word_match)
|
| 1035 |
{
|
| 1036 |
strcpy(buffer, "\\b");
|
| 1037 |
adjust = 2;
|
| 1038 |
}
|
| 1039 |
|
| 1040 |
rdstart = buffer + adjust;
|
| 1041 |
while (fgets(rdstart, MBUFTHIRD, f) != NULL)
|
| 1042 |
{
|
| 1043 |
char *s = rdstart + (int)strlen(rdstart);
|
| 1044 |
if (pattern_count >= MAX_PATTERN_COUNT)
|
| 1045 |
{
|
| 1046 |
fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",
|
| 1047 |
MAX_PATTERN_COUNT);
|
| 1048 |
return 2;
|
| 1049 |
}
|
| 1050 |
while (s > rdstart && isspace((unsigned char)(s[-1]))) s--;
|
| 1051 |
if (s == rdstart) continue;
|
| 1052 |
if (whole_lines) strcpy(s, ")$");
|
| 1053 |
else if (word_match)strcpy(s, "\\b");
|
| 1054 |
else *s = 0;
|
| 1055 |
pattern_list[pattern_count] = pcre_compile(buffer, options, &error,
|
| 1056 |
&errptr, NULL);
|
| 1057 |
if (pattern_list[pattern_count++] == NULL)
|
| 1058 |
{
|
| 1059 |
fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",
|
| 1060 |
pattern_count, errptr - adjust, error);
|
| 1061 |
return 2;
|
| 1062 |
}
|
| 1063 |
}
|
| 1064 |
fclose(f);
|
| 1065 |
}
|
| 1066 |
|
| 1067 |
/* If no file name, a single regex must be given inline. */
|
| 1068 |
|
| 1069 |
else
|
| 1070 |
{
|
| 1071 |
char buffer[MBUFTHIRD + 16];
|
| 1072 |
char *pat;
|
| 1073 |
int adjust = 0;
|
| 1074 |
|
| 1075 |
if (i >= argc) return usage(2);
|
| 1076 |
|
| 1077 |
if (whole_lines)
|
| 1078 |
{
|
| 1079 |
sprintf(buffer, "^(?:%.*s)$", MBUFTHIRD, argv[i++]);
|
| 1080 |
pat = buffer;
|
| 1081 |
adjust = 4;
|
| 1082 |
}
|
| 1083 |
else if (word_match)
|
| 1084 |
{
|
| 1085 |
sprintf(buffer, "\\b%.*s\\b", MBUFTHIRD, argv[i++]);
|
| 1086 |
pat = buffer;
|
| 1087 |
adjust = 2;
|
| 1088 |
}
|
| 1089 |
else pat = argv[i++];
|
| 1090 |
|
| 1091 |
pattern_list[0] = pcre_compile(pat, options, &error, &errptr, NULL);
|
| 1092 |
|
| 1093 |
if (pattern_list[0] == NULL)
|
| 1094 |
{
|
| 1095 |
fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n",
|
| 1096 |
errptr - adjust, error);
|
| 1097 |
return 2;
|
| 1098 |
}
|
| 1099 |
pattern_count++;
|
| 1100 |
}
|
| 1101 |
|
| 1102 |
/* Study the regular expressions, as we will be running them many times */
|
| 1103 |
|
| 1104 |
for (j = 0; j < pattern_count; j++)
|
| 1105 |
{
|
| 1106 |
hints_list[j] = pcre_study(pattern_list[j], 0, &error);
|
| 1107 |
if (error != NULL)
|
| 1108 |
{
|
| 1109 |
char s[16];
|
| 1110 |
if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
|
| 1111 |
fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
|
| 1112 |
return 2;
|
| 1113 |
}
|
| 1114 |
}
|
| 1115 |
|
| 1116 |
/* If there are include or exclude patterns, compile them. */
|
| 1117 |
|
| 1118 |
if (exclude_pattern != NULL)
|
| 1119 |
{
|
| 1120 |
exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr, NULL);
|
| 1121 |
if (exclude_compiled == NULL)
|
| 1122 |
{
|
| 1123 |
fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
|
| 1124 |
errptr, error);
|
| 1125 |
return 2;
|
| 1126 |
}
|
| 1127 |
}
|
| 1128 |
|
| 1129 |
if (include_pattern != NULL)
|
| 1130 |
{
|
| 1131 |
include_compiled = pcre_compile(include_pattern, 0, &error, &errptr, NULL);
|
| 1132 |
if (include_compiled == NULL)
|
| 1133 |
{
|
| 1134 |
fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
|
| 1135 |
errptr, error);
|
| 1136 |
return 2;
|
| 1137 |
}
|
| 1138 |
}
|
| 1139 |
|
| 1140 |
/* If there are no further arguments, do the business on stdin and exit */
|
| 1141 |
|
| 1142 |
if (i >= argc) return pcregrep(stdin,
|
| 1143 |
(filenames_only || filenames_nomatch_only)? stdin_name : NULL);
|
| 1144 |
|
| 1145 |
/* Otherwise, work through the remaining arguments as files or directories.
|
| 1146 |
Pass in the fact that there is only one argument at top level - this suppresses
|
| 1147 |
the file name if the argument is not a directory and filenames_only is not set.
|
| 1148 |
*/
|
| 1149 |
|
| 1150 |
only_one_at_top = (i == argc - 1);
|
| 1151 |
|
| 1152 |
for (; i < argc; i++)
|
| 1153 |
{
|
| 1154 |
int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);
|
| 1155 |
if (frc > 1) rc = frc;
|
| 1156 |
else if (frc == 0 && rc == 1) rc = 0;
|
| 1157 |
}
|
| 1158 |
|
| 1159 |
return rc;
|
| 1160 |
}
|
| 1161 |
|
| 1162 |
/* End of pcregrep */
|