/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 285 by ph10, Fri Dec 7 19:59:19 2007 UTC revision 286 by ph10, Mon Dec 17 14:46:11 2007 UTC
# Line 55  POSSIBILITY OF SUCH DAMAGE. Line 55  POSSIBILITY OF SUCH DAMAGE.
55  #include <unistd.h>  #include <unistd.h>
56  #endif  #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 76  all values greater than FN_DEFAULT. */ Line 84  all values greater than FN_DEFAULT. */
84    
85  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
86    
87    /* File reading styles */
88    
89    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
90    
91  /* Actions for the -d and -D options */  /* Actions for the -d and -D options */
92    
93  enum { dee_READ, dee_SKIP, dee_RECURSE };  enum { dee_READ, dee_SKIP, dee_RECURSE };
# Line 322  return isatty(fileno(stdout)); Line 334  return isatty(fileno(stdout));
334    
335  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
336  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
337  when it did not exist. David Byron added a patch that moved the #include of  when it did not exist. David Byron added a patch that moved the #include of
338  <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.  <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
339  */  */
340    
341  #elif HAVE_WINDOWS_H  #elif HAVE_WINDOWS_H
# Line 812  be in the middle third most of the time, Line 824  be in the middle third most of the time,
824  "before" context printing.  "before" context printing.
825    
826  Arguments:  Arguments:
827    in           the fopened FILE stream    handle       the fopened FILE stream for a normal file
828                   the gzFile pointer when reading is via libz
829                   the BZFILE pointer when reading is via libbz2
830      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
831    printname    the file name if it is to be printed for each match    printname    the file name if it is to be printed for each match
832                 or NULL if the file name is not to be printed                 or NULL if the file name is not to be printed
833                 it cannot be NULL if filenames[_nomatch]_only is set                 it cannot be NULL if filenames[_nomatch]_only is set
834    
835  Returns:       0 if there was at least one match  Returns:       0 if there was at least one match
836                 1 otherwise (no matches)                 1 otherwise (no matches)
837                   2 if there is a read error on a .bz2 file
838  */  */
839    
840  static int  static int
841  pcregrep(FILE *in, char *printname)  pcregrep(void *handle, int frtype, char *printname)
842  {  {
843  int rc = 1;  int rc = 1;
844  int linenumber = 1;  int linenumber = 1;
# Line 836  char *ptr = buffer; Line 852  char *ptr = buffer;
852  char *endptr;  char *endptr;
853  size_t bufflength;  size_t bufflength;
854  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
855    FILE *in = NULL;                    /* Ensure initialized */
856    
857    #ifdef SUPPORT_LIBZ
858    gzFile ingz = NULL;
859    #endif
860    
861    #ifdef SUPPORT_LIBBZ2
862    BZFILE *inbz2 = NULL;
863    #endif
864    
865    
866    /* Do the first read into the start of the buffer and set up the pointer to end
867    of what we have. In the case of libz, a non-zipped .gz file will be read as a
868    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
869    fail. */
870    
871  /* Do the first read into the start of the buffer and set up the pointer to  #ifdef SUPPORT_LIBZ
872  end of what we have. */  if (frtype == FR_LIBZ)
873      {
874      ingz = (gzFile)handle;
875      bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
876      }
877    else
878    #endif
879    
880    #ifdef SUPPORT_LIBBZ2
881    if (frtype == FR_LIBBZ2)
882      {
883      inbz2 = (BZFILE *)handle;
884      bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
885      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
886      }                                    /* without the cast it is unsigned. */
887    else
888    #endif
889    
890      {
891      in = (FILE *)handle;
892      bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
893      }
894    
 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);  
895  endptr = buffer + bufflength;  endptr = buffer + bufflength;
896    
897  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
# Line 853  while (ptr < endptr) Line 904  while (ptr < endptr)
904    int i, endlinelength;    int i, endlinelength;
905    int mrc = 0;    int mrc = 0;
906    BOOL match = FALSE;    BOOL match = FALSE;
907    char *matchptr = ptr;    char *matchptr = ptr;
908    char *t = ptr;    char *t = ptr;
909    size_t length, linelength;    size_t length, linelength;
910    
# Line 916  while (ptr < endptr) Line 967  while (ptr < endptr)
967    }    }
968  #endif  #endif
969    
970    /* We come back here after a match when the -o option (only_matching) is set,    /* We come back here after a match when the -o option (only_matching) is set,
971    in order to find any further matches in the same line. */    in order to find any further matches in the same line. */
972    
973    ONLY_MATCHING_RESTART:    ONLY_MATCHING_RESTART:
974    
975    /* Run through all the patterns until one matches. Note that we don't include    /* Run through all the patterns until one matches. Note that we don't include
976    the final newline in the subject string. */    the final newline in the subject string. */
# Line 981  while (ptr < endptr) Line 1032  while (ptr < endptr)
1032      else if (quiet) return 0;      else if (quiet) return 0;
1033    
1034      /* The --only-matching option prints just the substring that matched, and      /* The --only-matching option prints just the substring that matched, and
1035      the --file-offsets and --line-offsets options output offsets for the      the --file-offsets and --line-offsets options output offsets for the
1036      matching substring (they both force --only-matching). None of these options      matching substring (they both force --only-matching). None of these options
1037      prints any context. Afterwards, adjust the start and length, and then jump      prints any context. Afterwards, adjust the start and length, and then jump
1038      back to look for further matches in the same line. If we are in invert      back to look for further matches in the same line. If we are in invert
# Line 991  while (ptr < endptr) Line 1042  while (ptr < endptr)
1042      else if (only_matching)      else if (only_matching)
1043        {        {
1044        if (!invert)        if (!invert)
1045          {          {
1046          if (printname != NULL) fprintf(stdout, "%s:", printname);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1047          if (number) fprintf(stdout, "%d:", linenumber);          if (number) fprintf(stdout, "%d:", linenumber);
1048          if (line_offsets)          if (line_offsets)
1049            fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,            fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,
1050              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1051          else if (file_offsets)          else if (file_offsets)
1052            fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,            fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,
1053              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1054          else          else
1055            fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);            fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1056          fprintf(stdout, "\n");          fprintf(stdout, "\n");
1057          matchptr += offsets[1];          matchptr += offsets[1];
1058          length -= offsets[1];          length -= offsets[1];
1059          match = FALSE;          match = FALSE;
1060          goto ONLY_MATCHING_RESTART;          goto ONLY_MATCHING_RESTART;
1061          }          }
1062        }        }
1063    
1064      /* This is the default case when none of the above options is set. We print      /* This is the default case when none of the above options is set. We print
# Line 1181  while (ptr < endptr) Line 1232  while (ptr < endptr)
1232      linelength = endmatch - ptr - ellength;      linelength = endmatch - ptr - ellength;
1233      }      }
1234    
1235    /* Advance to after the newline and increment the line number. The file    /* Advance to after the newline and increment the line number. The file
1236    offset to the current line is maintained in filepos. */    offset to the current line is maintained in filepos. */
1237    
1238    ptr += linelength + endlinelength;    ptr += linelength + endlinelength;
# Line 1207  while (ptr < endptr) Line 1258  while (ptr < endptr)
1258    
1259      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1260      ptr -= MBUFTHIRD;      ptr -= MBUFTHIRD;
1261    
1262    #ifdef SUPPORT_LIBZ
1263        if (frtype == FR_LIBZ)
1264          bufflength = 2*MBUFTHIRD +
1265            gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1266        else
1267    #endif
1268    
1269    #ifdef SUPPORT_LIBBZ2
1270        if (frtype == FR_LIBBZ2)
1271          bufflength = 2*MBUFTHIRD +
1272            BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1273        else
1274    #endif
1275    
1276      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1277    
1278      endptr = buffer + bufflength;      endptr = buffer + bufflength;
1279    
1280      /* Adjust any last match point */      /* Adjust any last match point */
# Line 1271  grep_or_recurse(char *pathname, BOOL dir Line 1338  grep_or_recurse(char *pathname, BOOL dir
1338  {  {
1339  int rc = 1;  int rc = 1;
1340  int sep;  int sep;
1341  FILE *in;  int frtype;
1342    int pathlen;
1343    void *handle;
1344    FILE *in = NULL;           /* Ensure initialized */
1345    
1346    #ifdef SUPPORT_LIBZ
1347    gzFile ingz = NULL;
1348    #endif
1349    
1350    #ifdef SUPPORT_LIBBZ2
1351    BZFILE *inbz2 = NULL;
1352    #endif
1353    
1354  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
1355    
1356  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
1357    {    {
1358    return pcregrep(stdin,    return pcregrep(stdin, FR_PLAIN,
1359      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1360        stdin_name : NULL);        stdin_name : NULL);
1361    }    }
1362    
   
1363  /* If the file is a directory, skip if skipping or if we are recursing, scan  /* If the file is a directory, skip if skipping or if we are recursing, scan
1364  each file within it, subject to any include or exclude patterns that were set.  each file within it, subject to any include or exclude patterns that were set.
1365  The scanning code is localized so it can be made system-specific. */  The scanning code is localized so it can be made system-specific. */
# Line 1339  skipping was not requested. The scan pro Line 1416  skipping was not requested. The scan pro
1416  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
1417  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
1418    
1419  in = fopen(pathname, "r");  pathlen = strlen(pathname);
1420  if (in == NULL)  
1421    /* Open using zlib if it is supported and the file name ends with .gz. */
1422    
1423    #ifdef SUPPORT_LIBZ
1424    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1425      {
1426      ingz = gzopen(pathname, "rb");
1427      if (ingz == NULL)
1428        {
1429        if (!silent)
1430          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1431            strerror(errno));
1432        return 2;
1433        }
1434      handle = (void *)ingz;
1435      frtype = FR_LIBZ;
1436      }
1437    else
1438    #endif
1439    
1440    /* Otherwise open with libbz2 if it is supported and the name ends with .bz2. */
1441    
1442    #ifdef SUPPORT_LIBBZ2
1443    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1444      {
1445      inbz2 = BZ2_bzopen(pathname, "rb");
1446      handle = (void *)inbz2;
1447      frtype = FR_LIBBZ2;
1448      }
1449    else
1450    #endif
1451    
1452    /* Otherwise use plain fopen(). The label is so that we can come back here if
1453    an attempt to read a .bz2 file indicates that it really is a plain file. */
1454    
1455    #ifdef SUPPORT_LIBBZ2
1456    PLAIN_FILE:
1457    #endif
1458      {
1459      in = fopen(pathname, "r");
1460      handle = (void *)in;
1461      frtype = FR_PLAIN;
1462      }
1463    
1464    /* All the opening methods return NULL and set errno when they fail. */
1465    
1466    if (handle == NULL)
1467    {    {
1468    if (!silent)    if (!silent)
1469      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
# Line 1348  if (in == NULL) Line 1471  if (in == NULL)
1471    return 2;    return 2;
1472    }    }
1473    
1474  rc = pcregrep(in, (filenames > FN_DEFAULT ||  /* Now grep the file */
1475    
1476    rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1477    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1478    
1479    /* Close in an appropriate manner. */
1480    
1481    #ifdef SUPPORT_LIBZ
1482    if (frtype == FR_LIBZ)
1483      gzclose(ingz);
1484    else
1485    #endif
1486    
1487    /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1488    read failed. If the error indicates that the file isn't in fact bzipped, try
1489    again as a normal file. */
1490    
1491    #ifdef SUPPORT_LIBBZ2
1492    if (frtype == FR_LIBBZ2)
1493      {
1494      if (rc == 2)
1495        {
1496        int errnum;
1497        const char *err = BZ2_bzerror(inbz2, &errnum);
1498        if (errnum == BZ_DATA_ERROR_MAGIC)
1499          {
1500          BZ2_bzclose(inbz2);
1501          goto PLAIN_FILE;
1502          }
1503        else if (!silent)
1504          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1505            pathname, err);
1506        }
1507      BZ2_bzclose(inbz2);
1508      }
1509    else
1510    #endif
1511    
1512    /* Normal file close */
1513    
1514  fclose(in);  fclose(in);
1515    
1516    /* Pass back the yield from pcregrep(). */
1517    
1518  return rc;  return rc;
1519  }  }
1520    
# Line 1392  option_item *op; Line 1555  option_item *op;
1555  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1556  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1557  printf("PATTERN must be present if neither -e nor -f is used.\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1558  printf("\"-\" can be used as a file name to mean STDIN.\n\n");  printf("\"-\" can be used as a file name to mean STDIN.\n");
1559  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  
1560    #ifdef SUPPORT_LIBZ
1561    printf("Files whose names end in .gz are read using zlib.\n");
1562    #endif
1563    
1564    #ifdef SUPPORT_LIBBZ2
1565    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1566    #endif
1567    
1568    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1569    printf("Other files and the standard input are read as plain files.\n\n");
1570    #else
1571    printf("All files are read as plain files, without any interpretation.\n\n");
1572    #endif
1573    
1574    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1575  printf("Options:\n");  printf("Options:\n");
1576    
1577  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
# Line 1428  handle_option(int letter, int options) Line 1605  handle_option(int letter, int options)
1605  {  {
1606  switch(letter)  switch(letter)
1607    {    {
1608    case N_FOFFSETS: file_offsets = TRUE; break;    case N_FOFFSETS: file_offsets = TRUE; break;
1609    case N_HELP: help(); exit(0);    case N_HELP: help(); exit(0);
1610    case N_LOFFSETS: line_offsets = number = TRUE; break;    case N_LOFFSETS: line_offsets = number = TRUE; break;
1611    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1612    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1613    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
# Line 1866  if (both_context > 0) Line 2043  if (both_context > 0)
2043    if (after_context == 0) after_context = both_context;    if (after_context == 0) after_context = both_context;
2044    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
2045    }    }
2046    
2047  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2048  However, the latter two set the only_matching flag. */  However, the latter two set the only_matching flag. */
2049    
2050  if ((only_matching && (file_offsets || line_offsets)) ||  if ((only_matching && (file_offsets || line_offsets)) ||
2051      (file_offsets && line_offsets))      (file_offsets && line_offsets))
2052    {    {
2053    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2054      "and/or --line-offsets\n");      "and/or --line-offsets\n");
2055    exit(usage(2));    exit(usage(2));
2056    }    }
2057    
2058  if (file_offsets || line_offsets) only_matching = TRUE;  if (file_offsets || line_offsets) only_matching = TRUE;
2059    
2060  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2061  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */
# Line 2117  if (include_pattern != NULL) Line 2294  if (include_pattern != NULL)
2294    
2295  if (i >= argc)  if (i >= argc)
2296    {    {
2297    rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);    rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2298    goto EXIT;    goto EXIT;
2299    }    }
2300    

Legend:
Removed from v.285  
changed lines
  Added in v.286

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12