/[pcre]/code/tags/pcre-8.01/pcregrep.c
ViewVC logotype

Diff of /code/tags/pcre-8.01/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC revision 121 by ph10, Mon Mar 12 12:12:47 2007 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
40    #ifdef HAVE_CONFIG_H
41    #  include <config.h>
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45  #include <locale.h>  #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
# Line 48  POSSIBILITY OF SUCH DAMAGE. Line 52  POSSIBILITY OF SUCH DAMAGE.
52  #include <sys/stat.h>  #include <sys/stat.h>
53  #include <unistd.h>  #include <unistd.h>
54    
 #include "config.h"  
55  #include "pcre.h"  #include "pcre.h"
56    
57  #define FALSE 0  #define FALSE 0
# Line 56  POSSIBILITY OF SUCH DAMAGE. Line 59  POSSIBILITY OF SUCH DAMAGE.
59    
60  typedef int BOOL;  typedef int BOOL;
61    
 #define VERSION "4.2 09-Jan-2006"  
62  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
63    
64  #if BUFSIZ > 8192  #if BUFSIZ > 8192
# Line 65  typedef int BOOL; Line 67  typedef int BOOL;
67  #define MBUFTHIRD 8192  #define MBUFTHIRD 8192
68  #endif  #endif
69    
   
70  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
71  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
72  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
# Line 83  enum { DEE_READ, DEE_SKIP }; Line 84  enum { DEE_READ, DEE_SKIP };
84  #define PO_LINE_MATCH     0x0002  #define PO_LINE_MATCH     0x0002
85  #define PO_FIXED_STRINGS  0x0004  #define PO_FIXED_STRINGS  0x0004
86    
87    /* Line ending types */
88    
89    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY };
90    
91    
92    
93  /*************************************************  /*************************************************
# Line 94  regular code. */ Line 99  regular code. */
99    
100  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
101  static int S_arg = -1;  static int S_arg = -1;
102    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
103    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
104    static const char *jfriedl_prefix = "";
105    static const char *jfriedl_postfix = "";
106  #endif  #endif
107    
108    static int  endlinetype;
109    
110  static char *colour_string = (char *)"1;31";  static char *colour_string = (char *)"1;31";
111  static char *colour_option = NULL;  static char *colour_option = NULL;
112  static char *dee_option = NULL;  static char *dee_option = NULL;
113  static char *DEE_option = NULL;  static char *DEE_option = NULL;
114    static char *newline = NULL;
115  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
116  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
117  static char *locale = NULL;  static char *locale = NULL;
# Line 107  static char *locale = NULL; Line 119  static char *locale = NULL;
119  static const unsigned char *pcretables = NULL;  static const unsigned char *pcretables = NULL;
120    
121  static int  pattern_count = 0;  static int  pattern_count = 0;
122  static pcre **pattern_list;  static pcre **pattern_list = NULL;
123  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
124    
125  static char *include_pattern = NULL;  static char *include_pattern = NULL;
126  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
# Line 134  static BOOL number = FALSE; Line 146  static BOOL number = FALSE;
146  static BOOL only_matching = FALSE;  static BOOL only_matching = FALSE;
147  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
148  static BOOL silent = FALSE;  static BOOL silent = FALSE;
149    static BOOL utf8 = FALSE;
150    
151  /* Structure for options and list of them */  /* Structure for options and list of them */
152    
# Line 181  static option_item optionlist[] = { Line 194  static option_item optionlist[] = {
194    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
195    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
196    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
197      { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LF, CRLF)" },
198    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
199    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
200    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
# Line 210  static const char *prefix[] = { Line 224  static const char *prefix[] = {
224  static const char *suffix[] = {  static const char *suffix[] = {
225    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
226    
227    /* UTF-8 tables - used only when the newline setting is "any". */
228    
229    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
230    
231    const char utf8_table4[] = {
232      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
233      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
234      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
235      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
236    
237    
238    
239  /*************************************************  /*************************************************
# Line 222  although at present the only ones are fo Line 246  although at present the only ones are fo
246    
247  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
248    
249  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
250  #include <sys/types.h>  #include <sys/types.h>
251  #include <sys/stat.h>  #include <sys/stat.h>
252  #include <dirent.h>  #include <dirent.h>
# Line 292  Lionel Fourquaux. David Burgess added a Line 316  Lionel Fourquaux. David Burgess added a
316  when it did not exist. */  when it did not exist. */
317    
318    
319  #elif HAVE_WIN32API  #elif HAVE_WINDOWS_H
320    
321  #ifndef STRICT  #ifndef STRICT
322  # define STRICT  # define STRICT
# Line 414  FALSE; Line 438  FALSE;
438  typedef void directory_type;  typedef void directory_type;
439    
440  int isdirectory(char *filename) { return 0; }  int isdirectory(char *filename) { return 0; }
441  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
442  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
443  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
444    
445    
# Line 462  return sys_errlist[n]; Line 486  return sys_errlist[n];
486    
487    
488  /*************************************************  /*************************************************
489    *             Find end of line                   *
490    *************************************************/
491    
492    /* The length of the endline sequence that is found is set via lenptr. This may
493    be zero at the very end of the file if there is no line-ending sequence there.
494    
495    Arguments:
496      p         current position in line
497      endptr    end of available data
498      lenptr    where to put the length of the eol sequence
499    
500    Returns:    pointer to the last byte of the line
501    */
502    
503    static char *
504    end_of_line(char *p, char *endptr, int *lenptr)
505    {
506    switch(endlinetype)
507      {
508      default:      /* Just in case */
509      case EL_LF:
510      while (p < endptr && *p != '\n') p++;
511      if (p < endptr)
512        {
513        *lenptr = 1;
514        return p + 1;
515        }
516      *lenptr = 0;
517      return endptr;
518    
519      case EL_CR:
520      while (p < endptr && *p != '\r') p++;
521      if (p < endptr)
522        {
523        *lenptr = 1;
524        return p + 1;
525        }
526      *lenptr = 0;
527      return endptr;
528    
529      case EL_CRLF:
530      for (;;)
531        {
532        while (p < endptr && *p != '\r') p++;
533        if (++p >= endptr)
534          {
535          *lenptr = 0;
536          return endptr;
537          }
538        if (*p == '\n')
539          {
540          *lenptr = 2;
541          return p + 1;
542          }
543        }
544      break;
545    
546      case EL_ANY:
547      while (p < endptr)
548        {
549        int extra = 0;
550        register int c = *((unsigned char *)p);
551    
552        if (utf8 && c >= 0xc0)
553          {
554          int gcii, gcss;
555          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
556          gcss = 6*extra;
557          c = (c & utf8_table3[extra]) << gcss;
558          for (gcii = 1; gcii <= extra; gcii++)
559            {
560            gcss -= 6;
561            c |= (p[gcii] & 0x3f) << gcss;
562            }
563          }
564    
565        p += 1 + extra;
566    
567        switch (c)
568          {
569          case 0x0a:    /* LF */
570          case 0x0b:    /* VT */
571          case 0x0c:    /* FF */
572          *lenptr = 1;
573          return p;
574    
575          case 0x0d:    /* CR */
576          if (p < endptr && *p == 0x0a)
577            {
578            *lenptr = 2;
579            p++;
580            }
581          else *lenptr = 1;
582          return p;
583    
584          case 0x85:    /* NEL */
585          *lenptr = utf8? 2 : 1;
586          return p;
587    
588          case 0x2028:  /* LS */
589          case 0x2029:  /* PS */
590          *lenptr = 3;
591          return p;
592    
593          default:
594          break;
595          }
596        }   /* End of loop for ANY case */
597    
598      *lenptr = 0;  /* Must have hit the end */
599      return endptr;
600      }     /* End of overall switch */
601    }
602    
603    
604    
605    /*************************************************
606    *         Find start of previous line            *
607    *************************************************/
608    
609    /* This is called when looking back for before lines to print.
610    
611    Arguments:
612      p         start of the subsequent line
613      startptr  start of available data
614    
615    Returns:    pointer to the start of the previous line
616    */
617    
618    static char *
619    previous_line(char *p, char *startptr)
620    {
621    switch(endlinetype)
622      {
623      default:      /* Just in case */
624      case EL_LF:
625      p--;
626      while (p > startptr && p[-1] != '\n') p--;
627      return p;
628    
629      case EL_CR:
630      p--;
631      while (p > startptr && p[-1] != '\n') p--;
632      return p;
633    
634      case EL_CRLF:
635      for (;;)
636        {
637        p -= 2;
638        while (p > startptr && p[-1] != '\n') p--;
639        if (p <= startptr + 1 || p[-2] == '\r') return p;
640        }
641      return p;   /* But control should never get here */
642    
643      case EL_ANY:
644      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
645      if (utf8) while ((*p & 0xc0) == 0x80) p--;
646    
647      while (p > startptr)
648        {
649        register int c;
650        char *pp = p - 1;
651    
652        if (utf8)
653          {
654          int extra = 0;
655          while ((*pp & 0xc0) == 0x80) pp--;
656          c = *((unsigned char *)pp);
657          if (c >= 0xc0)
658            {
659            int gcii, gcss;
660            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
661            gcss = 6*extra;
662            c = (c & utf8_table3[extra]) << gcss;
663            for (gcii = 1; gcii <= extra; gcii++)
664              {
665              gcss -= 6;
666              c |= (pp[gcii] & 0x3f) << gcss;
667              }
668            }
669          }
670        else c = *((unsigned char *)pp);
671    
672        switch (c)
673          {
674          case 0x0a:    /* LF */
675          case 0x0b:    /* VT */
676          case 0x0c:    /* FF */
677          case 0x0d:    /* CR */
678          case 0x85:    /* NEL */
679          case 0x2028:  /* LS */
680          case 0x2029:  /* PS */
681          return p;
682    
683          default:
684          break;
685          }
686    
687        p = pp;  /* Back one character */
688        }        /* End of loop for ANY case */
689    
690      return startptr;  /* Hit start of data */
691      }     /* End of overall switch */
692    }
693    
694    
695    
696    
697    
698    /*************************************************
699  *       Print the previous "after" lines         *  *       Print the previous "after" lines         *
700  *************************************************/  *************************************************/
701    
# Line 486  if (after_context > 0 && lastmatchnumber Line 720  if (after_context > 0 && lastmatchnumber
720    int count = 0;    int count = 0;
721    while (lastmatchrestart < endptr && count++ < after_context)    while (lastmatchrestart < endptr && count++ < after_context)
722      {      {
723        int ellength;
724      char *pp = lastmatchrestart;      char *pp = lastmatchrestart;
725      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
726      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
727      while (*pp != '\n') pp++;      pp = end_of_line(pp, endptr, &ellength);
728      fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
729      lastmatchrestart = pp + 1;      lastmatchrestart = pp;
730      }      }
731    hyphenpending = TRUE;    hyphenpending = TRUE;
732    }    }
# Line 548  way, the buffer is shifted left and re-f Line 783  way, the buffer is shifted left and re-f
783    
784  while (ptr < endptr)  while (ptr < endptr)
785    {    {
786    int i;    int i, endlinelength;
787    int mrc = 0;    int mrc = 0;
788    BOOL match = FALSE;    BOOL match = FALSE;
789    char *t = ptr;    char *t = ptr;
# Line 561  while (ptr < endptr) Line 796  while (ptr < endptr)
796    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
797    that any match is constrained to be in the first line. */    that any match is constrained to be in the first line. */
798    
799    linelength = 0;    t = end_of_line(t, endptr, &endlinelength);
800    while (t < endptr && *t++ != '\n') linelength++;    linelength = t - ptr - endlinelength;
801    length = multiline? endptr - ptr : linelength;    length = multiline? endptr - ptr : linelength;
802    
803      /* Extra processing for Jeffrey Friedl's debugging. */
804    
805    #ifdef JFRIEDL_DEBUG
806      if (jfriedl_XT || jfriedl_XR)
807      {
808          #include <sys/time.h>
809          #include <time.h>
810          struct timeval start_time, end_time;
811          struct timezone dummy;
812    
813          if (jfriedl_XT)
814          {
815              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
816              const char *orig = ptr;
817              ptr = malloc(newlen + 1);
818              if (!ptr) {
819                      printf("out of memory");
820                      exit(2);
821              }
822              endptr = ptr;
823              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
824              for (i = 0; i < jfriedl_XT; i++) {
825                      strncpy(endptr, orig,  length);
826                      endptr += length;
827              }
828              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
829              length = newlen;
830          }
831    
832          if (gettimeofday(&start_time, &dummy) != 0)
833                  perror("bad gettimeofday");
834    
835    
836          for (i = 0; i < jfriedl_XR; i++)
837              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
838    
839          if (gettimeofday(&end_time, &dummy) != 0)
840                  perror("bad gettimeofday");
841    
842          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
843                          -
844                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
845    
846          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
847          return 0;
848      }
849    #endif
850    
851    
852    /* Run through all the patterns until one matches. Note that we don't include    /* Run through all the patterns until one matches. Note that we don't include
853    the final newline in the subject string. */    the final newline in the subject string. */
854    
# Line 646  while (ptr < endptr) Line 930  while (ptr < endptr)
930    
931        if (after_context > 0 && lastmatchnumber > 0)        if (after_context > 0 && lastmatchnumber > 0)
932          {          {
933            int ellength;
934          int linecount = 0;          int linecount = 0;
935          char *p = lastmatchrestart;          char *p = lastmatchrestart;
936    
937          while (p < ptr && linecount < after_context)          while (p < ptr && linecount < after_context)
938            {            {
939            while (*p != '\n') p++;            p = end_of_line(p, ptr, &ellength);
           p++;  
940            linecount++;            linecount++;
941            }            }
942    
# Line 665  while (ptr < endptr) Line 949  while (ptr < endptr)
949            char *pp = lastmatchrestart;            char *pp = lastmatchrestart;
950            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
951            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
952            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
953            fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
954            lastmatchrestart = pp + 1;            lastmatchrestart = pp;
955            }            }
956          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
957          }          }
# Line 693  while (ptr < endptr) Line 977  while (ptr < endptr)
977                 linecount < before_context)                 linecount < before_context)
978            {            {
979            linecount++;            linecount++;
980            p--;            p = previous_line(p, buffer);
           while (p > buffer && p[-1] != '\n') p--;  
981            }            }
982    
983          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 702  while (ptr < endptr) Line 985  while (ptr < endptr)
985    
986          while (p < ptr)          while (p < ptr)
987            {            {
988              int ellength;
989            char *pp = p;            char *pp = p;
990            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
991            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
992            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
993            fwrite(p, 1, pp - p + 1, stdout);   /* In case binary zero */            fwrite(p, 1, pp - p, stdout);
994            p = pp + 1;            p = pp;
995            }            }
996          }          }
997    
# Line 723  while (ptr < endptr) Line 1007  while (ptr < endptr)
1007        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1008        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1009        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately. Because the PCRE_FIRSTLINE option is set, the
1010        start of the match will always be before the first \n character. */        start of the match will always be before the first newline sequence. */
1011    
1012        if (multiline)        if (multiline)
1013          {          {
1014            int ellength;
1015          char *endmatch = ptr + offsets[1];          char *endmatch = ptr + offsets[1];
1016          t = ptr;          t = ptr;
1017          while (t < endmatch) { if (*t++ == '\n') linenumber++; }          while (t < endmatch)
1018          while (endmatch < endptr && *endmatch != '\n') endmatch++;            {
1019          linelength = endmatch - ptr;            t = end_of_line(t, endptr, &ellength);
1020              if (t <= endmatch) linenumber++; else break;
1021              }
1022            endmatch = end_of_line(endmatch, endptr, &ellength);
1023            linelength = endmatch - ptr - ellength;
1024          }          }
1025    
1026        /*** NOTE: Use only fwrite() to output the data line, so that binary        /*** NOTE: Use only fwrite() to output the data line, so that binary
# Line 763  while (ptr < endptr) Line 1052  while (ptr < endptr)
1052          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1053          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
1054          }          }
1055        else fwrite(ptr, 1, linelength, stdout);        else fwrite(ptr, 1, linelength + endlinelength, stdout);
   
       fprintf(stdout, "\n");  
1056        }        }
1057    
1058      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match */
# Line 775  while (ptr < endptr) Line 1062  while (ptr < endptr)
1062      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
1063      where we are about to restart, and that line's number. */      where we are about to restart, and that line's number. */
1064    
1065      lastmatchrestart = ptr + linelength + 1;      lastmatchrestart = ptr + linelength + endlinelength;
1066      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1067      }      }
1068    
1069    /* Advance to after the newline and increment the line number. */    /* Advance to after the newline and increment the line number. */
1070    
1071    ptr += linelength + 1;    ptr += linelength + endlinelength;
1072    linenumber++;    linenumber++;
1073    
1074    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
# Line 1037  switch(letter) Line 1324  switch(letter)
1324    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1325    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
1326    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1327    case 'u': options |= PCRE_UTF8; break;    case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1328    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1329    case 'w': process_options |= PO_WORD_MATCH; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1330    case 'x': process_options |= PO_LINE_MATCH; break;    case 'x': process_options |= PO_LINE_MATCH; break;
1331    
1332    case 'V':    case 'V':
1333    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s\n", pcre_version());
   fprintf(stderr, "PCRE version %s\n", pcre_version());  
1334    exit(0);    exit(0);
1335    break;    break;
1336    
# Line 1152  return FALSE; Line 1438  return FALSE;
1438  *************************************************/  *************************************************/
1439    
1440  /* When the -F option has been used, each string may be a list of strings,  /* When the -F option has been used, each string may be a list of strings,
1441  separated by newlines. They will be matched literally.  separated by line breaks. They will be matched literally.
1442    
1443  Arguments:  Arguments:
1444    pattern        the pattern string    pattern        the pattern string
# Line 1170  compile_pattern(char *pattern, int optio Line 1456  compile_pattern(char *pattern, int optio
1456  {  {
1457  if ((process_options & PO_FIXED_STRINGS) != 0)  if ((process_options & PO_FIXED_STRINGS) != 0)
1458    {    {
1459      char *eop = pattern + strlen(pattern);
1460    char buffer[MBUFTHIRD];    char buffer[MBUFTHIRD];
1461    for(;;)    for(;;)
1462      {      {
1463      char *p = strchr(pattern, '\n');      int ellength;
1464      if (p == NULL)      char *p = end_of_line(pattern, eop, &ellength);
1465        if (ellength == 0)
1466        return compile_single_pattern(pattern, options, filename, count);        return compile_single_pattern(pattern, options, filename, count);
1467      sprintf(buffer, "%.*s", p - pattern, pattern);      sprintf(buffer, "%.*s", p - pattern - ellength, pattern);
1468      pattern = p + 1;      pattern = p;
1469      if (!compile_single_pattern(buffer, options, filename, count))      if (!compile_single_pattern(buffer, options, filename, count))
1470        return FALSE;        return FALSE;
1471      }      }
# Line 1206  char *patterns[MAX_PATTERN_COUNT]; Line 1494  char *patterns[MAX_PATTERN_COUNT];
1494  const char *locale_from = "--locale";  const char *locale_from = "--locale";
1495  const char *error;  const char *error;
1496    
1497    /* Set the default line ending value from the default in the PCRE library;
1498    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1499    */
1500    
1501    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1502    switch(i)
1503      {
1504      default:                 newline = (char *)"lf"; break;
1505      case '\r':               newline = (char *)"cr"; break;
1506      case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1507      case -1:                 newline = (char *)"any"; break;
1508      }
1509    
1510  /* Process the options */  /* Process the options */
1511    
1512  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
# Line 1294  for (i = 1; i < argc; i++) Line 1595  for (i = 1; i < argc; i++)
1595        }        }
1596      }      }
1597    
1598    
1599      /* Jeffrey Friedl's debugging harness uses these additional options which
1600      are not in the right form for putting in the option table because they use
1601      only one hyphen, yet are more than one character long. By putting them
1602      separately here, they will not get displayed as part of the help() output,
1603      but I don't think Jeffrey will care about that. */
1604    
1605    #ifdef JFRIEDL_DEBUG
1606      else if (strcmp(argv[i], "-pre") == 0) {
1607              jfriedl_prefix = argv[++i];
1608              continue;
1609      } else if (strcmp(argv[i], "-post") == 0) {
1610              jfriedl_postfix = argv[++i];
1611              continue;
1612      } else if (strcmp(argv[i], "-XT") == 0) {
1613              sscanf(argv[++i], "%d", &jfriedl_XT);
1614              continue;
1615      } else if (strcmp(argv[i], "-XR") == 0) {
1616              sscanf(argv[++i], "%d", &jfriedl_XR);
1617              continue;
1618      }
1619    #endif
1620    
1621    
1622    /* One-char options; many that have no data may be in a single argument; we    /* One-char options; many that have no data may be in a single argument; we
1623    continue till we hit the last one or one that needs data. */    continue till we hit the last one or one that needs data. */
1624    
# Line 1333  for (i = 1; i < argc; i++) Line 1658  for (i = 1; i < argc; i++)
1658    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1659    either has a value or defaults to something. It cannot have data in a    either has a value or defaults to something. It cannot have data in a
1660    separate item. At the moment, the only such options are "colo(u)r" and    separate item. At the moment, the only such options are "colo(u)r" and
1661    Jeffrey Friedl's special debugging option. */    Jeffrey Friedl's special -S debugging option. */
1662    
1663    if (*option_data == 0 &&    if (*option_data == 0 &&
1664        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
# Line 1465  if (colour_option != NULL && strcmp(colo Line 1790  if (colour_option != NULL && strcmp(colo
1790      }      }
1791    }    }
1792    
1793    /* Interpret the newline type; the default settings are Unix-like. */
1794    
1795    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1796      {
1797      pcre_options |= PCRE_NEWLINE_CR;
1798      endlinetype = EL_CR;
1799      }
1800    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1801      {
1802      pcre_options |= PCRE_NEWLINE_LF;
1803      endlinetype = EL_LF;
1804      }
1805    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1806      {
1807      pcre_options |= PCRE_NEWLINE_CRLF;
1808      endlinetype = EL_CRLF;
1809      }
1810    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1811      {
1812      pcre_options |= PCRE_NEWLINE_ANY;
1813      endlinetype = EL_ANY;
1814      }
1815    else
1816      {
1817      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1818      return 2;
1819      }
1820    
1821  /* Interpret the text values for -d and -D */  /* Interpret the text values for -d and -D */
1822    
1823  if (dee_option != NULL)  if (dee_option != NULL)
# Line 1490  if (DEE_option != NULL) Line 1843  if (DEE_option != NULL)
1843      }      }
1844    }    }
1845    
1846  /* Check the value for Jeff Friedl's debugging option. */  /* Check the values for Jeffrey Friedl's debugging options. */
1847    
1848  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
1849  if (S_arg > 9)  if (S_arg > 9)
# Line 1498  if (S_arg > 9) Line 1851  if (S_arg > 9)
1851    fprintf(stderr, "pcregrep: bad value for -S option\n");    fprintf(stderr, "pcregrep: bad value for -S option\n");
1852    return 2;    return 2;
1853    }    }
1854    if (jfriedl_XT != 0 || jfriedl_XR != 0)
1855      {
1856      if (jfriedl_XT == 0) jfriedl_XT = 1;
1857      if (jfriedl_XR == 0) jfriedl_XR = 1;
1858      }
1859  #endif  #endif
1860    
1861  /* Get memory to store the pattern and hints lists. */  /* Get memory to store the pattern and hints lists. */
# Line 1508  hints_list = (pcre_extra **)malloc(MAX_P Line 1866  hints_list = (pcre_extra **)malloc(MAX_P
1866  if (pattern_list == NULL || hints_list == NULL)  if (pattern_list == NULL || hints_list == NULL)
1867    {    {
1868    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
1869    return 2;    goto EXIT2;
1870    }    }
1871    
1872  /* If no patterns were provided by -e, and there is no file provided by -f,  /* If no patterns were provided by -e, and there is no file provided by -f,
# Line 1527  for (j = 0; j < cmd_pattern_count; j++) Line 1885  for (j = 0; j < cmd_pattern_count; j++)
1885    {    {
1886    if (!compile_pattern(patterns[j], pcre_options, NULL,    if (!compile_pattern(patterns[j], pcre_options, NULL,
1887         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1888      return 2;      goto EXIT2;
1889    }    }
1890    
1891  /* Compile the regular expressions that are provided in a file. */  /* Compile the regular expressions that are provided in a file. */
# Line 1551  if (pattern_filename != NULL) Line 1909  if (pattern_filename != NULL)
1909        {        {
1910        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1911          strerror(errno));          strerror(errno));
1912        return 2;        goto EXIT2;
1913        }        }
1914      filename = pattern_filename;      filename = pattern_filename;
1915      }      }
# Line 1564  if (pattern_filename != NULL) Line 1922  if (pattern_filename != NULL)
1922      linenumber++;      linenumber++;
1923      if (buffer[0] == 0) continue;   /* Skip blank lines */      if (buffer[0] == 0) continue;   /* Skip blank lines */
1924      if (!compile_pattern(buffer, pcre_options, filename, linenumber))      if (!compile_pattern(buffer, pcre_options, filename, linenumber))
1925        return 2;        goto EXIT2;
1926      }      }
1927    
1928    if (f != stdin) fclose(f);    if (f != stdin) fclose(f);
# Line 1580  for (j = 0; j < pattern_count; j++) Line 1938  for (j = 0; j < pattern_count; j++)
1938      char s[16];      char s[16];
1939      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
1940      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
1941      return 2;      goto EXIT2;
1942      }      }
1943    }    }
1944    
# Line 1594  if (exclude_pattern != NULL) Line 1952  if (exclude_pattern != NULL)
1952      {      {
1953      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
1954        errptr, error);        errptr, error);
1955      return 2;      goto EXIT2;
1956      }      }
1957    }    }
1958    
# Line 1606  if (include_pattern != NULL) Line 1964  if (include_pattern != NULL)
1964      {      {
1965      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
1966        errptr, error);        errptr, error);
1967      return 2;      goto EXIT2;
1968      }      }
1969    }    }
1970    
1971  /* If there are no further arguments, do the business on stdin and exit. */  /* If there are no further arguments, do the business on stdin and exit. */
1972    
1973  if (i >= argc)  if (i >= argc)
1974    return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);    {
1975      rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
1976      goto EXIT;
1977      }
1978    
1979  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
1980  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
# Line 1630  for (; i < argc; i++) Line 1991  for (; i < argc; i++)
1991      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
1992    }    }
1993    
1994    EXIT:
1995    if (pattern_list != NULL)
1996      {
1997      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
1998      free(pattern_list);
1999      }
2000    if (hints_list != NULL)
2001      {
2002      for (i = 0; i < pattern_count; i++) free(hints_list[i]);
2003      free(hints_list);
2004      }
2005  return rc;  return rc;
2006    
2007    EXIT2:
2008    rc = 2;
2009    goto EXIT;
2010  }  }
2011    
2012  /* End of pcregrep */  /* End of pcregrep */

Legend:
Removed from v.87  
changed lines
  Added in v.121

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12