/[pcre]/code/tags/pcre-7.7/pcregrep.c
ViewVC logotype

Diff of /code/tags/pcre-7.7/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC revision 137 by ph10, Thu Mar 29 13:56:00 2007 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
40    #ifdef HAVE_CONFIG_H
41    #  include <config.h>
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45  #include <locale.h>  #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
# Line 46  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53  #include <unistd.h>  #ifdef HAVE_UNISTD_H
54    #  include <unistd.h>
55    #endif
56    
57  #include "config.h"  #include <pcre.h>
 #include "pcre.h"  
58    
59  #define FALSE 0  #define FALSE 0
60  #define TRUE 1  #define TRUE 1
61    
62  typedef int BOOL;  typedef int BOOL;
63    
 #define VERSION "4.2 09-Jan-2006"  
64  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
65    
66  #if BUFSIZ > 8192  #if BUFSIZ > 8192
# Line 65  typedef int BOOL; Line 69  typedef int BOOL;
69  #define MBUFTHIRD 8192  #define MBUFTHIRD 8192
70  #endif  #endif
71    
   
72  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
73  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
74  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
# Line 83  enum { DEE_READ, DEE_SKIP }; Line 86  enum { DEE_READ, DEE_SKIP };
86  #define PO_LINE_MATCH     0x0002  #define PO_LINE_MATCH     0x0002
87  #define PO_FIXED_STRINGS  0x0004  #define PO_FIXED_STRINGS  0x0004
88    
89    /* Line ending types */
90    
91    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY };
92    
93    
94    
95  /*************************************************  /*************************************************
# Line 94  regular code. */ Line 101  regular code. */
101    
102  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
103  static int S_arg = -1;  static int S_arg = -1;
104    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
105    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
106    static const char *jfriedl_prefix = "";
107    static const char *jfriedl_postfix = "";
108  #endif  #endif
109    
110    static int  endlinetype;
111    
112  static char *colour_string = (char *)"1;31";  static char *colour_string = (char *)"1;31";
113  static char *colour_option = NULL;  static char *colour_option = NULL;
114  static char *dee_option = NULL;  static char *dee_option = NULL;
115  static char *DEE_option = NULL;  static char *DEE_option = NULL;
116    static char *newline = NULL;
117  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
118  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
119  static char *locale = NULL;  static char *locale = NULL;
# Line 107  static char *locale = NULL; Line 121  static char *locale = NULL;
121  static const unsigned char *pcretables = NULL;  static const unsigned char *pcretables = NULL;
122    
123  static int  pattern_count = 0;  static int  pattern_count = 0;
124  static pcre **pattern_list;  static pcre **pattern_list = NULL;
125  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
126    
127  static char *include_pattern = NULL;  static char *include_pattern = NULL;
128  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
# Line 134  static BOOL number = FALSE; Line 148  static BOOL number = FALSE;
148  static BOOL only_matching = FALSE;  static BOOL only_matching = FALSE;
149  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
150  static BOOL silent = FALSE;  static BOOL silent = FALSE;
151    static BOOL utf8 = FALSE;
152    
153  /* Structure for options and list of them */  /* Structure for options and list of them */
154    
# Line 181  static option_item optionlist[] = { Line 196  static option_item optionlist[] = {
196    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
197    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
198    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
199      { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LR, CRLF)" },
200    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
201    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
202    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
# Line 210  static const char *prefix[] = { Line 226  static const char *prefix[] = {
226  static const char *suffix[] = {  static const char *suffix[] = {
227    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
228    
229    /* UTF-8 tables - used only when the newline setting is "any". */
230    
231    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
232    
233    const char utf8_table4[] = {
234      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
235      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
236      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
237      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
238    
239    
240    
241  /*************************************************  /*************************************************
# Line 222  although at present the only ones are fo Line 248  although at present the only ones are fo
248    
249  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
250    
251  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
252  #include <sys/types.h>  #include <sys/types.h>
253  #include <sys/stat.h>  #include <sys/stat.h>
254  #include <dirent.h>  #include <dirent.h>
# Line 292  Lionel Fourquaux. David Burgess added a Line 318  Lionel Fourquaux. David Burgess added a
318  when it did not exist. */  when it did not exist. */
319    
320    
321  #elif HAVE_WIN32API  #elif HAVE_WINDOWS_H
322    
323  #ifndef STRICT  #ifndef STRICT
324  # define STRICT  # define STRICT
# Line 414  FALSE; Line 440  FALSE;
440  typedef void directory_type;  typedef void directory_type;
441    
442  int isdirectory(char *filename) { return 0; }  int isdirectory(char *filename) { return 0; }
443  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
444  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
445  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
446    
447    
# Line 439  return FALSE; Line 465  return FALSE;
465    
466    
467    
468  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
469  /*************************************************  /*************************************************
470  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
471  *************************************************/  *************************************************/
# Line 462  return sys_errlist[n]; Line 488  return sys_errlist[n];
488    
489    
490  /*************************************************  /*************************************************
491    *             Find end of line                   *
492    *************************************************/
493    
494    /* The length of the endline sequence that is found is set via lenptr. This may
495    be zero at the very end of the file if there is no line-ending sequence there.
496    
497    Arguments:
498      p         current position in line
499      endptr    end of available data
500      lenptr    where to put the length of the eol sequence
501    
502    Returns:    pointer to the last byte of the line
503    */
504    
505    static char *
506    end_of_line(char *p, char *endptr, int *lenptr)
507    {
508    switch(endlinetype)
509      {
510      default:      /* Just in case */
511      case EL_LF:
512      while (p < endptr && *p != '\n') p++;
513      if (p < endptr)
514        {
515        *lenptr = 1;
516        return p + 1;
517        }
518      *lenptr = 0;
519      return endptr;
520    
521      case EL_CR:
522      while (p < endptr && *p != '\r') p++;
523      if (p < endptr)
524        {
525        *lenptr = 1;
526        return p + 1;
527        }
528      *lenptr = 0;
529      return endptr;
530    
531      case EL_CRLF:
532      for (;;)
533        {
534        while (p < endptr && *p != '\r') p++;
535        if (++p >= endptr)
536          {
537          *lenptr = 0;
538          return endptr;
539          }
540        if (*p == '\n')
541          {
542          *lenptr = 2;
543          return p + 1;
544          }
545        }
546      break;
547    
548      case EL_ANY:
549      while (p < endptr)
550        {
551        int extra = 0;
552        register int c = *((unsigned char *)p);
553    
554        if (utf8 && c >= 0xc0)
555          {
556          int gcii, gcss;
557          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
558          gcss = 6*extra;
559          c = (c & utf8_table3[extra]) << gcss;
560          for (gcii = 1; gcii <= extra; gcii++)
561            {
562            gcss -= 6;
563            c |= (p[gcii] & 0x3f) << gcss;
564            }
565          }
566    
567        p += 1 + extra;
568    
569        switch (c)
570          {
571          case 0x0a:    /* LF */
572          case 0x0b:    /* VT */
573          case 0x0c:    /* FF */
574          *lenptr = 1;
575          return p;
576    
577          case 0x0d:    /* CR */
578          if (p < endptr && *p == 0x0a)
579            {
580            *lenptr = 2;
581            p++;
582            }
583          else *lenptr = 1;
584          return p;
585    
586          case 0x85:    /* NEL */
587          *lenptr = utf8? 2 : 1;
588          return p;
589    
590          case 0x2028:  /* LS */
591          case 0x2029:  /* PS */
592          *lenptr = 3;
593          return p;
594    
595          default:
596          break;
597          }
598        }   /* End of loop for ANY case */
599    
600      *lenptr = 0;  /* Must have hit the end */
601      return endptr;
602      }     /* End of overall switch */
603    }
604    
605    
606    
607    /*************************************************
608    *         Find start of previous line            *
609    *************************************************/
610    
611    /* This is called when looking back for before lines to print.
612    
613    Arguments:
614      p         start of the subsequent line
615      startptr  start of available data
616    
617    Returns:    pointer to the start of the previous line
618    */
619    
620    static char *
621    previous_line(char *p, char *startptr)
622    {
623    switch(endlinetype)
624      {
625      default:      /* Just in case */
626      case EL_LF:
627      p--;
628      while (p > startptr && p[-1] != '\n') p--;
629      return p;
630    
631      case EL_CR:
632      p--;
633      while (p > startptr && p[-1] != '\n') p--;
634      return p;
635    
636      case EL_CRLF:
637      for (;;)
638        {
639        p -= 2;
640        while (p > startptr && p[-1] != '\n') p--;
641        if (p <= startptr + 1 || p[-2] == '\r') return p;
642        }
643      return p;   /* But control should never get here */
644    
645      case EL_ANY:
646      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
647      if (utf8) while ((*p & 0xc0) == 0x80) p--;
648    
649      while (p > startptr)
650        {
651        register int c;
652        char *pp = p - 1;
653    
654        if (utf8)
655          {
656          int extra = 0;
657          while ((*pp & 0xc0) == 0x80) pp--;
658          c = *((unsigned char *)pp);
659          if (c >= 0xc0)
660            {
661            int gcii, gcss;
662            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
663            gcss = 6*extra;
664            c = (c & utf8_table3[extra]) << gcss;
665            for (gcii = 1; gcii <= extra; gcii++)
666              {
667              gcss -= 6;
668              c |= (pp[gcii] & 0x3f) << gcss;
669              }
670            }
671          }
672        else c = *((unsigned char *)pp);
673    
674        switch (c)
675          {
676          case 0x0a:    /* LF */
677          case 0x0b:    /* VT */
678          case 0x0c:    /* FF */
679          case 0x0d:    /* CR */
680          case 0x85:    /* NEL */
681          case 0x2028:  /* LS */
682          case 0x2029:  /* PS */
683          return p;
684    
685          default:
686          break;
687          }
688    
689        p = pp;  /* Back one character */
690        }        /* End of loop for ANY case */
691    
692      return startptr;  /* Hit start of data */
693      }     /* End of overall switch */
694    }
695    
696    
697    
698    
699    
700    /*************************************************
701  *       Print the previous "after" lines         *  *       Print the previous "after" lines         *
702  *************************************************/  *************************************************/
703    
# Line 486  if (after_context > 0 && lastmatchnumber Line 722  if (after_context > 0 && lastmatchnumber
722    int count = 0;    int count = 0;
723    while (lastmatchrestart < endptr && count++ < after_context)    while (lastmatchrestart < endptr && count++ < after_context)
724      {      {
725        int ellength;
726      char *pp = lastmatchrestart;      char *pp = lastmatchrestart;
727      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
728      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
729      while (*pp != '\n') pp++;      pp = end_of_line(pp, endptr, &ellength);
730      fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
731      lastmatchrestart = pp + 1;      lastmatchrestart = pp;
732      }      }
733    hyphenpending = TRUE;    hyphenpending = TRUE;
734    }    }
# Line 548  way, the buffer is shifted left and re-f Line 785  way, the buffer is shifted left and re-f
785    
786  while (ptr < endptr)  while (ptr < endptr)
787    {    {
788    int i;    int i, endlinelength;
789    int mrc = 0;    int mrc = 0;
790    BOOL match = FALSE;    BOOL match = FALSE;
791    char *t = ptr;    char *t = ptr;
# Line 561  while (ptr < endptr) Line 798  while (ptr < endptr)
798    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
799    that any match is constrained to be in the first line. */    that any match is constrained to be in the first line. */
800    
801    linelength = 0;    t = end_of_line(t, endptr, &endlinelength);
802    while (t < endptr && *t++ != '\n') linelength++;    linelength = t - ptr - endlinelength;
803    length = multiline? endptr - ptr : linelength;    length = multiline? endptr - ptr : linelength;
804    
805      /* Extra processing for Jeffrey Friedl's debugging. */
806    
807    #ifdef JFRIEDL_DEBUG
808      if (jfriedl_XT || jfriedl_XR)
809      {
810          #include <sys/time.h>
811          #include <time.h>
812          struct timeval start_time, end_time;
813          struct timezone dummy;
814    
815          if (jfriedl_XT)
816          {
817              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
818              const char *orig = ptr;
819              ptr = malloc(newlen + 1);
820              if (!ptr) {
821                      printf("out of memory");
822                      exit(2);
823              }
824              endptr = ptr;
825              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
826              for (i = 0; i < jfriedl_XT; i++) {
827                      strncpy(endptr, orig,  length);
828                      endptr += length;
829              }
830              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
831              length = newlen;
832          }
833    
834          if (gettimeofday(&start_time, &dummy) != 0)
835                  perror("bad gettimeofday");
836    
837    
838          for (i = 0; i < jfriedl_XR; i++)
839              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
840    
841          if (gettimeofday(&end_time, &dummy) != 0)
842                  perror("bad gettimeofday");
843    
844          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
845                          -
846                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
847    
848          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
849          return 0;
850      }
851    #endif
852    
853    
854    /* Run through all the patterns until one matches. Note that we don't include    /* Run through all the patterns until one matches. Note that we don't include
855    the final newline in the subject string. */    the final newline in the subject string. */
856    
# Line 646  while (ptr < endptr) Line 932  while (ptr < endptr)
932    
933        if (after_context > 0 && lastmatchnumber > 0)        if (after_context > 0 && lastmatchnumber > 0)
934          {          {
935            int ellength;
936          int linecount = 0;          int linecount = 0;
937          char *p = lastmatchrestart;          char *p = lastmatchrestart;
938    
939          while (p < ptr && linecount < after_context)          while (p < ptr && linecount < after_context)
940            {            {
941            while (*p != '\n') p++;            p = end_of_line(p, ptr, &ellength);
           p++;  
942            linecount++;            linecount++;
943            }            }
944    
# Line 665  while (ptr < endptr) Line 951  while (ptr < endptr)
951            char *pp = lastmatchrestart;            char *pp = lastmatchrestart;
952            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
953            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
954            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
955            fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
956            lastmatchrestart = pp + 1;            lastmatchrestart = pp;
957            }            }
958          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
959          }          }
# Line 693  while (ptr < endptr) Line 979  while (ptr < endptr)
979                 linecount < before_context)                 linecount < before_context)
980            {            {
981            linecount++;            linecount++;
982            p--;            p = previous_line(p, buffer);
           while (p > buffer && p[-1] != '\n') p--;  
983            }            }
984    
985          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 702  while (ptr < endptr) Line 987  while (ptr < endptr)
987    
988          while (p < ptr)          while (p < ptr)
989            {            {
990              int ellength;
991            char *pp = p;            char *pp = p;
992            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
993            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
994            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
995            fwrite(p, 1, pp - p + 1, stdout);   /* In case binary zero */            fwrite(p, 1, pp - p, stdout);
996            p = pp + 1;            p = pp;
997            }            }
998          }          }
999    
# Line 723  while (ptr < endptr) Line 1009  while (ptr < endptr)
1009        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1010        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1011        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately. Because the PCRE_FIRSTLINE option is set, the
1012        start of the match will always be before the first \n character. */        start of the match will always be before the first newline sequence. */
1013    
1014        if (multiline)        if (multiline)
1015          {          {
1016            int ellength;
1017          char *endmatch = ptr + offsets[1];          char *endmatch = ptr + offsets[1];
1018          t = ptr;          t = ptr;
1019          while (t < endmatch) { if (*t++ == '\n') linenumber++; }          while (t < endmatch)
1020          while (endmatch < endptr && *endmatch != '\n') endmatch++;            {
1021          linelength = endmatch - ptr;            t = end_of_line(t, endptr, &ellength);
1022              if (t <= endmatch) linenumber++; else break;
1023              }
1024            endmatch = end_of_line(endmatch, endptr, &ellength);
1025            linelength = endmatch - ptr - ellength;
1026          }          }
1027    
1028        /*** NOTE: Use only fwrite() to output the data line, so that binary        /*** NOTE: Use only fwrite() to output the data line, so that binary
# Line 763  while (ptr < endptr) Line 1054  while (ptr < endptr)
1054          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1055          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
1056          }          }
1057        else fwrite(ptr, 1, linelength, stdout);        else fwrite(ptr, 1, linelength + endlinelength, stdout);
   
       fprintf(stdout, "\n");  
1058        }        }
1059    
1060      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match */
# Line 775  while (ptr < endptr) Line 1064  while (ptr < endptr)
1064      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
1065      where we are about to restart, and that line's number. */      where we are about to restart, and that line's number. */
1066    
1067      lastmatchrestart = ptr + linelength + 1;      lastmatchrestart = ptr + linelength + endlinelength;
1068      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1069      }      }
1070    
1071    /* Advance to after the newline and increment the line number. */    /* Advance to after the newline and increment the line number. */
1072    
1073    ptr += linelength + 1;    ptr += linelength + endlinelength;
1074    linenumber++;    linenumber++;
1075    
1076    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
# Line 1037  switch(letter) Line 1326  switch(letter)
1326    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1327    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
1328    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1329    case 'u': options |= PCRE_UTF8; break;    case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1330    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1331    case 'w': process_options |= PO_WORD_MATCH; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1332    case 'x': process_options |= PO_LINE_MATCH; break;    case 'x': process_options |= PO_LINE_MATCH; break;
1333    
1334    case 'V':    case 'V':
1335    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s\n", pcre_version());
   fprintf(stderr, "PCRE version %s\n", pcre_version());  
1336    exit(0);    exit(0);
1337    break;    break;
1338    
# Line 1152  return FALSE; Line 1440  return FALSE;
1440  *************************************************/  *************************************************/
1441    
1442  /* When the -F option has been used, each string may be a list of strings,  /* When the -F option has been used, each string may be a list of strings,
1443  separated by newlines. They will be matched literally.  separated by line breaks. They will be matched literally.
1444    
1445  Arguments:  Arguments:
1446    pattern        the pattern string    pattern        the pattern string
# Line 1170  compile_pattern(char *pattern, int optio Line 1458  compile_pattern(char *pattern, int optio
1458  {  {
1459  if ((process_options & PO_FIXED_STRINGS) != 0)  if ((process_options & PO_FIXED_STRINGS) != 0)
1460    {    {
1461      char *eop = pattern + strlen(pattern);
1462    char buffer[MBUFTHIRD];    char buffer[MBUFTHIRD];
1463    for(;;)    for(;;)
1464      {      {
1465      char *p = strchr(pattern, '\n');      int ellength;
1466      if (p == NULL)      char *p = end_of_line(pattern, eop, &ellength);
1467        if (ellength == 0)
1468        return compile_single_pattern(pattern, options, filename, count);        return compile_single_pattern(pattern, options, filename, count);
1469      sprintf(buffer, "%.*s", p - pattern, pattern);      sprintf(buffer, "%.*s", p - pattern - ellength, pattern);
1470      pattern = p + 1;      pattern = p;
1471      if (!compile_single_pattern(buffer, options, filename, count))      if (!compile_single_pattern(buffer, options, filename, count))
1472        return FALSE;        return FALSE;
1473      }      }
# Line 1206  char *patterns[MAX_PATTERN_COUNT]; Line 1496  char *patterns[MAX_PATTERN_COUNT];
1496  const char *locale_from = "--locale";  const char *locale_from = "--locale";
1497  const char *error;  const char *error;
1498    
1499    /* Set the default line ending value from the default in the PCRE library;
1500    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1501    */
1502    
1503    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1504    switch(i)
1505      {
1506      default:                 newline = (char *)"lf"; break;
1507      case '\r':               newline = (char *)"cr"; break;
1508      case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1509      case -1:                 newline = (char *)"any"; break;
1510      }
1511    
1512  /* Process the options */  /* Process the options */
1513    
1514  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
# Line 1294  for (i = 1; i < argc; i++) Line 1597  for (i = 1; i < argc; i++)
1597        }        }
1598      }      }
1599    
1600    
1601      /* Jeffrey Friedl's debugging harness uses these additional options which
1602      are not in the right form for putting in the option table because they use
1603      only one hyphen, yet are more than one character long. By putting them
1604      separately here, they will not get displayed as part of the help() output,
1605      but I don't think Jeffrey will care about that. */
1606    
1607    #ifdef JFRIEDL_DEBUG
1608      else if (strcmp(argv[i], "-pre") == 0) {
1609              jfriedl_prefix = argv[++i];
1610              continue;
1611      } else if (strcmp(argv[i], "-post") == 0) {
1612              jfriedl_postfix = argv[++i];
1613              continue;
1614      } else if (strcmp(argv[i], "-XT") == 0) {
1615              sscanf(argv[++i], "%d", &jfriedl_XT);
1616              continue;
1617      } else if (strcmp(argv[i], "-XR") == 0) {
1618              sscanf(argv[++i], "%d", &jfriedl_XR);
1619              continue;
1620      }
1621    #endif
1622    
1623    
1624    /* One-char options; many that have no data may be in a single argument; we    /* One-char options; many that have no data may be in a single argument; we
1625    continue till we hit the last one or one that needs data. */    continue till we hit the last one or one that needs data. */
1626    
# Line 1333  for (i = 1; i < argc; i++) Line 1660  for (i = 1; i < argc; i++)
1660    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1661    either has a value or defaults to something. It cannot have data in a    either has a value or defaults to something. It cannot have data in a
1662    separate item. At the moment, the only such options are "colo(u)r" and    separate item. At the moment, the only such options are "colo(u)r" and
1663    Jeffrey Friedl's special debugging option. */    Jeffrey Friedl's special -S debugging option. */
1664    
1665    if (*option_data == 0 &&    if (*option_data == 0 &&
1666        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
# Line 1465  if (colour_option != NULL && strcmp(colo Line 1792  if (colour_option != NULL && strcmp(colo
1792      }      }
1793    }    }
1794    
1795    /* Interpret the newline type; the default settings are Unix-like. */
1796    
1797    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1798      {
1799      pcre_options |= PCRE_NEWLINE_CR;
1800      endlinetype = EL_CR;
1801      }
1802    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1803      {
1804      pcre_options |= PCRE_NEWLINE_LF;
1805      endlinetype = EL_LF;
1806      }
1807    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1808      {
1809      pcre_options |= PCRE_NEWLINE_CRLF;
1810      endlinetype = EL_CRLF;
1811      }
1812    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1813      {
1814      pcre_options |= PCRE_NEWLINE_ANY;
1815      endlinetype = EL_ANY;
1816      }
1817    else
1818      {
1819      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1820      return 2;
1821      }
1822    
1823  /* Interpret the text values for -d and -D */  /* Interpret the text values for -d and -D */
1824    
1825  if (dee_option != NULL)  if (dee_option != NULL)
# Line 1490  if (DEE_option != NULL) Line 1845  if (DEE_option != NULL)
1845      }      }
1846    }    }
1847    
1848  /* Check the value for Jeff Friedl's debugging option. */  /* Check the values for Jeffrey Friedl's debugging options. */
1849    
1850  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
1851  if (S_arg > 9)  if (S_arg > 9)
# Line 1498  if (S_arg > 9) Line 1853  if (S_arg > 9)
1853    fprintf(stderr, "pcregrep: bad value for -S option\n");    fprintf(stderr, "pcregrep: bad value for -S option\n");
1854    return 2;    return 2;
1855    }    }
1856    if (jfriedl_XT != 0 || jfriedl_XR != 0)
1857      {
1858      if (jfriedl_XT == 0) jfriedl_XT = 1;
1859      if (jfriedl_XR == 0) jfriedl_XR = 1;
1860      }
1861  #endif  #endif
1862    
1863  /* Get memory to store the pattern and hints lists. */  /* Get memory to store the pattern and hints lists. */
# Line 1508  hints_list = (pcre_extra **)malloc(MAX_P Line 1868  hints_list = (pcre_extra **)malloc(MAX_P
1868  if (pattern_list == NULL || hints_list == NULL)  if (pattern_list == NULL || hints_list == NULL)
1869    {    {
1870    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
1871    return 2;    goto EXIT2;
1872    }    }
1873    
1874  /* If no patterns were provided by -e, and there is no file provided by -f,  /* If no patterns were provided by -e, and there is no file provided by -f,
# Line 1527  for (j = 0; j < cmd_pattern_count; j++) Line 1887  for (j = 0; j < cmd_pattern_count; j++)
1887    {    {
1888    if (!compile_pattern(patterns[j], pcre_options, NULL,    if (!compile_pattern(patterns[j], pcre_options, NULL,
1889         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1890      return 2;      goto EXIT2;
1891    }    }
1892    
1893  /* Compile the regular expressions that are provided in a file. */  /* Compile the regular expressions that are provided in a file. */
# Line 1551  if (pattern_filename != NULL) Line 1911  if (pattern_filename != NULL)
1911        {        {
1912        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1913          strerror(errno));          strerror(errno));
1914        return 2;        goto EXIT2;
1915        }        }
1916      filename = pattern_filename;      filename = pattern_filename;
1917      }      }
# Line 1564  if (pattern_filename != NULL) Line 1924  if (pattern_filename != NULL)
1924      linenumber++;      linenumber++;
1925      if (buffer[0] == 0) continue;   /* Skip blank lines */      if (buffer[0] == 0) continue;   /* Skip blank lines */
1926      if (!compile_pattern(buffer, pcre_options, filename, linenumber))      if (!compile_pattern(buffer, pcre_options, filename, linenumber))
1927        return 2;        goto EXIT2;
1928      }      }
1929    
1930    if (f != stdin) fclose(f);    if (f != stdin) fclose(f);
# Line 1580  for (j = 0; j < pattern_count; j++) Line 1940  for (j = 0; j < pattern_count; j++)
1940      char s[16];      char s[16];
1941      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
1942      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
1943      return 2;      goto EXIT2;
1944      }      }
1945    }    }
1946    
# Line 1594  if (exclude_pattern != NULL) Line 1954  if (exclude_pattern != NULL)
1954      {      {
1955      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
1956        errptr, error);        errptr, error);
1957      return 2;      goto EXIT2;
1958      }      }
1959    }    }
1960    
# Line 1606  if (include_pattern != NULL) Line 1966  if (include_pattern != NULL)
1966      {      {
1967      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
1968        errptr, error);        errptr, error);
1969      return 2;      goto EXIT2;
1970      }      }
1971    }    }
1972    
1973  /* If there are no further arguments, do the business on stdin and exit. */  /* If there are no further arguments, do the business on stdin and exit. */
1974    
1975  if (i >= argc)  if (i >= argc)
1976    return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);    {
1977      rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
1978      goto EXIT;
1979      }
1980    
1981  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
1982  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
# Line 1630  for (; i < argc; i++) Line 1993  for (; i < argc; i++)
1993      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
1994    }    }
1995    
1996    EXIT:
1997    if (pattern_list != NULL)
1998      {
1999      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2000      free(pattern_list);
2001      }
2002    if (hints_list != NULL)
2003      {
2004      for (i = 0; i < pattern_count; i++) free(hints_list[i]);
2005      free(hints_list);
2006      }
2007  return rc;  return rc;
2008    
2009    EXIT2:
2010    rc = 2;
2011    goto EXIT;
2012  }  }
2013    
2014  /* End of pcregrep */  /* End of pcregrep */

Legend:
Removed from v.87  
changed lines
  Added in v.137

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12