/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 89 by nigel, Sat Feb 24 21:41:27 2007 UTC revision 222 by ph10, Fri Aug 17 10:48:51 2007 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
40    #ifdef HAVE_CONFIG_H
41    #include <config.h>
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45  #include <locale.h>  #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
# Line 46  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55  #include <unistd.h>  #include <unistd.h>
56    #endif
57    
58  #include "config.h"  #include <pcre.h>
 #include "pcre.h"  
59    
60  #define FALSE 0  #define FALSE 0
61  #define TRUE 1  #define TRUE 1
62    
63  typedef int BOOL;  typedef int BOOL;
64    
 #define VERSION "4.2 09-Jan-2006"  
65  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
66    
67  #if BUFSIZ > 8192  #if BUFSIZ > 8192
# Line 65  typedef int BOOL; Line 70  typedef int BOOL;
70  #define MBUFTHIRD 8192  #define MBUFTHIRD 8192
71  #endif  #endif
72    
   
73  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
74  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
75  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
# Line 83  enum { DEE_READ, DEE_SKIP }; Line 87  enum { DEE_READ, DEE_SKIP };
87  #define PO_LINE_MATCH     0x0002  #define PO_LINE_MATCH     0x0002
88  #define PO_FIXED_STRINGS  0x0004  #define PO_FIXED_STRINGS  0x0004
89    
90    /* Line ending types */
91    
92    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
93    
94    
95    
96  /*************************************************  /*************************************************
# Line 100  static const char *jfriedl_prefix = ""; Line 108  static const char *jfriedl_prefix = "";
108  static const char *jfriedl_postfix = "";  static const char *jfriedl_postfix = "";
109  #endif  #endif
110    
111    static int  endlinetype;
112    
113  static char *colour_string = (char *)"1;31";  static char *colour_string = (char *)"1;31";
114  static char *colour_option = NULL;  static char *colour_option = NULL;
115  static char *dee_option = NULL;  static char *dee_option = NULL;
116  static char *DEE_option = NULL;  static char *DEE_option = NULL;
117    static char *newline = NULL;
118  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
119  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
120  static char *locale = NULL;  static char *locale = NULL;
# Line 111  static char *locale = NULL; Line 122  static char *locale = NULL;
122  static const unsigned char *pcretables = NULL;  static const unsigned char *pcretables = NULL;
123    
124  static int  pattern_count = 0;  static int  pattern_count = 0;
125  static pcre **pattern_list;  static pcre **pattern_list = NULL;
126  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
127    
128  static char *include_pattern = NULL;  static char *include_pattern = NULL;
129  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
# Line 138  static BOOL number = FALSE; Line 149  static BOOL number = FALSE;
149  static BOOL only_matching = FALSE;  static BOOL only_matching = FALSE;
150  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
151  static BOOL silent = FALSE;  static BOOL silent = FALSE;
152    static BOOL utf8 = FALSE;
153    
154  /* Structure for options and list of them */  /* Structure for options and list of them */
155    
# Line 185  static option_item optionlist[] = { Line 197  static option_item optionlist[] = {
197    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
198    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
199    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
200      { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
201    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
202    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
203    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
# Line 214  static const char *prefix[] = { Line 227  static const char *prefix[] = {
227  static const char *suffix[] = {  static const char *suffix[] = {
228    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
229    
/* UTF-8 decoding tables - used only when UTF-8 mode is on and the newline
setting is "any" or "anycrlf", because those are the only settings for which
whole characters (rather than single bytes) have to be examined. Both tables
are private to this file. */

/* Mask that extracts the data bits from the first byte of a multi-byte
character, indexed by the number of additional bytes that follow it. */

static const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

/* Number of additional bytes that follow a first byte whose value is 0xc0 or
greater, indexed by the least significant 6 bits of that byte. */

static const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
239    
240    
241    
242  /*************************************************  /*************************************************
# Line 226  although at present the only ones are fo Line 249  although at present the only ones are fo
249    
250  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
251    
252  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
253  #include <sys/types.h>  #include <sys/types.h>
254  #include <sys/stat.h>  #include <sys/stat.h>
255  #include <dirent.h>  #include <dirent.h>
# Line 258  for (;;) Line 281  for (;;)
281    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
282      return dent->d_name;      return dent->d_name;
283    }    }
284  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
285  }  }
286    
287  static void  static void
# Line 296  Lionel Fourquaux. David Burgess added a Line 319  Lionel Fourquaux. David Burgess added a
319  when it did not exist. */  when it did not exist. */
320    
321    
322  #elif HAVE_WIN32API  #elif HAVE_WINDOWS_H
323    
324  #ifndef STRICT  #ifndef STRICT
325  # define STRICT  # define STRICT
# Line 418  FALSE; Line 441  FALSE;
441  typedef void directory_type;  typedef void directory_type;
442    
443  int isdirectory(char *filename) { return 0; }  int isdirectory(char *filename) { return 0; }
444  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
445  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
446  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
447    
448    
# Line 443  return FALSE; Line 466  return FALSE;
466    
467    
468    
469  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
470  /*************************************************  /*************************************************
471  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
472  *************************************************/  *************************************************/
# Line 466  return sys_errlist[n]; Line 489  return sys_errlist[n];
489    
490    
491  /*************************************************  /*************************************************
492    *             Find end of line                   *
493    *************************************************/
494    
495    /* The length of the endline sequence that is found is set via lenptr. This may
496    be zero at the very end of the file if there is no line-ending sequence there.
497    
498    Arguments:
499      p         current position in line
500      endptr    end of available data
501      lenptr    where to put the length of the eol sequence
502    
503    Returns:    pointer to the last byte of the line
504    */
505    
506    static char *
507    end_of_line(char *p, char *endptr, int *lenptr)
508    {
509    switch(endlinetype)
510      {
511      default:      /* Just in case */
512      case EL_LF:
513      while (p < endptr && *p != '\n') p++;
514      if (p < endptr)
515        {
516        *lenptr = 1;
517        return p + 1;
518        }
519      *lenptr = 0;
520      return endptr;
521    
522      case EL_CR:
523      while (p < endptr && *p != '\r') p++;
524      if (p < endptr)
525        {
526        *lenptr = 1;
527        return p + 1;
528        }
529      *lenptr = 0;
530      return endptr;
531    
532      case EL_CRLF:
533      for (;;)
534        {
535        while (p < endptr && *p != '\r') p++;
536        if (++p >= endptr)
537          {
538          *lenptr = 0;
539          return endptr;
540          }
541        if (*p == '\n')
542          {
543          *lenptr = 2;
544          return p + 1;
545          }
546        }
547      break;
548    
549      case EL_ANYCRLF:
550      while (p < endptr)
551        {
552        int extra = 0;
553        register int c = *((unsigned char *)p);
554    
555        if (utf8 && c >= 0xc0)
556          {
557          int gcii, gcss;
558          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
559          gcss = 6*extra;
560          c = (c & utf8_table3[extra]) << gcss;
561          for (gcii = 1; gcii <= extra; gcii++)
562            {
563            gcss -= 6;
564            c |= (p[gcii] & 0x3f) << gcss;
565            }
566          }
567    
568        p += 1 + extra;
569    
570        switch (c)
571          {
572          case 0x0a:    /* LF */
573          *lenptr = 1;
574          return p;
575    
576          case 0x0d:    /* CR */
577          if (p < endptr && *p == 0x0a)
578            {
579            *lenptr = 2;
580            p++;
581            }
582          else *lenptr = 1;
583          return p;
584    
585          default:
586          break;
587          }
588        }   /* End of loop for ANYCRLF case */
589    
590      *lenptr = 0;  /* Must have hit the end */
591      return endptr;
592    
593      case EL_ANY:
594      while (p < endptr)
595        {
596        int extra = 0;
597        register int c = *((unsigned char *)p);
598    
599        if (utf8 && c >= 0xc0)
600          {
601          int gcii, gcss;
602          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
603          gcss = 6*extra;
604          c = (c & utf8_table3[extra]) << gcss;
605          for (gcii = 1; gcii <= extra; gcii++)
606            {
607            gcss -= 6;
608            c |= (p[gcii] & 0x3f) << gcss;
609            }
610          }
611    
612        p += 1 + extra;
613    
614        switch (c)
615          {
616          case 0x0a:    /* LF */
617          case 0x0b:    /* VT */
618          case 0x0c:    /* FF */
619          *lenptr = 1;
620          return p;
621    
622          case 0x0d:    /* CR */
623          if (p < endptr && *p == 0x0a)
624            {
625            *lenptr = 2;
626            p++;
627            }
628          else *lenptr = 1;
629          return p;
630    
631          case 0x85:    /* NEL */
632          *lenptr = utf8? 2 : 1;
633          return p;
634    
635          case 0x2028:  /* LS */
636          case 0x2029:  /* PS */
637          *lenptr = 3;
638          return p;
639    
640          default:
641          break;
642          }
643        }   /* End of loop for ANY case */
644    
645      *lenptr = 0;  /* Must have hit the end */
646      return endptr;
647      }     /* End of overall switch */
648    }
649    
650    
651    
652    /*************************************************
653    *         Find start of previous line            *
654    *************************************************/
655    
656    /* This is called when looking back for before lines to print.
657    
658    Arguments:
659      p         start of the subsequent line
660      startptr  start of available data
661    
662    Returns:    pointer to the start of the previous line
663    */
664    
665    static char *
666    previous_line(char *p, char *startptr)
667    {
668    switch(endlinetype)
669      {
670      default:      /* Just in case */
671      case EL_LF:
672      p--;
673      while (p > startptr && p[-1] != '\n') p--;
674      return p;
675    
676      case EL_CR:
677      p--;
678      while (p > startptr && p[-1] != '\n') p--;
679      return p;
680    
681      case EL_CRLF:
682      for (;;)
683        {
684        p -= 2;
685        while (p > startptr && p[-1] != '\n') p--;
686        if (p <= startptr + 1 || p[-2] == '\r') return p;
687        }
688      return p;   /* But control should never get here */
689    
690      case EL_ANY:
691      case EL_ANYCRLF:
692      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
693      if (utf8) while ((*p & 0xc0) == 0x80) p--;
694    
695      while (p > startptr)
696        {
697        register int c;
698        char *pp = p - 1;
699    
700        if (utf8)
701          {
702          int extra = 0;
703          while ((*pp & 0xc0) == 0x80) pp--;
704          c = *((unsigned char *)pp);
705          if (c >= 0xc0)
706            {
707            int gcii, gcss;
708            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
709            gcss = 6*extra;
710            c = (c & utf8_table3[extra]) << gcss;
711            for (gcii = 1; gcii <= extra; gcii++)
712              {
713              gcss -= 6;
714              c |= (pp[gcii] & 0x3f) << gcss;
715              }
716            }
717          }
718        else c = *((unsigned char *)pp);
719    
720        if (endlinetype == EL_ANYCRLF) switch (c)
721          {
722          case 0x0a:    /* LF */
723          case 0x0d:    /* CR */
724          return p;
725    
726          default:
727          break;
728          }
729    
730        else switch (c)
731          {
732          case 0x0a:    /* LF */
733          case 0x0b:    /* VT */
734          case 0x0c:    /* FF */
735          case 0x0d:    /* CR */
736          case 0x85:    /* NEL */
737          case 0x2028:  /* LS */
738          case 0x2029:  /* PS */
739          return p;
740    
741          default:
742          break;
743          }
744    
745        p = pp;  /* Back one character */
746        }        /* End of loop for ANY case */
747    
748      return startptr;  /* Hit start of data */
749      }     /* End of overall switch */
750    }
751    
752    
753    
754    
755    
756    /*************************************************
757  *       Print the previous "after" lines         *  *       Print the previous "after" lines         *
758  *************************************************/  *************************************************/
759    
# Line 490  if (after_context > 0 && lastmatchnumber Line 778  if (after_context > 0 && lastmatchnumber
778    int count = 0;    int count = 0;
779    while (lastmatchrestart < endptr && count++ < after_context)    while (lastmatchrestart < endptr && count++ < after_context)
780      {      {
781        int ellength;
782      char *pp = lastmatchrestart;      char *pp = lastmatchrestart;
783      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
784      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
785      while (*pp != '\n') pp++;      pp = end_of_line(pp, endptr, &ellength);
786      fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
787      lastmatchrestart = pp + 1;      lastmatchrestart = pp;
788      }      }
789    hyphenpending = TRUE;    hyphenpending = TRUE;
790    }    }
# Line 552  way, the buffer is shifted left and re-f Line 841  way, the buffer is shifted left and re-f
841    
842  while (ptr < endptr)  while (ptr < endptr)
843    {    {
844    int i;    int i, endlinelength;
845    int mrc = 0;    int mrc = 0;
846    BOOL match = FALSE;    BOOL match = FALSE;
847    char *t = ptr;    char *t = ptr;
# Line 565  while (ptr < endptr) Line 854  while (ptr < endptr)
854    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
855    that any match is constrained to be in the first line. */    that any match is constrained to be in the first line. */
856    
857    linelength = 0;    t = end_of_line(t, endptr, &endlinelength);
858    while (t < endptr && *t++ != '\n') linelength++;    linelength = t - ptr - endlinelength;
859    length = multiline? endptr - ptr : linelength;    length = multiline? (size_t)(endptr - ptr) : linelength;
   
860    
861    /* Extra processing for Jeffrey Friedl's debugging. */    /* Extra processing for Jeffrey Friedl's debugging. */
862    
# Line 700  while (ptr < endptr) Line 988  while (ptr < endptr)
988    
989        if (after_context > 0 && lastmatchnumber > 0)        if (after_context > 0 && lastmatchnumber > 0)
990          {          {
991            int ellength;
992          int linecount = 0;          int linecount = 0;
993          char *p = lastmatchrestart;          char *p = lastmatchrestart;
994    
995          while (p < ptr && linecount < after_context)          while (p < ptr && linecount < after_context)
996            {            {
997            while (*p != '\n') p++;            p = end_of_line(p, ptr, &ellength);
           p++;  
998            linecount++;            linecount++;
999            }            }
1000    
# Line 719  while (ptr < endptr) Line 1007  while (ptr < endptr)
1007            char *pp = lastmatchrestart;            char *pp = lastmatchrestart;
1008            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1009            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1010            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1011            fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1012            lastmatchrestart = pp + 1;            lastmatchrestart = pp;
1013            }            }
1014          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
1015          }          }
# Line 747  while (ptr < endptr) Line 1035  while (ptr < endptr)
1035                 linecount < before_context)                 linecount < before_context)
1036            {            {
1037            linecount++;            linecount++;
1038            p--;            p = previous_line(p, buffer);
           while (p > buffer && p[-1] != '\n') p--;  
1039            }            }
1040    
1041          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 756  while (ptr < endptr) Line 1043  while (ptr < endptr)
1043    
1044          while (p < ptr)          while (p < ptr)
1045            {            {
1046              int ellength;
1047            char *pp = p;            char *pp = p;
1048            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1049            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1050            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1051            fwrite(p, 1, pp - p + 1, stdout);   /* In case binary zero */            fwrite(p, 1, pp - p, stdout);
1052            p = pp + 1;            p = pp;
1053            }            }
1054          }          }
1055    
# Line 776  while (ptr < endptr) Line 1064  while (ptr < endptr)
1064    
1065        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1066        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1067        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1068        start of the match will always be before the first \n character. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1069          the match will always be before the first newline sequence. */
1070    
1071        if (multiline)        if (multiline)
1072          {          {
1073          char *endmatch = ptr + offsets[1];          int ellength;
1074          t = ptr;          char *endmatch = ptr;
1075          while (t < endmatch) { if (*t++ == '\n') linenumber++; }          if (!invert)
1076          while (endmatch < endptr && *endmatch != '\n') endmatch++;            {
1077          linelength = endmatch - ptr;            endmatch += offsets[1];
1078              t = ptr;
1079              while (t < endmatch)
1080                {
1081                t = end_of_line(t, endptr, &ellength);
1082                if (t <= endmatch) linenumber++; else break;
1083                }
1084              }
1085            endmatch = end_of_line(endmatch, endptr, &ellength);
1086            linelength = endmatch - ptr - ellength;
1087          }          }
1088    
1089        /*** NOTE: Use only fwrite() to output the data line, so that binary        /*** NOTE: Use only fwrite() to output the data line, so that binary
# Line 817  while (ptr < endptr) Line 1115  while (ptr < endptr)
1115          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1116          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
1117          }          }
1118        else fwrite(ptr, 1, linelength, stdout);        else fwrite(ptr, 1, linelength + endlinelength, stdout);
   
       fprintf(stdout, "\n");  
1119        }        }
1120    
1121      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match */
# Line 829  while (ptr < endptr) Line 1125  while (ptr < endptr)
1125      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
1126      where we are about to restart, and that line's number. */      where we are about to restart, and that line's number. */
1127    
1128      lastmatchrestart = ptr + linelength + 1;      lastmatchrestart = ptr + linelength + endlinelength;
1129      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1130      }      }
1131    
1132      /* For a match in multiline inverted mode (which of course did not cause
1133      anything to be printed), we have to move on to the end of the match before
1134      proceeding. */
1135    
1136      if (multiline && invert && match)
1137        {
1138        int ellength;
1139        char *endmatch = ptr + offsets[1];
1140        t = ptr;
1141        while (t < endmatch)
1142          {
1143          t = end_of_line(t, endptr, &ellength);
1144          if (t <= endmatch) linenumber++; else break;
1145          }
1146        endmatch = end_of_line(endmatch, endptr, &ellength);
1147        linelength = endmatch - ptr - ellength;
1148        }
1149    
1150    /* Advance to after the newline and increment the line number. */    /* Advance to after the newline and increment the line number. */
1151    
1152    ptr += linelength + 1;    ptr += linelength + endlinelength;
1153    linenumber++;    linenumber++;
1154    
1155    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
# Line 1091  switch(letter) Line 1405  switch(letter)
1405    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1406    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
1407    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1408    case 'u': options |= PCRE_UTF8; break;    case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1409    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1410    case 'w': process_options |= PO_WORD_MATCH; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1411    case 'x': process_options |= PO_LINE_MATCH; break;    case 'x': process_options |= PO_LINE_MATCH; break;
1412    
1413    case 'V':    case 'V':
1414    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s\n", pcre_version());
   fprintf(stderr, "PCRE version %s\n", pcre_version());  
1415    exit(0);    exit(0);
1416    break;    break;
1417    
# Line 1174  sprintf(buffer, "%s%.*s%s", prefix[proce Line 1487  sprintf(buffer, "%s%.*s%s", prefix[proce
1487    suffix[process_options]);    suffix[process_options]);
1488  pattern_list[pattern_count] =  pattern_list[pattern_count] =
1489    pcre_compile(buffer, options, &error, &errptr, pcretables);    pcre_compile(buffer, options, &error, &errptr, pcretables);
1490  if (pattern_list[pattern_count++] != NULL) return TRUE;  if (pattern_list[pattern_count] != NULL)
1491      {
1492      pattern_count++;
1493      return TRUE;
1494      }
1495    
1496  /* Handle compile errors */  /* Handle compile errors */
1497    
# Line 1206  return FALSE; Line 1523  return FALSE;
1523  *************************************************/  *************************************************/
1524    
1525  /* When the -F option has been used, each string may be a list of strings,  /* When the -F option has been used, each string may be a list of strings,
1526  separated by newlines. They will be matched literally.  separated by line breaks. They will be matched literally.
1527    
1528  Arguments:  Arguments:
1529    pattern        the pattern string    pattern        the pattern string
# Line 1224  compile_pattern(char *pattern, int optio Line 1541  compile_pattern(char *pattern, int optio
1541  {  {
1542  if ((process_options & PO_FIXED_STRINGS) != 0)  if ((process_options & PO_FIXED_STRINGS) != 0)
1543    {    {
1544      char *eop = pattern + strlen(pattern);
1545    char buffer[MBUFTHIRD];    char buffer[MBUFTHIRD];
1546    for(;;)    for(;;)
1547      {      {
1548      char *p = strchr(pattern, '\n');      int ellength;
1549      if (p == NULL)      char *p = end_of_line(pattern, eop, &ellength);
1550        if (ellength == 0)
1551        return compile_single_pattern(pattern, options, filename, count);        return compile_single_pattern(pattern, options, filename, count);
1552      sprintf(buffer, "%.*s", p - pattern, pattern);      sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1553      pattern = p + 1;      pattern = p;
1554      if (!compile_single_pattern(buffer, options, filename, count))      if (!compile_single_pattern(buffer, options, filename, count))
1555        return FALSE;        return FALSE;
1556      }      }
# Line 1254  int i, j; Line 1573  int i, j;
1573  int rc = 1;  int rc = 1;
1574  int pcre_options = 0;  int pcre_options = 0;
1575  int cmd_pattern_count = 0;  int cmd_pattern_count = 0;
1576    int hint_count = 0;
1577  int errptr;  int errptr;
1578  BOOL only_one_at_top;  BOOL only_one_at_top;
1579  char *patterns[MAX_PATTERN_COUNT];  char *patterns[MAX_PATTERN_COUNT];
1580  const char *locale_from = "--locale";  const char *locale_from = "--locale";
1581  const char *error;  const char *error;
1582    
1583    /* Set the default line ending value from the default in the PCRE library;
1584    "lf", "cr", "crlf", "any", and "anycrlf" are supported. Anything else is
1585    treated as "lf". */
1586    
1587    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1588    switch(i)
1589      {
1590      default:                 newline = (char *)"lf"; break;
1591      case '\r':               newline = (char *)"cr"; break;
1592      case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1593      case -1:                 newline = (char *)"any"; break;
1594      case -2:                 newline = (char *)"anycrlf"; break;
1595      }
1596    
1597  /* Process the options */  /* Process the options */
1598    
1599  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
# Line 1315  for (i = 1; i < argc; i++) Line 1649  for (i = 1; i < argc; i++)
1649          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
1650            {            {
1651            int oplen = equals - op->long_name;            int oplen = equals - op->long_name;
1652            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1653            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1654              {              {
1655              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1334  for (i = 1; i < argc; i++) Line 1668  for (i = 1; i < argc; i++)
1668          char buff2[24];          char buff2[24];
1669          int baselen = opbra - op->long_name;          int baselen = opbra - op->long_name;
1670          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
1671          sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,          sprintf(buff2, "%s%.*s", buff1,
1672            opbra + 1);            (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1673          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1674            break;            break;
1675          }          }
# Line 1543  if (colour_option != NULL && strcmp(colo Line 1877  if (colour_option != NULL && strcmp(colo
1877      }      }
1878    }    }
1879    
1880    /* Interpret the newline type; the default settings are Unix-like. */
1881    
1882    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1883      {
1884      pcre_options |= PCRE_NEWLINE_CR;
1885      endlinetype = EL_CR;
1886      }
1887    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1888      {
1889      pcre_options |= PCRE_NEWLINE_LF;
1890      endlinetype = EL_LF;
1891      }
1892    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1893      {
1894      pcre_options |= PCRE_NEWLINE_CRLF;
1895      endlinetype = EL_CRLF;
1896      }
1897    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1898      {
1899      pcre_options |= PCRE_NEWLINE_ANY;
1900      endlinetype = EL_ANY;
1901      }
1902    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1903      {
1904      pcre_options |= PCRE_NEWLINE_ANYCRLF;
1905      endlinetype = EL_ANYCRLF;
1906      }
1907    else
1908      {
1909      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1910      return 2;
1911      }
1912    
1913  /* Interpret the text values for -d and -D */  /* Interpret the text values for -d and -D */
1914    
1915  if (dee_option != NULL)  if (dee_option != NULL)
# Line 1591  hints_list = (pcre_extra **)malloc(MAX_P Line 1958  hints_list = (pcre_extra **)malloc(MAX_P
1958  if (pattern_list == NULL || hints_list == NULL)  if (pattern_list == NULL || hints_list == NULL)
1959    {    {
1960    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
1961    return 2;    goto EXIT2;
1962    }    }
1963    
1964  /* If no patterns were provided by -e, and there is no file provided by -f,  /* If no patterns were provided by -e, and there is no file provided by -f,
# Line 1610  for (j = 0; j < cmd_pattern_count; j++) Line 1977  for (j = 0; j < cmd_pattern_count; j++)
1977    {    {
1978    if (!compile_pattern(patterns[j], pcre_options, NULL,    if (!compile_pattern(patterns[j], pcre_options, NULL,
1979         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1980      return 2;      goto EXIT2;
1981    }    }
1982    
1983  /* Compile the regular expressions that are provided in a file. */  /* Compile the regular expressions that are provided in a file. */
# Line 1634  if (pattern_filename != NULL) Line 2001  if (pattern_filename != NULL)
2001        {        {
2002        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2003          strerror(errno));          strerror(errno));
2004        return 2;        goto EXIT2;
2005        }        }
2006      filename = pattern_filename;      filename = pattern_filename;
2007      }      }
# Line 1647  if (pattern_filename != NULL) Line 2014  if (pattern_filename != NULL)
2014      linenumber++;      linenumber++;
2015      if (buffer[0] == 0) continue;   /* Skip blank lines */      if (buffer[0] == 0) continue;   /* Skip blank lines */
2016      if (!compile_pattern(buffer, pcre_options, filename, linenumber))      if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2017        return 2;        goto EXIT2;
2018      }      }
2019    
2020    if (f != stdin) fclose(f);    if (f != stdin) fclose(f);
# Line 1663  for (j = 0; j < pattern_count; j++) Line 2030  for (j = 0; j < pattern_count; j++)
2030      char s[16];      char s[16];
2031      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2032      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2033      return 2;      goto EXIT2;
2034      }      }
2035      hint_count++;
2036    }    }
2037    
2038  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
# Line 1677  if (exclude_pattern != NULL) Line 2045  if (exclude_pattern != NULL)
2045      {      {
2046      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2047        errptr, error);        errptr, error);
2048      return 2;      goto EXIT2;
2049      }      }
2050    }    }
2051    
# Line 1689  if (include_pattern != NULL) Line 2057  if (include_pattern != NULL)
2057      {      {
2058      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2059        errptr, error);        errptr, error);
2060      return 2;      goto EXIT2;
2061      }      }
2062    }    }
2063    
2064  /* If there are no further arguments, do the business on stdin and exit. */  /* If there are no further arguments, do the business on stdin and exit. */
2065    
2066  if (i >= argc)  if (i >= argc)
2067    return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);    {
2068      rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2069      goto EXIT;
2070      }
2071    
2072  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2073  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
# Line 1713  for (; i < argc; i++) Line 2084  for (; i < argc; i++)
2084      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
2085    }    }
2086    
2087    EXIT:
2088    if (pattern_list != NULL)
2089      {
2090      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2091      free(pattern_list);
2092      }
2093    if (hints_list != NULL)
2094      {
2095      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2096      free(hints_list);
2097      }
2098  return rc;  return rc;
2099    
2100    EXIT2:
2101    rc = 2;
2102    goto EXIT;
2103  }  }
2104    
2105  /* End of pcregrep */  /* End of pcregrep */

Legend:
Removed from v.89  
changed lines
  Added in v.222

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12