/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC revision 149 by ph10, Mon Apr 16 15:28:08 2007 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
40    #ifdef HAVE_CONFIG_H
41    #  include <config.h>
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45  #include <locale.h>  #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
# Line 46  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53  #include <unistd.h>  #ifdef HAVE_UNISTD_H
54    #  include <unistd.h>
55    #endif
56    
57  #include "config.h"  #include <pcre.h>
 #include "pcre.h"  
58    
59  #define FALSE 0  #define FALSE 0
60  #define TRUE 1  #define TRUE 1
61    
62  typedef int BOOL;  typedef int BOOL;
63    
 #define VERSION "4.2 09-Jan-2006"  
64  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
65    
66  #if BUFSIZ > 8192  #if BUFSIZ > 8192
# Line 65  typedef int BOOL; Line 69  typedef int BOOL;
69  #define MBUFTHIRD 8192  #define MBUFTHIRD 8192
70  #endif  #endif
71    
   
72  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
73  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
74  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
# Line 83  enum { DEE_READ, DEE_SKIP }; Line 86  enum { DEE_READ, DEE_SKIP };
86  #define PO_LINE_MATCH     0x0002  #define PO_LINE_MATCH     0x0002
87  #define PO_FIXED_STRINGS  0x0004  #define PO_FIXED_STRINGS  0x0004
88    
89    /* Line ending types */
90    
91    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
92    
93    
94    
95  /*************************************************  /*************************************************
# Line 94  regular code. */ Line 101  regular code. */
101    
102  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
103  static int S_arg = -1;  static int S_arg = -1;
104    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
105    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
106    static const char *jfriedl_prefix = "";
107    static const char *jfriedl_postfix = "";
108  #endif  #endif
109    
110    static int  endlinetype;
111    
112  static char *colour_string = (char *)"1;31";  static char *colour_string = (char *)"1;31";
113  static char *colour_option = NULL;  static char *colour_option = NULL;
114  static char *dee_option = NULL;  static char *dee_option = NULL;
115  static char *DEE_option = NULL;  static char *DEE_option = NULL;
116    static char *newline = NULL;
117  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
118  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
119  static char *locale = NULL;  static char *locale = NULL;
# Line 107  static char *locale = NULL; Line 121  static char *locale = NULL;
121  static const unsigned char *pcretables = NULL;  static const unsigned char *pcretables = NULL;
122    
123  static int  pattern_count = 0;  static int  pattern_count = 0;
124  static pcre **pattern_list;  static pcre **pattern_list = NULL;
125  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
126    
127  static char *include_pattern = NULL;  static char *include_pattern = NULL;
128  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
# Line 134  static BOOL number = FALSE; Line 148  static BOOL number = FALSE;
148  static BOOL only_matching = FALSE;  static BOOL only_matching = FALSE;
149  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
150  static BOOL silent = FALSE;  static BOOL silent = FALSE;
151    static BOOL utf8 = FALSE;
152    
153  /* Structure for options and list of them */  /* Structure for options and list of them */
154    
# Line 181  static option_item optionlist[] = { Line 196  static option_item optionlist[] = {
196    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
197    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
198    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
199      { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
200    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
201    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
202    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
# Line 210  static const char *prefix[] = { Line 226  static const char *prefix[] = {
226  static const char *suffix[] = {  static const char *suffix[] = {
227    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
228    
229    /* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */
230    
231    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
232    
233    const char utf8_table4[] = {
234      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
235      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
236      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
237      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
238    
239    
240    
241  /*************************************************  /*************************************************
# Line 222  although at present the only ones are fo Line 248  although at present the only ones are fo
248    
249  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
250    
251  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
252  #include <sys/types.h>  #include <sys/types.h>
253  #include <sys/stat.h>  #include <sys/stat.h>
254  #include <dirent.h>  #include <dirent.h>
# Line 292  Lionel Fourquaux. David Burgess added a Line 318  Lionel Fourquaux. David Burgess added a
318  when it did not exist. */  when it did not exist. */
319    
320    
321  #elif HAVE_WIN32API  #elif HAVE_WINDOWS_H
322    
323  #ifndef STRICT  #ifndef STRICT
324  # define STRICT  # define STRICT
# Line 414  FALSE; Line 440  FALSE;
440  typedef void directory_type;  typedef void directory_type;
441    
442  int isdirectory(char *filename) { return 0; }  int isdirectory(char *filename) { return 0; }
443  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
444  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
445  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
446    
447    
# Line 439  return FALSE; Line 465  return FALSE;
465    
466    
467    
468  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
469  /*************************************************  /*************************************************
470  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
471  *************************************************/  *************************************************/
# Line 462  return sys_errlist[n]; Line 488  return sys_errlist[n];
488    
489    
490  /*************************************************  /*************************************************
491    *             Find end of line                   *
492    *************************************************/
493    
494    /* The length of the endline sequence that is found is set via lenptr. This may
495    be zero at the very end of the file if there is no line-ending sequence there.
496    
497    Arguments:
498      p         current position in line
499      endptr    end of available data
500      lenptr    where to put the length of the eol sequence
501    
502    Returns:    pointer to the last byte of the line
503    */
504    
505    static char *
506    end_of_line(char *p, char *endptr, int *lenptr)
507    {
508    switch(endlinetype)
509      {
510      default:      /* Just in case */
511      case EL_LF:
512      while (p < endptr && *p != '\n') p++;
513      if (p < endptr)
514        {
515        *lenptr = 1;
516        return p + 1;
517        }
518      *lenptr = 0;
519      return endptr;
520    
521      case EL_CR:
522      while (p < endptr && *p != '\r') p++;
523      if (p < endptr)
524        {
525        *lenptr = 1;
526        return p + 1;
527        }
528      *lenptr = 0;
529      return endptr;
530    
531      case EL_CRLF:
532      for (;;)
533        {
534        while (p < endptr && *p != '\r') p++;
535        if (++p >= endptr)
536          {
537          *lenptr = 0;
538          return endptr;
539          }
540        if (*p == '\n')
541          {
542          *lenptr = 2;
543          return p + 1;
544          }
545        }
546      break;
547    
548      case EL_ANYCRLF:
549      while (p < endptr)
550        {
551        int extra = 0;
552        register int c = *((unsigned char *)p);
553    
554        if (utf8 && c >= 0xc0)
555          {
556          int gcii, gcss;
557          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
558          gcss = 6*extra;
559          c = (c & utf8_table3[extra]) << gcss;
560          for (gcii = 1; gcii <= extra; gcii++)
561            {
562            gcss -= 6;
563            c |= (p[gcii] & 0x3f) << gcss;
564            }
565          }
566    
567        p += 1 + extra;
568    
569        switch (c)
570          {
571          case 0x0a:    /* LF */
572          *lenptr = 1;
573          return p;
574    
575          case 0x0d:    /* CR */
576          if (p < endptr && *p == 0x0a)
577            {
578            *lenptr = 2;
579            p++;
580            }
581          else *lenptr = 1;
582          return p;
583    
584          default:
585          break;
586          }
587        }   /* End of loop for ANYCRLF case */
588    
589      *lenptr = 0;  /* Must have hit the end */
590      return endptr;
591    
592      case EL_ANY:
593      while (p < endptr)
594        {
595        int extra = 0;
596        register int c = *((unsigned char *)p);
597    
598        if (utf8 && c >= 0xc0)
599          {
600          int gcii, gcss;
601          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
602          gcss = 6*extra;
603          c = (c & utf8_table3[extra]) << gcss;
604          for (gcii = 1; gcii <= extra; gcii++)
605            {
606            gcss -= 6;
607            c |= (p[gcii] & 0x3f) << gcss;
608            }
609          }
610    
611        p += 1 + extra;
612    
613        switch (c)
614          {
615          case 0x0a:    /* LF */
616          case 0x0b:    /* VT */
617          case 0x0c:    /* FF */
618          *lenptr = 1;
619          return p;
620    
621          case 0x0d:    /* CR */
622          if (p < endptr && *p == 0x0a)
623            {
624            *lenptr = 2;
625            p++;
626            }
627          else *lenptr = 1;
628          return p;
629    
630          case 0x85:    /* NEL */
631          *lenptr = utf8? 2 : 1;
632          return p;
633    
634          case 0x2028:  /* LS */
635          case 0x2029:  /* PS */
636          *lenptr = 3;
637          return p;
638    
639          default:
640          break;
641          }
642        }   /* End of loop for ANY case */
643    
644      *lenptr = 0;  /* Must have hit the end */
645      return endptr;
646      }     /* End of overall switch */
647    }
648    
649    
650    
651    /*************************************************
652    *         Find start of previous line            *
653    *************************************************/
654    
655    /* This is called when looking back for before lines to print.
656    
657    Arguments:
658      p         start of the subsequent line
659      startptr  start of available data
660    
661    Returns:    pointer to the start of the previous line
662    */
663    
664    static char *
665    previous_line(char *p, char *startptr)
666    {
667    switch(endlinetype)
668      {
669      default:      /* Just in case */
670      case EL_LF:
671      p--;
672      while (p > startptr && p[-1] != '\n') p--;
673      return p;
674    
675      case EL_CR:
676      p--;
677      while (p > startptr && p[-1] != '\n') p--;
678      return p;
679    
680      case EL_CRLF:
681      for (;;)
682        {
683        p -= 2;
684        while (p > startptr && p[-1] != '\n') p--;
685        if (p <= startptr + 1 || p[-2] == '\r') return p;
686        }
687      return p;   /* But control should never get here */
688    
689      case EL_ANY:
690      case EL_ANYCRLF:
691      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
692      if (utf8) while ((*p & 0xc0) == 0x80) p--;
693    
694      while (p > startptr)
695        {
696        register int c;
697        char *pp = p - 1;
698    
699        if (utf8)
700          {
701          int extra = 0;
702          while ((*pp & 0xc0) == 0x80) pp--;
703          c = *((unsigned char *)pp);
704          if (c >= 0xc0)
705            {
706            int gcii, gcss;
707            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
708            gcss = 6*extra;
709            c = (c & utf8_table3[extra]) << gcss;
710            for (gcii = 1; gcii <= extra; gcii++)
711              {
712              gcss -= 6;
713              c |= (pp[gcii] & 0x3f) << gcss;
714              }
715            }
716          }
717        else c = *((unsigned char *)pp);
718    
719        if (endlinetype == EL_ANYCRLF) switch (c)
720          {
721          case 0x0a:    /* LF */
722          case 0x0d:    /* CR */
723          return p;
724    
725          default:
726          break;
727          }
728    
729        else switch (c)
730          {
731          case 0x0a:    /* LF */
732          case 0x0b:    /* VT */
733          case 0x0c:    /* FF */
734          case 0x0d:    /* CR */
735          case 0x85:    /* NEL */
736          case 0x2028:  /* LS */
737          case 0x2029:  /* PS */
738          return p;
739    
740          default:
741          break;
742          }
743    
744        p = pp;  /* Back one character */
745        }        /* End of loop for ANY case */
746    
747      return startptr;  /* Hit start of data */
748      }     /* End of overall switch */
749    }
750    
751    
752    
753    
754    
755    /*************************************************
756  *       Print the previous "after" lines         *  *       Print the previous "after" lines         *
757  *************************************************/  *************************************************/
758    
# Line 486  if (after_context > 0 && lastmatchnumber Line 777  if (after_context > 0 && lastmatchnumber
777    int count = 0;    int count = 0;
778    while (lastmatchrestart < endptr && count++ < after_context)    while (lastmatchrestart < endptr && count++ < after_context)
779      {      {
780        int ellength;
781      char *pp = lastmatchrestart;      char *pp = lastmatchrestart;
782      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
783      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
784      while (*pp != '\n') pp++;      pp = end_of_line(pp, endptr, &ellength);
785      fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
786      lastmatchrestart = pp + 1;      lastmatchrestart = pp;
787      }      }
788    hyphenpending = TRUE;    hyphenpending = TRUE;
789    }    }
# Line 548  way, the buffer is shifted left and re-f Line 840  way, the buffer is shifted left and re-f
840    
841  while (ptr < endptr)  while (ptr < endptr)
842    {    {
843    int i;    int i, endlinelength;
844    int mrc = 0;    int mrc = 0;
845    BOOL match = FALSE;    BOOL match = FALSE;
846    char *t = ptr;    char *t = ptr;
# Line 561  while (ptr < endptr) Line 853  while (ptr < endptr)
853    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
854    that any match is constrained to be in the first line. */    that any match is constrained to be in the first line. */
855    
856    linelength = 0;    t = end_of_line(t, endptr, &endlinelength);
857    while (t < endptr && *t++ != '\n') linelength++;    linelength = t - ptr - endlinelength;
858    length = multiline? endptr - ptr : linelength;    length = multiline? endptr - ptr : linelength;
859    
860      /* Extra processing for Jeffrey Friedl's debugging. */
861    
862    #ifdef JFRIEDL_DEBUG
863      if (jfriedl_XT || jfriedl_XR)
864      {
865          #include <sys/time.h>
866          #include <time.h>
867          struct timeval start_time, end_time;
868          struct timezone dummy;
869    
870          if (jfriedl_XT)
871          {
872              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
873              const char *orig = ptr;
874              ptr = malloc(newlen + 1);
875              if (!ptr) {
876                      printf("out of memory");
877                      exit(2);
878              }
879              endptr = ptr;
880              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
881              for (i = 0; i < jfriedl_XT; i++) {
882                      strncpy(endptr, orig,  length);
883                      endptr += length;
884              }
885              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
886              length = newlen;
887          }
888    
889          if (gettimeofday(&start_time, &dummy) != 0)
890                  perror("bad gettimeofday");
891    
892    
893          for (i = 0; i < jfriedl_XR; i++)
894              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
895    
896          if (gettimeofday(&end_time, &dummy) != 0)
897                  perror("bad gettimeofday");
898    
899          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
900                          -
901                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
902    
903          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
904          return 0;
905      }
906    #endif
907    
908    
909    /* Run through all the patterns until one matches. Note that we don't include    /* Run through all the patterns until one matches. Note that we don't include
910    the final newline in the subject string. */    the final newline in the subject string. */
911    
# Line 646  while (ptr < endptr) Line 987  while (ptr < endptr)
987    
988        if (after_context > 0 && lastmatchnumber > 0)        if (after_context > 0 && lastmatchnumber > 0)
989          {          {
990            int ellength;
991          int linecount = 0;          int linecount = 0;
992          char *p = lastmatchrestart;          char *p = lastmatchrestart;
993    
994          while (p < ptr && linecount < after_context)          while (p < ptr && linecount < after_context)
995            {            {
996            while (*p != '\n') p++;            p = end_of_line(p, ptr, &ellength);
           p++;  
997            linecount++;            linecount++;
998            }            }
999    
# Line 665  while (ptr < endptr) Line 1006  while (ptr < endptr)
1006            char *pp = lastmatchrestart;            char *pp = lastmatchrestart;
1007            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1008            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1009            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1010            fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1011            lastmatchrestart = pp + 1;            lastmatchrestart = pp;
1012            }            }
1013          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
1014          }          }
# Line 693  while (ptr < endptr) Line 1034  while (ptr < endptr)
1034                 linecount < before_context)                 linecount < before_context)
1035            {            {
1036            linecount++;            linecount++;
1037            p--;            p = previous_line(p, buffer);
           while (p > buffer && p[-1] != '\n') p--;  
1038            }            }
1039    
1040          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 702  while (ptr < endptr) Line 1042  while (ptr < endptr)
1042    
1043          while (p < ptr)          while (p < ptr)
1044            {            {
1045              int ellength;
1046            char *pp = p;            char *pp = p;
1047            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1048            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1049            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1050            fwrite(p, 1, pp - p + 1, stdout);   /* In case binary zero */            fwrite(p, 1, pp - p, stdout);
1051            p = pp + 1;            p = pp;
1052            }            }
1053          }          }
1054    
# Line 723  while (ptr < endptr) Line 1064  while (ptr < endptr)
1064        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1065        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1066        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately. Because the PCRE_FIRSTLINE option is set, the
1067        start of the match will always be before the first \n character. */        start of the match will always be before the first newline sequence. */
1068    
1069        if (multiline)        if (multiline)
1070          {          {
1071            int ellength;
1072          char *endmatch = ptr + offsets[1];          char *endmatch = ptr + offsets[1];
1073          t = ptr;          t = ptr;
1074          while (t < endmatch) { if (*t++ == '\n') linenumber++; }          while (t < endmatch)
1075          while (endmatch < endptr && *endmatch != '\n') endmatch++;            {
1076          linelength = endmatch - ptr;            t = end_of_line(t, endptr, &ellength);
1077              if (t <= endmatch) linenumber++; else break;
1078              }
1079            endmatch = end_of_line(endmatch, endptr, &ellength);
1080            linelength = endmatch - ptr - ellength;
1081          }          }
1082    
1083        /*** NOTE: Use only fwrite() to output the data line, so that binary        /*** NOTE: Use only fwrite() to output the data line, so that binary
# Line 763  while (ptr < endptr) Line 1109  while (ptr < endptr)
1109          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1110          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
1111          }          }
1112        else fwrite(ptr, 1, linelength, stdout);        else fwrite(ptr, 1, linelength + endlinelength, stdout);
   
       fprintf(stdout, "\n");  
1113        }        }
1114    
1115      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match */
# Line 775  while (ptr < endptr) Line 1119  while (ptr < endptr)
1119      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
1120      where we are about to restart, and that line's number. */      where we are about to restart, and that line's number. */
1121    
1122      lastmatchrestart = ptr + linelength + 1;      lastmatchrestart = ptr + linelength + endlinelength;
1123      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1124      }      }
1125    
1126    /* Advance to after the newline and increment the line number. */    /* Advance to after the newline and increment the line number. */
1127    
1128    ptr += linelength + 1;    ptr += linelength + endlinelength;
1129    linenumber++;    linenumber++;
1130    
1131    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
# Line 1037  switch(letter) Line 1381  switch(letter)
1381    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1382    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
1383    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1384    case 'u': options |= PCRE_UTF8; break;    case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1385    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1386    case 'w': process_options |= PO_WORD_MATCH; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1387    case 'x': process_options |= PO_LINE_MATCH; break;    case 'x': process_options |= PO_LINE_MATCH; break;
1388    
1389    case 'V':    case 'V':
1390    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s\n", pcre_version());
   fprintf(stderr, "PCRE version %s\n", pcre_version());  
1391    exit(0);    exit(0);
1392    break;    break;
1393    
# Line 1120  sprintf(buffer, "%s%.*s%s", prefix[proce Line 1463  sprintf(buffer, "%s%.*s%s", prefix[proce
1463    suffix[process_options]);    suffix[process_options]);
1464  pattern_list[pattern_count] =  pattern_list[pattern_count] =
1465    pcre_compile(buffer, options, &error, &errptr, pcretables);    pcre_compile(buffer, options, &error, &errptr, pcretables);
1466  if (pattern_list[pattern_count++] != NULL) return TRUE;  if (pattern_list[pattern_count] != NULL)
1467      {
1468      pattern_count++;
1469      return TRUE;
1470      }
1471    
1472  /* Handle compile errors */  /* Handle compile errors */
1473    
# Line 1152  return FALSE; Line 1499  return FALSE;
1499  *************************************************/  *************************************************/
1500    
1501  /* When the -F option has been used, each string may be a list of strings,  /* When the -F option has been used, each string may be a list of strings,
1502  separated by newlines. They will be matched literally.  separated by line breaks. They will be matched literally.
1503    
1504  Arguments:  Arguments:
1505    pattern        the pattern string    pattern        the pattern string
# Line 1170  compile_pattern(char *pattern, int optio Line 1517  compile_pattern(char *pattern, int optio
1517  {  {
1518  if ((process_options & PO_FIXED_STRINGS) != 0)  if ((process_options & PO_FIXED_STRINGS) != 0)
1519    {    {
1520      char *eop = pattern + strlen(pattern);
1521    char buffer[MBUFTHIRD];    char buffer[MBUFTHIRD];
1522    for(;;)    for(;;)
1523      {      {
1524      char *p = strchr(pattern, '\n');      int ellength;
1525      if (p == NULL)      char *p = end_of_line(pattern, eop, &ellength);
1526        if (ellength == 0)
1527        return compile_single_pattern(pattern, options, filename, count);        return compile_single_pattern(pattern, options, filename, count);
1528      sprintf(buffer, "%.*s", p - pattern, pattern);      sprintf(buffer, "%.*s", p - pattern - ellength, pattern);
1529      pattern = p + 1;      pattern = p;
1530      if (!compile_single_pattern(buffer, options, filename, count))      if (!compile_single_pattern(buffer, options, filename, count))
1531        return FALSE;        return FALSE;
1532      }      }
# Line 1200  int i, j; Line 1549  int i, j;
1549  int rc = 1;  int rc = 1;
1550  int pcre_options = 0;  int pcre_options = 0;
1551  int cmd_pattern_count = 0;  int cmd_pattern_count = 0;
1552    int hint_count = 0;
1553  int errptr;  int errptr;
1554  BOOL only_one_at_top;  BOOL only_one_at_top;
1555  char *patterns[MAX_PATTERN_COUNT];  char *patterns[MAX_PATTERN_COUNT];
1556  const char *locale_from = "--locale";  const char *locale_from = "--locale";
1557  const char *error;  const char *error;
1558    
1559    /* Set the default line ending value from the default in the PCRE library;
1560    "lf", "cr", "crlf", "any", and "anycrlf" are supported. Anything else is treated as "lf".
1561    */
1562    
1563    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1564    switch(i)
1565      {
1566      default:                 newline = (char *)"lf"; break;
1567      case '\r':               newline = (char *)"cr"; break;
1568      case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1569      case -1:                 newline = (char *)"any"; break;
1570      case -2:                 newline = (char *)"anycrlf"; break;
1571      }
1572    
1573  /* Process the options */  /* Process the options */
1574    
1575  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
# Line 1294  for (i = 1; i < argc; i++) Line 1658  for (i = 1; i < argc; i++)
1658        }        }
1659      }      }
1660    
1661    
1662      /* Jeffrey Friedl's debugging harness uses these additional options which
1663      are not in the right form for putting in the option table because they use
1664      only one hyphen, yet are more than one character long. By putting them
1665      separately here, they will not get displayed as part of the help() output,
1666      but I don't think Jeffrey will care about that. */
1667    
1668    #ifdef JFRIEDL_DEBUG
1669      else if (strcmp(argv[i], "-pre") == 0) {
1670              jfriedl_prefix = argv[++i];
1671              continue;
1672      } else if (strcmp(argv[i], "-post") == 0) {
1673              jfriedl_postfix = argv[++i];
1674              continue;
1675      } else if (strcmp(argv[i], "-XT") == 0) {
1676              sscanf(argv[++i], "%d", &jfriedl_XT);
1677              continue;
1678      } else if (strcmp(argv[i], "-XR") == 0) {
1679              sscanf(argv[++i], "%d", &jfriedl_XR);
1680              continue;
1681      }
1682    #endif
1683    
1684    
1685    /* One-char options; many that have no data may be in a single argument; we    /* One-char options; many that have no data may be in a single argument; we
1686    continue till we hit the last one or one that needs data. */    continue till we hit the last one or one that needs data. */
1687    
# Line 1333  for (i = 1; i < argc; i++) Line 1721  for (i = 1; i < argc; i++)
1721    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1722    either has a value or defaults to something. It cannot have data in a    either has a value or defaults to something. It cannot have data in a
1723    separate item. At the moment, the only such options are "colo(u)r" and    separate item. At the moment, the only such options are "colo(u)r" and
1724    Jeffrey Friedl's special debugging option. */    Jeffrey Friedl's special -S debugging option. */
1725    
1726    if (*option_data == 0 &&    if (*option_data == 0 &&
1727        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
# Line 1465  if (colour_option != NULL && strcmp(colo Line 1853  if (colour_option != NULL && strcmp(colo
1853      }      }
1854    }    }
1855    
1856    /* Interpret the newline type; the default settings are Unix-like. */
1857    
1858    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1859      {
1860      pcre_options |= PCRE_NEWLINE_CR;
1861      endlinetype = EL_CR;
1862      }
1863    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1864      {
1865      pcre_options |= PCRE_NEWLINE_LF;
1866      endlinetype = EL_LF;
1867      }
1868    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1869      {
1870      pcre_options |= PCRE_NEWLINE_CRLF;
1871      endlinetype = EL_CRLF;
1872      }
1873    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1874      {
1875      pcre_options |= PCRE_NEWLINE_ANY;
1876      endlinetype = EL_ANY;
1877      }
1878    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1879      {
1880      pcre_options |= PCRE_NEWLINE_ANYCRLF;
1881      endlinetype = EL_ANYCRLF;
1882      }
1883    else
1884      {
1885      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1886      return 2;
1887      }
1888    
1889  /* Interpret the text values for -d and -D */  /* Interpret the text values for -d and -D */
1890    
1891  if (dee_option != NULL)  if (dee_option != NULL)
# Line 1490  if (DEE_option != NULL) Line 1911  if (DEE_option != NULL)
1911      }      }
1912    }    }
1913    
1914  /* Check the value for Jeff Friedl's debugging option. */  /* Check the values for Jeffrey Friedl's debugging options. */
1915    
1916  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
1917  if (S_arg > 9)  if (S_arg > 9)
# Line 1498  if (S_arg > 9) Line 1919  if (S_arg > 9)
1919    fprintf(stderr, "pcregrep: bad value for -S option\n");    fprintf(stderr, "pcregrep: bad value for -S option\n");
1920    return 2;    return 2;
1921    }    }
1922    if (jfriedl_XT != 0 || jfriedl_XR != 0)
1923      {
1924      if (jfriedl_XT == 0) jfriedl_XT = 1;
1925      if (jfriedl_XR == 0) jfriedl_XR = 1;
1926      }
1927  #endif  #endif
1928    
1929  /* Get memory to store the pattern and hints lists. */  /* Get memory to store the pattern and hints lists. */
# Line 1508  hints_list = (pcre_extra **)malloc(MAX_P Line 1934  hints_list = (pcre_extra **)malloc(MAX_P
1934  if (pattern_list == NULL || hints_list == NULL)  if (pattern_list == NULL || hints_list == NULL)
1935    {    {
1936    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
1937    return 2;    goto EXIT2;
1938    }    }
1939    
1940  /* If no patterns were provided by -e, and there is no file provided by -f,  /* If no patterns were provided by -e, and there is no file provided by -f,
# Line 1527  for (j = 0; j < cmd_pattern_count; j++) Line 1953  for (j = 0; j < cmd_pattern_count; j++)
1953    {    {
1954    if (!compile_pattern(patterns[j], pcre_options, NULL,    if (!compile_pattern(patterns[j], pcre_options, NULL,
1955         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1956      return 2;      goto EXIT2;
1957    }    }
1958    
1959  /* Compile the regular expressions that are provided in a file. */  /* Compile the regular expressions that are provided in a file. */
# Line 1551  if (pattern_filename != NULL) Line 1977  if (pattern_filename != NULL)
1977        {        {
1978        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1979          strerror(errno));          strerror(errno));
1980        return 2;        goto EXIT2;
1981        }        }
1982      filename = pattern_filename;      filename = pattern_filename;
1983      }      }
# Line 1564  if (pattern_filename != NULL) Line 1990  if (pattern_filename != NULL)
1990      linenumber++;      linenumber++;
1991      if (buffer[0] == 0) continue;   /* Skip blank lines */      if (buffer[0] == 0) continue;   /* Skip blank lines */
1992      if (!compile_pattern(buffer, pcre_options, filename, linenumber))      if (!compile_pattern(buffer, pcre_options, filename, linenumber))
1993        return 2;        goto EXIT2;
1994      }      }
1995    
1996    if (f != stdin) fclose(f);    if (f != stdin) fclose(f);
# Line 1580  for (j = 0; j < pattern_count; j++) Line 2006  for (j = 0; j < pattern_count; j++)
2006      char s[16];      char s[16];
2007      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2008      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2009      return 2;      goto EXIT2;
2010      }      }
2011      hint_count++;
2012    }    }
2013    
2014  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
# Line 1594  if (exclude_pattern != NULL) Line 2021  if (exclude_pattern != NULL)
2021      {      {
2022      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2023        errptr, error);        errptr, error);
2024      return 2;      goto EXIT2;
2025      }      }
2026    }    }
2027    
# Line 1606  if (include_pattern != NULL) Line 2033  if (include_pattern != NULL)
2033      {      {
2034      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2035        errptr, error);        errptr, error);
2036      return 2;      goto EXIT2;
2037      }      }
2038    }    }
2039    
2040  /* If there are no further arguments, do the business on stdin and exit. */  /* If there are no further arguments, do the business on stdin and exit. */
2041    
2042  if (i >= argc)  if (i >= argc)
2043    return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);    {
2044      rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2045      goto EXIT;
2046      }
2047    
2048  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2049  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
# Line 1630  for (; i < argc; i++) Line 2060  for (; i < argc; i++)
2060      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
2061    }    }
2062    
2063    EXIT:
2064    if (pattern_list != NULL)
2065      {
2066      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2067      free(pattern_list);
2068      }
2069    if (hints_list != NULL)
2070      {
2071      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2072      free(hints_list);
2073      }
2074  return rc;  return rc;
2075    
2076    EXIT2:
2077    rc = 2;
2078    goto EXIT;
2079  }  }
2080    
2081  /* End of pcregrep */  /* End of pcregrep */

Legend:
Removed from v.87  
changed lines
  Added in v.149

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12