/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC revision 172 by ph10, Tue Jun 5 10:40:13 2007 UTC
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 44  POSSIBILITY OF SUCH DAMAGE.
44  #include <locale.h>  #include <locale.h>
45  #include <errno.h>  #include <errno.h>
46    
47  #ifndef _WIN32  
48  #include <sys/resource.h>  /* A number of things vary for Windows builds. Originally, pcretest opened its
49    input and output without "b"; then I was told that "b" was needed in some
50    environments, so it was added for release 5.0 to both the input and output. (It
51    makes no difference on Unix-like systems.) Later I was told that it is wrong
52    for the input on Windows. I've now abstracted the modes into two macros that
53    are set here, to make it easier to fiddle with them, and removed "b" from the
54    input mode under Windows. */
55    
56    #if defined(_WIN32) || defined(WIN32)
57    #include <io.h>                /* For _setmode() */
58    #include <fcntl.h>             /* For _O_BINARY */
59    #define INPUT_MODE   "r"
60    #define OUTPUT_MODE  "wb"
61    
62    #else
63    #include <sys/time.h>          /* These two includes are needed */
64    #include <sys/resource.h>      /* for setrlimit(). */
65    #define INPUT_MODE   "rb"
66    #define OUTPUT_MODE  "wb"
67  #endif  #endif
68    
 #define PCRE_SPY        /* For Win32 build, import data, not export */  
69    
70  /* We include pcre_internal.h because we need the internal info for displaying  /* We have to include pcre_internal.h because we need the internal info for
71  the results of pcre_study() and we also need to know about the internal  displaying the results of pcre_study() and we also need to know about the
72  macros, structures, and other internal data values; pcretest has "inside  internal macros, structures, and other internal data values; pcretest has
73  information" compared to a program that strictly follows the PCRE API. */  "inside information" compared to a program that strictly follows the PCRE API.
74    
75    Although pcre_internal.h does itself include pcre.h, we explicitly include it
76    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
77    appropriately for an application, not for building PCRE. */
78    
79    #include "pcre.h"
80  #include "pcre_internal.h"  #include "pcre_internal.h"
81    
82  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* We need access to the data tables that PCRE uses. So as not to have to keep
# Line 74  symbols to prevent clashes. */ Line 96  symbols to prevent clashes. */
96    
97  /* We also need the pcre_printint() function for printing out compiled  /* We also need the pcre_printint() function for printing out compiled
98  patterns. This function is in a separate file so that it can be included in  patterns. This function is in a separate file so that it can be included in
99  pcre_compile.c when that module is compiled with debugging enabled. */  pcre_compile.c when that module is compiled with debugging enabled.
100    
101    The definition of the macro PRINTABLE, which determines whether to print an
102    output character as-is or as a hex value when showing compiled patterns, is
103    contained in this file. We use it here also, in cases when the locale has not
104    been explicitly changed, so as to get consistent output from systems that
105    differ in their output from isprint() even in the "C" locale. */
106    
107  #include "pcre_printint.src"  #include "pcre_printint.src"
108    
109    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
110    
111    
112  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
113  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 87  Makefile. */ Line 117  Makefile. */
117  #include "pcreposix.h"  #include "pcreposix.h"
118  #endif  #endif
119    
120  /* It is also possible, for the benefit of the version imported into Exim, to  /* It is also possible, for the benefit of the version currently imported into
121  build pcretest without support for UTF8 (define NOUTF8), without the interface  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
122  to the DFA matcher (NODFA), and without the doublecheck of the old "info"  interface to the DFA matcher (NODFA), and without the doublecheck of the old
123  function (define NOINFOCHECK). */  "info" function (define NOINFOCHECK). In fact, we automatically cut out the
124    UTF8 support if PCRE is built without it. */
125    
126    #ifndef SUPPORT_UTF8
127    #ifndef NOUTF8
128    #define NOUTF8
129    #endif
130    #endif
131    
132    
133  /* Other parameters */  /* Other parameters */
# Line 103  function (define NOINFOCHECK). */ Line 140  function (define NOINFOCHECK). */
140  #endif  #endif
141  #endif  #endif
142    
143    /* This is the default loop count for timing. */
144    
145  #define LOOPREPEAT 500000  #define LOOPREPEAT 500000
146    
147  /* Static variables */  /* Static variables */
# Line 114  static int callout_extra; Line 153  static int callout_extra;
153  static int callout_fail_count;  static int callout_fail_count;
154  static int callout_fail_id;  static int callout_fail_id;
155  static int first_callout;  static int first_callout;
156    static int locale_set = 0;
157  static int show_malloc;  static int show_malloc;
158  static int use_utf8;  static int use_utf8;
159  static size_t gotten_store;  static size_t gotten_store;
# Line 157  uschar *here = start; Line 197  uschar *here = start;
197  for (;;)  for (;;)
198    {    {
199    int rlen = buffer_size - (here - buffer);    int rlen = buffer_size - (here - buffer);
200    
201    if (rlen > 1000)    if (rlen > 1000)
202      {      {
203      int dlen;      int dlen;
# Line 213  return NULL; /* Control never gets here Line 254  return NULL; /* Control never gets here
254    
255  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
256  around with conditional compilation, just do the job by hand. It is only used  around with conditional compilation, just do the job by hand. It is only used
257  for unpicking the -o argument, so just keep it simple.  for unpicking arguments, so just keep it simple.
258    
259  Arguments:  Arguments:
260    str           string to be converted    str           string to be converted
# Line 311  Arguments: Line 352  Arguments:
352  Returns:     number of characters placed in the buffer  Returns:     number of characters placed in the buffer
353  */  */
354    
355    #if !defined NOUTF8
356    
357  static int  static int
358  ord2utf8(int cvalue, uschar *utf8bytes)  ord2utf8(int cvalue, uschar *utf8bytes)
359  {  {
# Line 327  for (j = i; j > 0; j--) Line 370  for (j = i; j > 0; j--)
370  return i + 1;  return i + 1;
371  }  }
372    
373    #endif
374    
375    
376    
377  /*************************************************  /*************************************************
# Line 353  while (length-- > 0) Line 398  while (length-- > 0)
398        {        {
399        length -= rc - 1;        length -= rc - 1;
400        p += rc;        p += rc;
401        if (c < 256 && isprint(c))        if (PRINTHEX(c))
402          {          {
403          if (f != NULL) fprintf(f, "%c", c);          if (f != NULL) fprintf(f, "%c", c);
404          yield++;          yield++;
405          }          }
406        else        else
407          {          {
408          int n;          int n = 4;
409          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);          if (f != NULL) fprintf(f, "\\x{%02x}", c);
410          yield += n;          yield += (n <= 0x000000ff)? 2 :
411                     (n <= 0x00000fff)? 3 :
412                     (n <= 0x0000ffff)? 4 :
413                     (n <= 0x000fffff)? 5 : 6;
414          }          }
415        continue;        continue;
416        }        }
# Line 371  while (length-- > 0) Line 419  while (length-- > 0)
419    
420     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
421    
422    if (isprint(c = *(p++)))    c = *p++;
423      if (PRINTHEX(c))
424      {      {
425      if (f != NULL) fprintf(f, "%c", c);      if (f != NULL) fprintf(f, "%c", c);
426      yield++;      yield++;
# Line 614  return count; Line 663  return count;
663  *************************************************/  *************************************************/
664    
665  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes, where
666  xxx is LF, CR, or CRLF. Print a message and return 0 if there is no match.  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
667    no match.
668    
669  Arguments:  Arguments:
670    p           points after the leading '<'    p           points after the leading '<'
# Line 629  check_newline(uschar *p, FILE *f) Line 679  check_newline(uschar *p, FILE *f)
679  if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
680  if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
681  if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
682    if (strncmp((char *)p, "anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
683    if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
684  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
685  return 0;  return 0;
686  }  }
# Line 636  return 0; Line 688  return 0;
688    
689    
690  /*************************************************  /*************************************************
691    *             Usage function                     *
692    *************************************************/
693    
694    static void
695    usage(void)
696    {
697    printf("Usage:     pcretest [options] [<input> [<output>]]\n");
698    printf("  -b       show compiled code (bytecode)\n");
699    printf("  -C       show PCRE compile-time options and exit\n");
700    printf("  -d       debug: show compiled code and information (-b and -i)\n");
701    #if !defined NODFA
702    printf("  -dfa     force DFA matching for all subjects\n");
703    #endif
704    printf("  -help    show usage information\n");
705    printf("  -i       show information about compiled patterns\n"
706           "  -m       output memory used information\n"
707           "  -o <n>   set size of offsets vector to <n>\n");
708    #if !defined NOPOSIX
709    printf("  -p       use POSIX interface\n");
710    #endif
711    printf("  -q       quiet: do not output PCRE version number at start\n");
712    printf("  -S <n>   set stack size to <n> megabytes\n");
713    printf("  -s       output store (memory) used information\n"
714           "  -t       time compilation and execution\n");
715    printf("  -t <n>   time compilation and execution, repeating <n> times\n");
716    printf("  -tm      time execution (matching) only\n");
717    printf("  -tm <n>  time execution (matching) only, repeating <n> times\n");
718    }
719    
720    
721    
722    /*************************************************
723  *                Main Program                    *  *                Main Program                    *
724  *************************************************/  *************************************************/
725    
# Line 650  int options = 0; Line 734  int options = 0;
734  int study_options = 0;  int study_options = 0;
735  int op = 1;  int op = 1;
736  int timeit = 0;  int timeit = 0;
737    int timeitm = 0;
738  int showinfo = 0;  int showinfo = 0;
739  int showstore = 0;  int showstore = 0;
740  int quiet = 0;  int quiet = 0;
# Line 681  buffer = (unsigned char *)malloc(buffer_ Line 766  buffer = (unsigned char *)malloc(buffer_
766  dbuffer = (unsigned char *)malloc(buffer_size);  dbuffer = (unsigned char *)malloc(buffer_size);
767  pbuffer = (unsigned char *)malloc(buffer_size);  pbuffer = (unsigned char *)malloc(buffer_size);
768    
769  /* The outfile variable is static so that new_malloc can use it. The _setmode()  /* The outfile variable is static so that new_malloc can use it. */
 stuff is some magic that I don't understand, but which apparently does good  
 things in Windows. It's related to line terminations.  */  
   
 #if defined(_WIN32) || defined(WIN32)  
 _setmode( _fileno( stdout ), 0x8000 );  
 #endif  /* defined(_WIN32) || defined(WIN32) */  
770    
771  outfile = stdout;  outfile = stdout;
772    
773    /* The following  _setmode() stuff is some Windows magic that tells its runtime
774    library to translate CRLF into a single LF character. At least, that's what
775    I've been told: never having used Windows I take this all on trust. Originally
776    it set 0x8000, but then I was advised that _O_BINARY was better. */
777    
778    #if defined(_WIN32) || defined(WIN32)
779    _setmode( _fileno( stdout ), _O_BINARY );
780    #endif
781    
782  /* Scan options */  /* Scan options */
783    
784  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
# Line 699  while (argc > 1 && argv[op][0] == '-') Line 787  while (argc > 1 && argv[op][0] == '-')
787    
788    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
789      showstore = 1;      showstore = 1;
   else if (strcmp(argv[op], "-t") == 0) timeit = 1;  
790    else if (strcmp(argv[op], "-q") == 0) quiet = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
791      else if (strcmp(argv[op], "-b") == 0) debug = 1;
792    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
793    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
794  #if !defined NODFA  #if !defined NODFA
# Line 713  while (argc > 1 && argv[op][0] == '-') Line 801  while (argc > 1 && argv[op][0] == '-')
801      op++;      op++;
802      argc--;      argc--;
803      }      }
804      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
805        {
806        int both = argv[op][2] == 0;
807        int temp;
808        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
809                         *endptr == 0))
810          {
811          timeitm = temp;
812          op++;
813          argc--;
814          }
815        else timeitm = LOOPREPEAT;
816        if (both) timeit = timeitm;
817        }
818    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
819        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
820          *endptr == 0))          *endptr == 0))
821      {      {
822  #ifdef _WIN32  #if defined(_WIN32) || defined(WIN32)
823      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
824      exit(1);      exit(1);
825  #else  #else
# Line 749  while (argc > 1 && argv[op][0] == '-') Line 851  while (argc > 1 && argv[op][0] == '-')
851      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
852      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
853      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
854        (rc == '\n')? "LF" : "CRLF");        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
855          (rc == -2)? "ANYCRLF" :
856          (rc == -1)? "ANY" : "???");
857      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
858      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
859      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
# Line 760  while (argc > 1 && argv[op][0] == '-') Line 864  while (argc > 1 && argv[op][0] == '-')
864      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %d\n", rc);
865      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
866      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
867      exit(0);      goto EXIT;
868        }
869      else if (strcmp(argv[op], "-help") == 0 ||
870               strcmp(argv[op], "--help") == 0)
871        {
872        usage();
873        goto EXIT;
874      }      }
875    else    else
876      {      {
877      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
878      printf("Usage:   pcretest [options] [<input> [<output>]]\n");      usage();
     printf("  -C     show PCRE compile-time options and exit\n");  
     printf("  -d     debug: show compiled code; implies -i\n");  
 #if !defined NODFA  
     printf("  -dfa   force DFA matching for all subjects\n");  
 #endif  
     printf("  -i     show information about compiled pattern\n"  
            "  -m     output memory used information\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -S <n> set stack size to <n> megabytes\n");  
     printf("  -s     output store (memory) used information\n"  
            "  -t     time compilation and execution\n");  
879      yield = 1;      yield = 1;
880      goto EXIT;      goto EXIT;
881      }      }
# Line 794  offsets = (int *)malloc(size_offsets_max Line 890  offsets = (int *)malloc(size_offsets_max
890  if (offsets == NULL)  if (offsets == NULL)
891    {    {
892    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
893      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
894    yield = 1;    yield = 1;
895    goto EXIT;    goto EXIT;
896    }    }
# Line 803  if (offsets == NULL) Line 899  if (offsets == NULL)
899    
900  if (argc > 1)  if (argc > 1)
901    {    {
902    infile = fopen(argv[op], "rb");    infile = fopen(argv[op], INPUT_MODE);
903    if (infile == NULL)    if (infile == NULL)
904      {      {
905      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
# Line 814  if (argc > 1) Line 910  if (argc > 1)
910    
911  if (argc > 2)  if (argc > 2)
912    {    {
913    outfile = fopen(argv[op+1], "wb");    outfile = fopen(argv[op+1], OUTPUT_MODE);
914    if (outfile == NULL)    if (outfile == NULL)
915      {      {
916      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
# Line 854  while (!done) Line 950  while (!done)
950    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
951    int do_study = 0;    int do_study = 0;
952    int do_debug = debug;    int do_debug = debug;
953      int debug_lengths = 1;
954    int do_G = 0;    int do_G = 0;
955    int do_g = 0;    int do_g = 0;
956    int do_showinfo = showinfo;    int do_showinfo = showinfo;
957    int do_showrest = 0;    int do_showrest = 0;
958    int do_flip = 0;    int do_flip = 0;
959    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
960    
961    use_utf8 = 0;    use_utf8 = 0;
962    
# Line 969  while (!done) Line 1066  while (!done)
1066      }      }
1067    
1068    pp = p;    pp = p;
1069      poffset = p - buffer;
1070    
1071    for(;;)    for(;;)
1072      {      {
# Line 989  while (!done) Line 1087  while (!done)
1087      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1088      }      }
1089    
1090      /* The buffer may have moved while being extended; reset the start of data
1091      pointer to the correct relative point in the buffer. */
1092    
1093      p = buffer + poffset;
1094    
1095    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1096    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1097    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
# Line 1020  while (!done) Line 1123  while (!done)
1123    
1124        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1125        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1126          case 'B': do_debug = 1; break;
1127        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
1128        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1129        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
# Line 1037  while (!done) Line 1141  while (!done)
1141        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1142        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1143        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1144          case 'Z': debug_lengths = 0; break;
1145        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1146        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1147    
1148        case 'L':        case 'L':
1149        ppp = pp;        ppp = pp;
1150        /* The '\r' test here is so that it works on Windows */        /* The '\r' test here is so that it works on Windows. */
1151        while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;        /* The '0' test is just in case this is an unterminated line. */
1152          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1153        *ppp = 0;        *ppp = 0;
1154        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1155          {          {
1156          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1157          goto SKIP_DATA;          goto SKIP_DATA;
1158          }          }
1159          locale_set = 1;
1160        tables = pcre_maketables();        tables = pcre_maketables();
1161        pp = ppp;        pp = ppp;
1162        break;        break;
# Line 1116  while (!done) Line 1223  while (!done)
1223  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1224    
1225      {      {
1226      if (timeit)      if (timeit > 0)
1227        {        {
1228        register int i;        register int i;
1229        clock_t time_taken;        clock_t time_taken;
1230        clock_t start_time = clock();        clock_t start_time = clock();
1231        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1232          {          {
1233          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1234          if (re != NULL) free(re);          if (re != NULL) free(re);
1235          }          }
1236        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1237        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1238          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /          (((double)time_taken * 1000.0) / (double)timeit) /
1239            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1240        }        }
1241    
# Line 1180  while (!done) Line 1287  while (!done)
1287    
1288      if (do_study)      if (do_study)
1289        {        {
1290        if (timeit)        if (timeit > 0)
1291          {          {
1292          register int i;          register int i;
1293          clock_t time_taken;          clock_t time_taken;
1294          clock_t start_time = clock();          clock_t start_time = clock();
1295          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeit; i++)
1296            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
1297          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1298          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
1299          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
1300            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeit) /
1301              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
1302          }          }
1303        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 1233  while (!done) Line 1340  while (!done)
1340    
1341      SHOW_INFO:      SHOW_INFO:
1342    
1343        if (do_debug)
1344          {
1345          fprintf(outfile, "------------------------------------------------------------------\n");
1346          pcre_printint(re, outfile, debug_lengths);
1347          }
1348    
1349      if (do_showinfo)      if (do_showinfo)
1350        {        {
1351        unsigned long int get_options, all_options;        unsigned long int get_options, all_options;
1352  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1353        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1354  #endif  #endif
1355        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char, okpartial, jchanged;
1356        int nameentrysize, namecount;        int nameentrysize, namecount;
1357        const uschar *nametable;        const uschar *nametable;
1358    
       if (do_debug)  
         {  
         fprintf(outfile, "------------------------------------------------------------------\n");  
         pcre_printint(re, outfile);  
         }  
   
1359        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1360        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1361        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
# Line 1258  while (!done) Line 1365  while (!done)
1365        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1366        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1367        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1368          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1369          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1370    
1371  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1372        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
# Line 1299  while (!done) Line 1408  while (!done)
1408            }            }
1409          }          }
1410    
1411        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
       to fish it out via out back door */  
1412    
1413        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
1414        if (do_flip)        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
          }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
1415    
1416        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1417          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
# Line 1327  while (!done) Line 1429  while (!done)
1429            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1430            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1431    
1432        switch (get_options & PCRE_NEWLINE_CRLF)        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1433    
1434          switch (get_options & PCRE_NEWLINE_BITS)
1435          {          {
1436          case PCRE_NEWLINE_CR:          case PCRE_NEWLINE_CR:
1437          fprintf(outfile, "Forced newline sequence: CR\n");          fprintf(outfile, "Forced newline sequence: CR\n");
# Line 1341  while (!done) Line 1445  while (!done)
1445          fprintf(outfile, "Forced newline sequence: CRLF\n");          fprintf(outfile, "Forced newline sequence: CRLF\n");
1446          break;          break;
1447    
1448            case PCRE_NEWLINE_ANYCRLF:
1449            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1450            break;
1451    
1452            case PCRE_NEWLINE_ANY:
1453            fprintf(outfile, "Forced newline sequence: ANY\n");
1454            break;
1455    
1456          default:          default:
1457          break;          break;
1458          }          }
# Line 1358  while (!done) Line 1470  while (!done)
1470          int ch = first_char & 255;          int ch = first_char & 255;
1471          const char *caseless = ((first_char & REQ_CASELESS) == 0)?          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1472            "" : " (caseless)";            "" : " (caseless)";
1473          if (isprint(ch))          if (PRINTHEX(ch))
1474            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1475          else          else
1476            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
# Line 1373  while (!done) Line 1485  while (!done)
1485          int ch = need_char & 255;          int ch = need_char & 255;
1486          const char *caseless = ((need_char & REQ_CASELESS) == 0)?          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1487            "" : " (caseless)";            "" : " (caseless)";
1488          if (isprint(ch))          if (PRINTHEX(ch))
1489            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1490          else          else
1491            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
# Line 1409  while (!done) Line 1521  while (!done)
1521                    fprintf(outfile, "\n  ");                    fprintf(outfile, "\n  ");
1522                    c = 2;                    c = 2;
1523                    }                    }
1524                  if (isprint(i) && i != ' ')                  if (PRINTHEX(i) && i != ' ')
1525                    {                    {
1526                    fprintf(outfile, "%c ", i);                    fprintf(outfile, "%c ", i);
1527                    c += 2;                    c += 2;
# Line 1468  while (!done) Line 1580  while (!done)
1580                  strerror(errno));                  strerror(errno));
1581                }                }
1582              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
1583    
1584              }              }
1585            }            }
1586          fclose(f);          fclose(f);
# Line 1485  while (!done) Line 1598  while (!done)
1598    for (;;)    for (;;)
1599      {      {
1600      uschar *q;      uschar *q;
1601      uschar *bptr = dbuffer;      uschar *bptr;
1602      int *use_offsets = offsets;      int *use_offsets = offsets;
1603      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1604      int callout_data = 0;      int callout_data = 0;
# Line 1541  while (!done) Line 1654  while (!done)
1654      p = buffer;      p = buffer;
1655      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1656    
1657      q = dbuffer;      bptr = q = dbuffer;
1658      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1659        {        {
1660        int i = 0;        int i = 0;
# Line 1736  while (!done) Line 1849  while (!done)
1849            if (offsets == NULL)            if (offsets == NULL)
1850              {              {
1851              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1852                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
1853              yield = 1;              yield = 1;
1854              goto EXIT;              goto EXIT;
1855              }              }
# Line 1866  while (!done) Line 1979  while (!done)
1979    
1980      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
1981        {        {
1982        if (timeit)        if (timeitm > 0)
1983          {          {
1984          register int i;          register int i;
1985          clock_t time_taken;          clock_t time_taken;
# Line 1876  while (!done) Line 1989  while (!done)
1989          if (all_use_dfa || use_dfa)          if (all_use_dfa || use_dfa)
1990            {            {
1991            int workspace[1000];            int workspace[1000];
1992            for (i = 0; i < LOOPREPEAT; i++)            for (i = 0; i < timeitm; i++)
1993              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1994                options | g_notempty, use_offsets, use_size_offsets, workspace,                options | g_notempty, use_offsets, use_size_offsets, workspace,
1995                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
# Line 1884  while (!done) Line 1997  while (!done)
1997          else          else
1998  #endif  #endif
1999    
2000          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeitm; i++)
2001            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2002              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2003    
2004          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2005          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2006            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeitm) /
2007              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2008          }          }
2009    
# Line 1966  while (!done) Line 2079  while (!done)
2079    
2080        if (count >= 0)        if (count >= 0)
2081          {          {
2082          int i;          int i, maxcount;
2083    
2084    #if !defined NODFA
2085            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2086    #endif
2087              maxcount = use_size_offsets/3;
2088    
2089            /* This is a check against a lunatic return value. */
2090    
2091            if (count > maxcount)
2092              {
2093              fprintf(outfile,
2094                "** PCRE error: returned count %d is too big for offset size %d\n",
2095                count, use_size_offsets);
2096              count = use_size_offsets/3;
2097              if (do_g || do_G)
2098                {
2099                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2100                do_g = do_G = FALSE;        /* Break g/G loop */
2101                }
2102              }
2103    
2104          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2105            {            {
2106            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
# Line 2084  while (!done) Line 2218  while (!done)
2218          }          }
2219    
2220        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2221        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2222        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2223        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2224        offset values to achieve this. We won't be at the end of the string -  
2225        that was checked before setting g_notempty. */        Complication arises in the case when the newline option is "any" or
2226          "anycrlf". If the previous match was at the end of a line terminated by
2227          CRLF, an advance of one character just passes the \r, whereas we should
2228          prefer the longer newline sequence, as does the code in pcre_exec().
2229          Fudge the offset value to achieve this.
2230    
2231          Otherwise, in the case of UTF-8 matching, the advance must be one
2232          character, not one byte. */
2233    
2234        else        else
2235          {          {
2236          if (g_notempty != 0)          if (g_notempty != 0)
2237            {            {
2238            int onechar = 1;            int onechar = 1;
2239              unsigned int obits = ((real_pcre *)re)->options;
2240            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2241            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2242                {
2243                int d;
2244                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2245                obits = (d == '\r')? PCRE_NEWLINE_CR :
2246                        (d == '\n')? PCRE_NEWLINE_LF :
2247                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2248                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2249                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2250                }
2251              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2252                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2253                  &&
2254                  start_offset < len - 1 &&
2255                  bptr[start_offset] == '\r' &&
2256                  bptr[start_offset+1] == '\n')
2257                onechar++;
2258              else if (use_utf8)
2259              {              {
2260              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2261                {                {
# Line 2131  while (!done) Line 2290  while (!done)
2290        character. */        character. */
2291    
2292        g_notempty = 0;        g_notempty = 0;
2293    
2294        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2295          {          {
2296          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
# Line 2165  while (!done) Line 2325  while (!done)
2325      {      {
2326      new_free((void *)tables);      new_free((void *)tables);
2327      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2328        locale_set = 0;
2329      }      }
2330    }    }
2331    

Legend:
Removed from v.91  
changed lines
  Added in v.172

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12