/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 73 by nigel, Sat Feb 24 21:40:30 2007 UTC revision 75 by nigel, Sat Feb 24 21:40:37 2007 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather untidy in places. */  been extended and consequently is now rather untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39  #include <ctype.h>  #include <ctype.h>
40  #include <stdio.h>  #include <stdio.h>
# Line 12  been extended and consequently is now ra Line 42  been extended and consequently is now ra
42  #include <stdlib.h>  #include <stdlib.h>
43  #include <time.h>  #include <time.h>
44  #include <locale.h>  #include <locale.h>
45    #include <errno.h>
46    
47  /* We need the internal info for displaying the results of pcre_study(). Also  /* We need the internal info for displaying the results of pcre_study(). Also
48  for getting the opcodes for showing compiled code. */  for getting the opcodes for showing compiled code. */
# Line 35  Makefile. */ Line 66  Makefile. */
66  #endif  #endif
67  #endif  #endif
68    
69  #define LOOPREPEAT 50000  #define LOOPREPEAT 500000
70    
71  #define BUFFER_SIZE 30000  #define BUFFER_SIZE 30000
72    #define PBUFFER_SIZE BUFFER_SIZE
73  #define DBUFFER_SIZE BUFFER_SIZE  #define DBUFFER_SIZE BUFFER_SIZE
74    
75    
# Line 52  static int show_malloc; Line 84  static int show_malloc;
84  static int use_utf8;  static int use_utf8;
85  static size_t gotten_store;  static size_t gotten_store;
86    
87    static uschar *pbuffer = NULL;
88    
89    
90  static const int utf8_table1[] = {  static const int utf8_table1[] = {
91    0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};    0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};
# Line 71  static const int utf8_table3[] = { Line 105  static const int utf8_table3[] = {
105  /* The code for doing this is held in a separate file that is also included in  /* The code for doing this is held in a separate file that is also included in
106  pcre.c when it is compiled with the debug switch. It defines a function called  pcre.c when it is compiled with the debug switch. It defines a function called
107  print_internals(), which uses a table of opcode lengths defined by the macro  print_internals(), which uses a table of opcode lengths defined by the macro
108  OP_LENGTHS, whose name must be OP_lengths. */  OP_LENGTHS, whose name must be OP_lengths. It also uses a table that translates
109    Unicode property names to numbers; this is kept in a separate file. */
110    
111  static uschar OP_lengths[] = { OP_LENGTHS };  static uschar OP_lengths[] = { OP_LENGTHS };
112    
113    #include "ucp.h"
114    #include "ucptypetable.c"
115  #include "printint.c"  #include "printint.c"
116    
117    
# Line 269  data is not zero. */ Line 306  data is not zero. */
306  static int callout(pcre_callout_block *cb)  static int callout(pcre_callout_block *cb)
307  {  {
308  FILE *f = (first_callout | callout_extra)? outfile : NULL;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
309  int i, pre_start, post_start;  int i, pre_start, post_start, subject_length;
310    
311  if (callout_extra)  if (callout_extra)
312    {    {
# Line 300  pre_start = pchars((unsigned char *)cb-> Line 337  pre_start = pchars((unsigned char *)cb->
337  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
338    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
339    
340    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
341    
342  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  (void)pchars((unsigned char *)(cb->subject + cb->current_position),
343    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
344    
345  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
346    
347  /* Always print appropriate indicators, with callout number if not already  /* Always print appropriate indicators, with callout number if not already
348  shown */  shown. For automatic callouts, show the pattern offset. */
349    
350  if (callout_extra) fprintf(outfile, "    ");  if (cb->callout_number == 255)
351    else fprintf(outfile, "%3d ", cb->callout_number);    {
352      fprintf(outfile, "%+3d ", cb->pattern_position);
353      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
354      }
355    else
356      {
357      if (callout_extra) fprintf(outfile, "    ");
358        else fprintf(outfile, "%3d ", cb->callout_number);
359      }
360    
361  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
362  fprintf(outfile, "^");  fprintf(outfile, "^");
# Line 320  if (post_start > 0) Line 367  if (post_start > 0)
367    fprintf(outfile, "^");    fprintf(outfile, "^");
368    }    }
369    
370    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
371      fprintf(outfile, " ");
372    
373    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
374      pbuffer + cb->pattern_position);
375    
376  fprintf(outfile, "\n");  fprintf(outfile, "\n");
377  first_callout = 0;  first_callout = 0;
378    
# Line 396  if ((rc = pcre_fullinfo(re, study, optio Line 449  if ((rc = pcre_fullinfo(re, study, optio
449    
450    
451  /*************************************************  /*************************************************
452    *         Byte flipping function                 *
453    *************************************************/
454    
/* Reverse the byte order of an integer field. If n is 2, the two low-order
bytes of value are swapped; for any other n, the four low-order bytes are
reversed and any higher-order bytes are discarded. The work is done in
unsigned arithmetic so that no bit is ever shifted into the sign bit of a
signed long (which would be undefined behaviour when long is 32 bits wide).

Arguments:
  value     the value whose bytes are to be reversed
  n         the size of the field, in bytes

Returns:    the byte-reversed value
*/

static long int
byteflip(long int value, int n)
{
unsigned long int v = (unsigned long int)value;
if (n == 2)
  return (long int)(((v & 0x00ffUL) << 8) | ((v & 0xff00UL) >> 8));
return (long int)(((v & 0x000000ffUL) << 24) |
                  ((v & 0x0000ff00UL) <<  8) |
                  ((v & 0x00ff0000UL) >>  8) |
                  ((v & 0xff000000UL) >> 24));
}
464    
465    
466    
467    
468    /*************************************************
469  *                Main Program                    *  *                Main Program                    *
470  *************************************************/  *************************************************/
471    
# Line 429  when I am debugging. */ Line 499  when I am debugging. */
499    
500  buffer = (unsigned char *)malloc(BUFFER_SIZE);  buffer = (unsigned char *)malloc(BUFFER_SIZE);
501  dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);  dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
502    pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
503    
504  /* Static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. The _setmode()
505    stuff is some magic that I don't understand, but which apparently does good
506    things in Windows. It's related to line terminations.  */
507    
508    #if defined(_WIN32) || defined(WIN32)
509    _setmode( _fileno( stdout ), 0x8000 );
510    #endif  /* defined(_WIN32) || defined(WIN32) */
511    
512  outfile = stdout;  outfile = stdout;
513    
# Line 462  while (argc > 1 && argv[op][0] == '-') Line 539  while (argc > 1 && argv[op][0] == '-')
539      printf("Compiled with\n");      printf("Compiled with\n");
540      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
541      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
542        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
543        printf("  %sUnicode properties support\n", rc? "" : "No ");
544      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
545      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
546      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
# Line 481  while (argc > 1 && argv[op][0] == '-') Line 560  while (argc > 1 && argv[op][0] == '-')
560      printf("  -C     show PCRE compile-time options and exit\n");      printf("  -C     show PCRE compile-time options and exit\n");
561      printf("  -d     debug: show compiled code; implies -i\n"      printf("  -d     debug: show compiled code; implies -i\n"
562             "  -i     show information about compiled pattern\n"             "  -i     show information about compiled pattern\n"
563               "  -m     output memory used information\n"
564             "  -o <n> set size of offsets vector to <n>\n");             "  -o <n> set size of offsets vector to <n>\n");
565  #if !defined NOPOSIX  #if !defined NOPOSIX
566      printf("  -p     use POSIX interface\n");      printf("  -p     use POSIX interface\n");
567  #endif  #endif
568      printf("  -s     output store information\n"      printf("  -s     output store (memory) used information\n"
569             "  -t     time compilation and execution\n");             "  -t     time compilation and execution\n");
570      return 1;      return 1;
571      }      }
# Line 508  if (offsets == NULL) Line 588  if (offsets == NULL)
588    
589  if (argc > 1)  if (argc > 1)
590    {    {
591    infile = fopen(argv[op], "r");    infile = fopen(argv[op], "rb");
592    if (infile == NULL)    if (infile == NULL)
593      {      {
594      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
# Line 518  if (argc > 1) Line 598  if (argc > 1)
598    
599  if (argc > 2)  if (argc > 2)
600    {    {
601    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], "wb");
602    if (outfile == NULL)    if (outfile == NULL)
603      {      {
604      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
# Line 551  while (!done) Line 631  while (!done)
631    
632    const char *error;    const char *error;
633    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
634      unsigned char *to_file = NULL;
635    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
636      unsigned long int true_size, true_study_size = 0;
637      size_t size, regex_gotten_store;
638    int do_study = 0;    int do_study = 0;
639    int do_debug = debug;    int do_debug = debug;
640    int do_G = 0;    int do_G = 0;
641    int do_g = 0;    int do_g = 0;
642    int do_showinfo = showinfo;    int do_showinfo = showinfo;
643    int do_showrest = 0;    int do_showrest = 0;
644      int do_flip = 0;
645    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
646    
647    use_utf8 = 0;    use_utf8 = 0;
# Line 571  while (!done) Line 655  while (!done)
655    while (isspace(*p)) p++;    while (isspace(*p)) p++;
656    if (*p == 0) continue;    if (*p == 0) continue;
657    
658    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
659    complete, read more. */  
660      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
661        {
662        unsigned long int magic;
663        uschar sbuf[8];
664        FILE *f;
665    
666        p++;
667        pp = p + (int)strlen((char *)p);
668        while (isspace(pp[-1])) pp--;
669        *pp = 0;
670    
671        f = fopen((char *)p, "rb");
672        if (f == NULL)
673          {
674          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
675          continue;
676          }
677    
678        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
679    
680        true_size =
681          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
682        true_study_size =
683          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
684    
685        re = (real_pcre *)new_malloc(true_size);
686        regex_gotten_store = gotten_store;
687    
688        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
689    
690        magic = ((real_pcre *)re)->magic_number;
691        if (magic != MAGIC_NUMBER)
692          {
693          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
694            {
695            do_flip = 1;
696            }
697          else
698            {
699            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
700            fclose(f);
701            continue;
702            }
703          }
704    
705        fprintf(outfile, "Compiled regex%s loaded from %s\n",
706          do_flip? " (byte-inverted)" : "", p);
707    
708        /* Need to know if UTF-8 for printing data strings */
709    
710        new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
711        use_utf8 = (options & PCRE_UTF8) != 0;
712    
713        /* Now see if there is any following study data */
714    
715        if (true_study_size != 0)
716          {
717          pcre_study_data *psd;
718    
719          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
720          extra->flags = PCRE_EXTRA_STUDY_DATA;
721    
722          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
723          extra->study_data = psd;
724    
725          if (fread(psd, 1, true_study_size, f) != true_study_size)
726            {
727            FAIL_READ:
728            fprintf(outfile, "Failed to read data from %s\n", p);
729            if (extra != NULL) new_free(extra);
730            if (re != NULL) new_free(re);
731            fclose(f);
732            continue;
733            }
734          fprintf(outfile, "Study data loaded from %s\n", p);
735          do_study = 1;     /* To get the data output if requested */
736          }
737        else fprintf(outfile, "No study data\n");
738    
739        fclose(f);
740        goto SHOW_INFO;
741        }
742    
743      /* In-line pattern (the usual case). Get the delimiter and seek the end of
744      the pattern; if it isn't complete, read more. */
745    
746    delimiter = *p++;    delimiter = *p++;
747    
# Line 617  while (!done) Line 786  while (!done)
786    
787    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
788    
789    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
790      for callouts. */
791    
792    *pp++ = 0;    *pp++ = 0;
793      strcpy((char *)pbuffer, (char *)p);
794    
795    /* Look for options after final delimiter */    /* Look for options after final delimiter */
796    
# Line 639  while (!done) Line 810  while (!done)
810    
811        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
812        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
813          case 'C': options |= PCRE_AUTO_CALLOUT; break;
814        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
815        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
816          case 'F': do_flip = 1; break;
817        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
818        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
819        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
# Line 669  while (!done) Line 842  while (!done)
842        pp = ppp;        pp = ppp;
843        break;        break;
844    
845          case '>':
846          to_file = pp;
847          while (*pp != 0) pp++;
848          while (isspace(pp[-1])) pp--;
849          *pp = 0;
850          break;
851    
852        case '\n': case ' ': break;        case '\n': case ' ': break;
853    
854        default:        default:
855        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
856        goto SKIP_DATA;        goto SKIP_DATA;
# Line 685  while (!done) Line 866  while (!done)
866      {      {
867      int rc;      int rc;
868      int cflags = 0;      int cflags = 0;
869    
870      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
871      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
872      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
# Line 759  while (!done) Line 941  while (!done)
941                sizeof(real_pcre) -                sizeof(real_pcre) -
942                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
943    
944        /* Extract the size for possible writing before possibly flipping it,
945        and remember the store that was got. */
946    
947        true_size = ((real_pcre *)re)->size;
948        regex_gotten_store = gotten_store;
949    
950        /* If /S was present, study the regexp to generate additional info to
951        help with the matching. */
952    
953        if (do_study)
954          {
955          if (timeit)
956            {
957            register int i;
958            clock_t time_taken;
959            clock_t start_time = clock();
960            for (i = 0; i < LOOPREPEAT; i++)
961              extra = pcre_study(re, study_options, &error);
962            time_taken = clock() - start_time;
963            if (extra != NULL) free(extra);
964            fprintf(outfile, "  Study time %.3f milliseconds\n",
965              (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
966                (double)CLOCKS_PER_SEC);
967            }
968          extra = pcre_study(re, study_options, &error);
969          if (error != NULL)
970            fprintf(outfile, "Failed to study: %s\n", error);
971          else if (extra != NULL)
972            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
973          }
974    
975        /* If the 'F' option was present, we flip the bytes of all the integer
976        fields in the regex data block and the study block. This is to make it
977        possible to test PCRE's handling of byte-flipped patterns, e.g. those
978        compiled on a different architecture. */
979    
980        if (do_flip)
981          {
982          real_pcre *rre = (real_pcre *)re;
983          rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
984          rre->size = byteflip(rre->size, sizeof(rre->size));
985          rre->options = byteflip(rre->options, sizeof(rre->options));
986          rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
987          rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
988          rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
989          rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
990          rre->name_table_offset = byteflip(rre->name_table_offset,
991            sizeof(rre->name_table_offset));
992          rre->name_entry_size = byteflip(rre->name_entry_size,
993            sizeof(rre->name_entry_size));
994          rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
995    
996          if (extra != NULL)
997            {
998            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
999            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1000            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1001            }
1002          }
1003    
1004        /* Extract information from the compiled data if required */
1005    
1006        SHOW_INFO:
1007    
1008      if (do_showinfo)      if (do_showinfo)
1009        {        {
1010        unsigned long int get_options;        unsigned long int get_options, all_options;
1011        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1012        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char;
1013        int nameentrysize, namecount;        int nameentrysize, namecount;
1014        const uschar *nametable;        const uschar *nametable;
       size_t size;  
1015    
1016        if (do_debug)        if (do_debug)
1017          {          {
# Line 802  while (!done) Line 1047  while (!done)
1047              get_options, old_options);              get_options, old_options);
1048          }          }
1049    
1050        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1051          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1052          size, gotten_store);          size, regex_gotten_store);
1053    
1054        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1055        if (backrefmax > 0)        if (backrefmax > 0)
# Line 822  while (!done) Line 1067  while (!done)
1067            }            }
1068          }          }
1069    
1070          /* The NOPARTIAL bit is a private bit in the options, so we have
1071          to fish it out via our back door */
1072    
1073          all_options = ((real_pcre *)re)->options;
1074          if (do_flip)
1075            {
1076            all_options = byteflip(all_options, sizeof(all_options));
1077            }
1078    
1079          if ((all_options & PCRE_NOPARTIAL) != 0)
1080            fprintf(outfile, "Partial matching not supported\n");
1081    
1082        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1083          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",
1084            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
# Line 871  while (!done) Line 1128  while (!done)
1128          else          else
1129            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1130          }          }
       }  
   
     /* If /S was present, study the regexp to generate additional info to  
     help with the matching. */  
   
     if (do_study)  
       {  
       if (timeit)  
         {  
         register int i;  
         clock_t time_taken;  
         clock_t start_time = clock();  
         for (i = 0; i < LOOPREPEAT; i++)  
           extra = pcre_study(re, study_options, &error);  
         time_taken = clock() - start_time;  
         if (extra != NULL) free(extra);  
         fprintf(outfile, "  Study time %.3f milliseconds\n",  
           (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /  
             (double)CLOCKS_PER_SEC);  
         }  
   
       extra = pcre_study(re, study_options, &error);  
       if (error != NULL)  
         fprintf(outfile, "Failed to study: %s\n", error);  
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
1131    
1132        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
1133        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
1134        so messes up the test suite. */        so messes up the test suite. (And with the /F option, it might be
1135          flipped.) */
1136    
1137        else if (do_showinfo)        if (do_study)
1138          {          {
1139          size_t size;          if (extra == NULL)
1140          uschar *start_bits = NULL;            fprintf(outfile, "Study returned NULL\n");
         new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);  
         new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);  
         /* fprintf(outfile, "Study size = %d\n", size); */  
         if (start_bits == NULL)  
           fprintf(outfile, "No starting character set\n");  
1141          else          else
1142            {            {
1143            int i;            uschar *start_bits = NULL;
1144            int c = 24;            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1145            fprintf(outfile, "Starting character set: ");  
1146            for (i = 0; i < 256; i++)            if (start_bits == NULL)
1147                fprintf(outfile, "No starting byte set\n");
1148              else
1149              {              {
1150              if ((start_bits[i/8] & (1<<(i%8))) != 0)              int i;
1151                int c = 24;
1152                fprintf(outfile, "Starting byte set: ");
1153                for (i = 0; i < 256; i++)
1154                {                {
1155                if (c > 75)                if ((start_bits[i/8] & (1<<(i&7))) != 0)
                 {  
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
1156                  {                  {
1157                  fprintf(outfile, "%c ", i);                  if (c > 75)
1158                  c += 2;                    {
1159                  }                    fprintf(outfile, "\n  ");
1160                else                    c = 2;
1161                  {                    }
1162                  fprintf(outfile, "\\x%02x ", i);                  if (isprint(i) && i != ' ')
1163                  c += 5;                    {
1164                      fprintf(outfile, "%c ", i);
1165                      c += 2;
1166                      }
1167                    else
1168                      {
1169                      fprintf(outfile, "\\x%02x ", i);
1170                      c += 5;
1171                      }
1172                  }                  }
1173                }                }
1174                fprintf(outfile, "\n");
1175              }              }
           fprintf(outfile, "\n");  
1176            }            }
1177          }          }
1178        }        }
1179      }  
1180        /* If the '>' option was present, we write out the regex to a file, and
1181        that is all. The first 8 bytes of the file are the regex length and then
1182        the study length, in big-endian order. */
1183    
1184        if (to_file != NULL)
1185          {
1186          FILE *f = fopen((char *)to_file, "wb");
1187          if (f == NULL)
1188            {
1189            fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1190            }
1191          else
1192            {
1193            uschar sbuf[8];
1194            sbuf[0] = (true_size >> 24)  & 255;
1195            sbuf[1] = (true_size >> 16)  & 255;
1196            sbuf[2] = (true_size >>  8)  & 255;
1197            sbuf[3] = (true_size)  & 255;
1198    
1199            sbuf[4] = (true_study_size >> 24)  & 255;
1200            sbuf[5] = (true_study_size >> 16)  & 255;
1201            sbuf[6] = (true_study_size >>  8)  & 255;
1202            sbuf[7] = (true_study_size)  & 255;
1203    
1204            if (fwrite(sbuf, 1, 8, f) < 8 ||
1205                fwrite(re, 1, true_size, f) < true_size)
1206              {
1207              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1208              }
1209            else
1210              {
1211              fprintf(outfile, "Compiled regex written to %s\n", to_file);
1212              if (extra != NULL)
1213                {
1214                if (fwrite(extra->study_data, 1, true_study_size, f) <
1215                    true_study_size)
1216                  {
1217                  fprintf(outfile, "Write error on %s: %s\n", to_file,
1218                    strerror(errno));
1219                  }
1220                else fprintf(outfile, "Study data written to %s\n", to_file);
1221                }
1222              }
1223            fclose(f);
1224            }
1225          continue;  /* With next regex */
1226          }
1227        }        /* End of non-POSIX compile */
1228    
1229    /* Read data lines and test them */    /* Read data lines and test them */
1230    
# Line 1045  while (!done) Line 1328  while (!done)
1328            }            }
1329          break;          break;
1330    
1331          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1332          p--;          p--;
1333          continue;          continue;
1334    
1335            case '>':
1336            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1337            continue;
1338    
1339          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1340          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1341          continue;          continue;
# Line 1159  while (!done) Line 1446  while (!done)
1446          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1447          continue;          continue;
1448    
1449            case 'P':
1450            options |= PCRE_PARTIAL;
1451            continue;
1452    
1453          case 'S':          case 'S':
1454          show_malloc = 1;          show_malloc = 1;
1455          continue;          continue;
# Line 1269  while (!done) Line 1560  while (!done)
1560              min = mid;              min = mid;
1561              mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;              mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1562              }              }
1563            else if (count >= 0 || count == PCRE_ERROR_NOMATCH)            else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1564                                     count == PCRE_ERROR_PARTIAL)
1565              {              {
1566              if (mid == min + 1)              if (mid == min + 1)
1567                {                {
# Line 1305  while (!done) Line 1597  while (!done)
1597        /* The normal case is just to do the match once, with the default        /* The normal case is just to do the match once, with the default
1598        value of match_limit. */        value of match_limit. */
1599    
1600        else count = pcre_exec(re, extra, (char *)bptr, len,        else
1601          start_offset, options | g_notempty, use_offsets, use_size_offsets);          {
1602            count = pcre_exec(re, extra, (char *)bptr, len,
1603              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1604            }
1605    
1606        if (count == 0)        if (count == 0)
1607          {          {
# Line 1393  while (!done) Line 1688  while (!done)
1688            }            }
1689          }          }
1690    
1691          /* There was a partial match */
1692    
1693          else if (count == PCRE_ERROR_PARTIAL)
1694            {
1695            fprintf(outfile, "Partial match\n");
1696            break;  /* Out of the /g loop */
1697            }
1698    
1699        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
1700        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end.
1701        We want to advance the start offset, and continue. In the case of UTF-8        We want to advance the start offset, and continue. In the case of UTF-8

Legend:
Removed from v.73  
changed lines
  Added in v.75

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12