/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 39 by nigel, Sat Feb 24 21:39:13 2007 UTC revision 55 by nigel, Sat Feb 24 21:39:46 2007 UTC
# Line 34  Makefile. */ Line 34  Makefile. */
34    
35  static FILE *outfile;  static FILE *outfile;
36  static int log_store = 0;  static int log_store = 0;
37    static size_t gotten_store;
38    
39    
40    
/* Tables used by the UTF-8 conversion functions. In each table, entry i
describes a character that is encoded with i additional bytes after the lead
byte (that is, i+1 bytes in total). */

/* Largest character value that can be encoded with i additional bytes */

static const int utf8_table1[] = {
  0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};

static const int utf8_table1_size =
  sizeof(utf8_table1)/sizeof(utf8_table1[0]);

/* Marker bits that are ORed into the lead byte */

static const int utf8_table2[] = {
  0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};

/* Mask that extracts the data bits from the lead byte */

static const int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};


/*************************************************
*       Convert character value to UTF-8         *
*************************************************/

/* This function takes an integer value in the range 0 - 0x7fffffff
and encodes it as a UTF-8 character in 1 to 6 bytes. The most significant
bits of the value go into the lead byte, and each following byte carries the
next six bits, most significant group first.

Arguments:
  cvalue     the character value
  buffer     pointer to buffer for result - at least 6 bytes long

Returns:     number of characters placed in the buffer
             -1 if input character is negative
             0 if input character is positive but too big (only when
             int is longer than 32 bits)
*/

static int
ord2utf8(int cvalue, unsigned char *buffer)
{
register int i, j;

if (cvalue < 0) return -1;

/* Find the shortest encoding that can hold the value */

for (i = 0; i < utf8_table1_size; i++)
  if (cvalue <= utf8_table1[i]) break;
if (i >= utf8_table1_size) return 0;

/* Fill in the continuation bytes from the last one backwards, so that the
least significant six bits end up in the final byte. What is left of the
value afterwards belongs in the lead byte. */

buffer += i;
for (j = i; j > 0; j--)
  {
  *buffer-- = (unsigned char)(0x80 | (cvalue & 0x3f));
  cvalue >>= 6;
  }
*buffer = (unsigned char)(utf8_table2[i] | cvalue);
return i + 1;
}


/*************************************************
*            Convert UTF-8 string to value       *
*************************************************/

/* This function takes one or more bytes that represents a UTF-8 character,
and returns the value of the character. The lead byte supplies the most
significant bits; each continuation byte supplies the next six bits.

Argument:
  buffer   a pointer to the byte vector
  vptr     a pointer to an int to receive the value

Returns:   >  0 => the number of bytes consumed
           -6 to 0 => malformed UTF-8 character at offset = (-return)
*/

int
utf82ord(unsigned char *buffer, int *vptr)
{
int c = *buffer++;
int d = c;
int i, j, s;

/* Count the leading 1-bits of the first byte */

for (i = -1; i < 6; i++)               /* i is number of additional bytes */
  {
  if ((d & 0x80) == 0) break;
  d <<= 1;
  }

if (i == -1) { *vptr = c; return 1; }  /* ascii character */
if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */

/* i now has a value in the range 1-5. The data bits of the lead byte are the
most significant ones, so they are shifted up past all the continuation
bits. The largest shift is 30 (i == 5, one data bit), which fits in an int
of at least 32 bits. */

s = 6*i;
d = (c & utf8_table3[i]) << s;

for (j = 0; j < i; j++)
  {
  c = *buffer++;
  if ((c & 0xc0) != 0x80) return -(j+1);
  s -= 6;
  d |= (c & 0x3f) << s;
  }

/* Check that encoding was the correct unique one */

for (j = 0; j < utf8_table1_size; j++)
  if (d <= utf8_table1[j]) break;
if (j != i) return -(i+1);

/* Valid value */

*vptr = d;
return i+1;
}
142    
143    
144    
145    
146    
147    
# Line 48  static const char *OP_names[] = { Line 156  static const char *OP_names[] = {
156    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
157    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
158    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
159    "class", "Ref",    "class", "Ref", "Recurse",
160    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
161    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
162    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Branumber", "Bra"
163  };  };
164    
165    
# Line 70  for(;;) Line 178  for(;;)
178    
179    if (*code >= OP_BRA)    if (*code >= OP_BRA)
180      {      {
181      fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);      if (*code - OP_BRA > EXTRACT_BASIC_MAX)
182          fprintf(outfile, "%3d Bra extra", (code[1] << 8) + code[2]);
183        else
184          fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
185      code += 2;      code += 2;
186      }      }
187    
# Line 86  for(;;) Line 197  for(;;)
197      code++;      code++;
198      break;      break;
199    
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
200      case OP_CHARS:      case OP_CHARS:
201      charlength = *(++code);      charlength = *(++code);
202      fprintf(outfile, "%3d ", charlength);      fprintf(outfile, "%3d ", charlength);
# Line 113  for(;;) Line 214  for(;;)
214      case OP_ASSERTBACK:      case OP_ASSERTBACK:
215      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
216      case OP_ONCE:      case OP_ONCE:
217      fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);      case OP_COND:
218      code += 2;      case OP_BRANUMBER:
     break;  
   
219      case OP_REVERSE:      case OP_REVERSE:
220        case OP_CREF:
221      fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);      fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
222      code += 2;      code += 2;
223      break;      break;
# Line 190  for(;;) Line 290  for(;;)
290      break;      break;
291    
292      case OP_REF:      case OP_REF:
293      fprintf(outfile, "    \\%d", *(++code));      fprintf(outfile, "    \\%d", (code[1] << 8) | code[2]);
294      code++;      code += 3;
295      goto CLASS_REF_REPEAT;      goto CLASS_REF_REPEAT;
296    
297      case OP_CLASS:      case OP_CLASS:
# Line 264  for(;;) Line 364  for(;;)
364    
365    
366    
367  /* Character string printing function. */  /* Character string printing function. A "normal" and a UTF-8 version. */
368    
369  static void pchars(unsigned char *p, int length)  static void pchars(unsigned char *p, int length, int utf8)
370  {  {
371  int c;  int c;
372  while (length-- > 0)  while (length-- > 0)
373      {
374      if (utf8)
375        {
376        int rc = utf82ord(p, &c);
377        if (rc > 0)
378          {
379          length -= rc - 1;
380          p += rc;
381          if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);
382            else fprintf(outfile, "\\x{%02x}", c);
383          continue;
384          }
385        }
386    
387       /* Not UTF-8, or malformed UTF-8  */
388    
389    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
390      else fprintf(outfile, "\\x%02x", c);      else fprintf(outfile, "\\x%02x", c);
391      }
392  }  }
393    
394    
# Line 281  compiled re. */ Line 398  compiled re. */
398    
399  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
400  {  {
401    gotten_store = size;
402  if (log_store)  if (log_store)
403    fprintf(outfile, "Memory allocation (code space): %d\n",    fprintf(outfile, "Memory allocation (code space): %d\n",
404      (int)((int)size - offsetof(real_pcre, code[0])));      (int)((int)size - offsetof(real_pcre, code[0])));
# Line 289  return malloc(size); Line 407  return malloc(size);
407    
408    
409    
410    
411    /* Get one piece of information from the pcre_fullinfo() function */
412    
413    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
414    {
415    int rc;
416    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
417      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
418    }
419    
420    
421    
422    
423  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
424  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
425  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 302  int op = 1; Line 433  int op = 1;
433  int timeit = 0;  int timeit = 0;
434  int showinfo = 0;  int showinfo = 0;
435  int showstore = 0;  int showstore = 0;
436    int size_offsets = 45;
437    int size_offsets_max;
438    int *offsets;
439    #if !defined NOPOSIX
440  int posix = 0;  int posix = 0;
441    #endif
442  int debug = 0;  int debug = 0;
443  int done = 0;  int done = 0;
444  unsigned char buffer[30000];  unsigned char buffer[30000];
# Line 316  outfile = stdout; Line 452  outfile = stdout;
452    
453  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
454    {    {
455      char *endptr;
456    
457    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
458      showstore = 1;      showstore = 1;
459    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
460    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
461    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
462      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
463          ((size_offsets = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
464        {
465        op++;
466        argc--;
467        }
468    #if !defined NOPOSIX
469    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
470    #endif
471    else    else
472      {      {
473      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
474      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
475      printf("  -d   debug: show compiled code; implies -i\n"      printf("  -d     debug: show compiled code; implies -i\n"
476             "  -i   show information about compiled pattern\n"             "  -i     show information about compiled pattern\n"
477             "  -p   use POSIX interface\n"             "  -o <n> set size of offsets vector to <n>\n");
478             "  -s   output store information\n"  #if !defined NOPOSIX
479             "  -t   time compilation and execution\n");      printf("  -p     use POSIX interface\n");
480    #endif
481        printf("  -s     output store information\n"
482               "  -t     time compilation and execution\n");
483      return 1;      return 1;
484      }      }
485    op++;    op++;
486    argc--;    argc--;
487    }    }
488    
489    /* Get the store for the offsets vector, and remember what it was */
490    
491    size_offsets_max = size_offsets;
492    offsets = malloc(size_offsets_max * sizeof(int));
493    if (offsets == NULL)
494      {
495      printf("** Failed to get %d bytes of memory for offsets vector\n",
496        size_offsets_max * sizeof(int));
497      return 1;
498      }
499    
500  /* Sort out the input and output files */  /* Sort out the input and output files */
501    
502  if (argc > 1)  if (argc > 1)
# Line 376  while (!done) Line 536  while (!done)
536    
537  #if !defined NOPOSIX  /* There are still compilers that require no indent */  #if !defined NOPOSIX  /* There are still compilers that require no indent */
538    regex_t preg;    regex_t preg;
539      int do_posix = 0;
540  #endif  #endif
541    
542    const char *error;    const char *error;
543    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
544    unsigned const char *tables = NULL;    const unsigned char *tables = NULL;
545    int do_study = 0;    int do_study = 0;
546    int do_debug = debug;    int do_debug = debug;
547    int do_G = 0;    int do_G = 0;
548    int do_g = 0;    int do_g = 0;
549    int do_showinfo = showinfo;    int do_showinfo = showinfo;
550    int do_showrest = 0;    int do_showrest = 0;
551    int do_posix = 0;    int utf8 = 0;
552    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
553    
554    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
# Line 479  while (!done) Line 640  while (!done)
640        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
641        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
642        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
643          case '8': options |= PCRE_UTF8; utf8 = 1; break;
644    
645        case 'L':        case 'L':
646        ppp = pp;        ppp = pp;
# Line 573  while (!done) Line 735  while (!done)
735        goto CONTINUE;        goto CONTINUE;
736        }        }
737    
738      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required. There are now two
739        info-returning functions. The old one has a limited interface and
740        returns only limited data. Check that it agrees with the newer one. */
741    
742      if (do_showinfo)      if (do_showinfo)
743        {        {
744        int first_char, count;        unsigned long int get_options;
745          int old_first_char, old_options, old_count;
746          int count, backrefmax, first_char, need_char;
747          size_t size;
748    
749        if (do_debug) print_internals(re);        if (do_debug) print_internals(re);
750    
751        count = pcre_info(re, &options, &first_char);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
752          new_info(re, NULL, PCRE_INFO_SIZE, &size);
753          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
754          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
755          new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);
756          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
757    
758          old_count = pcre_info(re, &old_options, &old_first_char);
759        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
760          "Error %d while reading info\n", count);          "Error %d from pcre_info()\n", count);
761        else        else
762          {          {
763          fprintf(outfile, "Identifying subpattern count = %d\n", count);          if (old_count != count) fprintf(outfile,
764          if (options == 0) fprintf(outfile, "No options\n");            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
765            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",              old_count);
766              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
767              ((options & PCRE_CASELESS) != 0)? " caseless" : "",          if (old_first_char != first_char) fprintf(outfile,
768              ((options & PCRE_EXTENDED) != 0)? " extended" : "",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
769              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",              first_char, old_first_char);
770              ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
771              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",          if (old_options != (int)get_options) fprintf(outfile,
772              ((options & PCRE_EXTRA) != 0)? " extra" : "",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
773              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");              get_options, old_options);
774            }
         if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)  
           fprintf(outfile, "Case state changes\n");  
775    
776          if (first_char == -1)        if (size != gotten_store) fprintf(outfile,
777            {          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
778            fprintf(outfile, "First char at start or follows \\n\n");          size, gotten_store);
779            }  
780          else if (first_char < 0)        fprintf(outfile, "Capturing subpattern count = %d\n", count);
781            {        if (backrefmax > 0)
782            fprintf(outfile, "No first char\n");          fprintf(outfile, "Max back reference = %d\n", backrefmax);
783            }        if (get_options == 0) fprintf(outfile, "No options\n");
784            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
785              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
786              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
787              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
788              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
789              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
790              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
791              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
792              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
793              ((get_options & PCRE_UTF8) != 0)? " utf8" : "");
794    
795          if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
796            fprintf(outfile, "Case state changes\n");
797    
798          if (first_char == -1)
799            {
800            fprintf(outfile, "First char at start or follows \\n\n");
801            }
802          else if (first_char < 0)
803            {
804            fprintf(outfile, "No first char\n");
805            }
806          else
807            {
808            if (isprint(first_char))
809              fprintf(outfile, "First char = \'%c\'\n", first_char);
810          else          else
811            {            fprintf(outfile, "First char = %d\n", first_char);
812            if (isprint(first_char))          }
             fprintf(outfile, "First char = \'%c\'\n", first_char);  
           else  
             fprintf(outfile, "First char = %d\n", first_char);  
           }  
813    
814          if (((((real_pcre *)re)->options) & PCRE_REQCHSET) != 0)        if (need_char < 0)
815            {          {
816            int req_char = ((real_pcre *)re)->req_char;          fprintf(outfile, "No need char\n");
817            if (isprint(req_char))          }
818              fprintf(outfile, "Req char = \'%c\'\n", req_char);        else
819            else          {
820              fprintf(outfile, "Req char = %d\n", req_char);          if (isprint(need_char))
821            }            fprintf(outfile, "Need char = \'%c\'\n", need_char);
822          else fprintf(outfile, "No req char\n");          else
823              fprintf(outfile, "Need char = %d\n", need_char);
824          }          }
825        }        }
826    
# Line 654  while (!done) Line 849  while (!done)
849        else if (extra == NULL)        else if (extra == NULL)
850          fprintf(outfile, "Study returned NULL\n");          fprintf(outfile, "Study returned NULL\n");
851    
       /* This looks at internal information. A bit kludgy to do it this  
       way, but it is useful for testing. */  
   
852        else if (do_showinfo)        else if (do_showinfo)
853          {          {
854          real_pcre_extra *xx = (real_pcre_extra *)extra;          uschar *start_bits = NULL;
855          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
856            if (start_bits == NULL)
857            fprintf(outfile, "No starting character set\n");            fprintf(outfile, "No starting character set\n");
858          else          else
859            {            {
# Line 669  while (!done) Line 862  while (!done)
862            fprintf(outfile, "Starting character set: ");            fprintf(outfile, "Starting character set: ");
863            for (i = 0; i < 256; i++)            for (i = 0; i < 256; i++)
864              {              {
865              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if ((start_bits[i/8] & (1<<(i%8))) != 0)
866                {                {
867                if (c > 75)                if (c > 75)
868                  {                  {
# Line 700  while (!done) Line 893  while (!done)
893      {      {
894      unsigned char *q;      unsigned char *q;
895      unsigned char *bptr = dbuffer;      unsigned char *bptr = dbuffer;
896        int use_size_offsets = size_offsets;
897      int count, c;      int count, c;
898      int copystrings = 0;      int copystrings = 0;
899      int getstrings = 0;      int getstrings = 0;
900      int getlist = 0;      int getlist = 0;
901      int gmatched = 0;      int gmatched = 0;
902      int start_offset = 0;      int start_offset = 0;
903      int offsets[45];      int g_notempty = 0;
     int size_offsets = sizeof(offsets)/sizeof(int);  
904    
905      options = 0;      options = 0;
906    
# Line 751  while (!done) Line 944  while (!done)
944          break;          break;
945    
946          case 'x':          case 'x':
947    
948            /* Handle \x{..} specially - new Perl thing for utf8 */
949    
950            if (*p == '{')
951              {
952              unsigned char *pt = p;
953              c = 0;
954              while (isxdigit(*(++pt)))
955                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
956              if (*pt == '}')
957                {
958                unsigned char buffer[8];
959                int ii, utn;
960                utn = ord2utf8(c, buffer);
961                for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];
962                c = buffer[ii];   /* Last byte */
963                p = pt + 1;
964                break;
965                }
966              /* Not correct form; fall through */
967              }
968    
969            /* Ordinary \x */
970    
971          c = 0;          c = 0;
972          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
973            {            {
# Line 791  while (!done) Line 1008  while (!done)
1008    
1009          case 'O':          case 'O':
1010          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1011          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
1012              {
1013    
1014    if (offsets != NULL)
1015    
1016              free(offsets);
1017              size_offsets_max = n;
1018              offsets = malloc(size_offsets_max * sizeof(int));
1019              if (offsets == NULL)
1020                {
1021                printf("** Failed to get %d bytes of memory for offsets vector\n",
1022                  size_offsets_max * sizeof(int));
1023                return 1;
1024                }
1025              }
1026            use_size_offsets = n;
1027    
1028    if (n == 0)
1029      {
1030      free(offsets);
1031      offsets = NULL;
1032      size_offsets_max = 0;
1033      }
1034    
1035          continue;          continue;
1036    
1037          case 'Z':          case 'Z':
# Line 811  while (!done) Line 1051  while (!done)
1051        {        {
1052        int rc;        int rc;
1053        int eflags = 0;        int eflags = 0;
1054        regmatch_t pmatch[30];        regmatch_t *pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);
1055        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1056        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1057    
1058        rc = regexec(&preg, (const char *)bptr,        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
         sizeof(pmatch)/sizeof(regmatch_t), pmatch, eflags);  
1059    
1060        if (rc != 0)        if (rc != 0)
1061          {          {
# Line 826  while (!done) Line 1065  while (!done)
1065        else        else
1066          {          {
1067          size_t i;          size_t i;
1068          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < use_size_offsets; i++)
1069            {            {
1070            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1071              {              {
1072              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1073              pchars(dbuffer + pmatch[i].rm_so,              pchars(dbuffer + pmatch[i].rm_so,
1074                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, utf8);
1075              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1076              if (i == 0 && do_showrest)              if (i == 0 && do_showrest)
1077                {                {
1078                fprintf(outfile, " 0+ ");                fprintf(outfile, " 0+ ");
1079                pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo);                pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);
1080                fprintf(outfile, "\n");                fprintf(outfile, "\n");
1081                }                }
1082              }              }
1083            }            }
1084          }          }
1085          free(pmatch);
1086        }        }
1087    
1088      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
# Line 859  while (!done) Line 1099  while (!done)
1099          clock_t start_time = clock();          clock_t start_time = clock();
1100          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < LOOPREPEAT; i++)
1101            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
1102              start_offset, options, offsets, size_offsets);              start_offset, options | g_notempty, offsets, use_size_offsets);
1103          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1104          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1105            ((double)time_taken * 1000.0)/            ((double)time_taken * 1000.0)/
# Line 867  while (!done) Line 1107  while (!done)
1107          }          }
1108    
1109        count = pcre_exec(re, extra, (char *)bptr, len,        count = pcre_exec(re, extra, (char *)bptr, len,
1110          start_offset, options, offsets, size_offsets);          start_offset, options | g_notempty, offsets, use_size_offsets);
1111    
1112        if (count == 0)        if (count == 0)
1113          {          {
1114          fprintf(outfile, "Matched, but too many substrings\n");          fprintf(outfile, "Matched, but too many substrings\n");
1115          count = size_offsets/3;          count = use_size_offsets/3;
1116          }          }
1117    
1118        /* Matched */        /* Matched */
# Line 887  while (!done) Line 1127  while (!done)
1127            else            else
1128              {              {
1129              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1130              pchars(bptr + offsets[i], offsets[i+1] - offsets[i]);              pchars(bptr + offsets[i], offsets[i+1] - offsets[i], utf8);
1131              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1132              if (i == 0)              if (i == 0)
1133                {                {
1134                if (do_showrest)                if (do_showrest)
1135                  {                  {
1136                  fprintf(outfile, " 0+ ");                  fprintf(outfile, " 0+ ");
1137                  pchars(bptr + offsets[i+1], len - offsets[i+1]);                  pchars(bptr + offsets[i+1], len - offsets[i+1], utf8);
1138                  fprintf(outfile, "\n");                  fprintf(outfile, "\n");
1139                  }                  }
1140                }                }
# Line 927  while (!done) Line 1167  while (!done)
1167              else              else
1168                {                {
1169                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1170                free((void *)substring);                /* free((void *)substring); */
1171                  pcre_free_substring(substring);
1172                }                }
1173              }              }
1174            }            }
# Line 945  while (!done) Line 1186  while (!done)
1186                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1187              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
1188                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
1189              free((void *)stringlist);              /* free((void *)stringlist); */
1190                pcre_free_substring_list(stringlist);
1191              }              }
1192            }            }
1193          }          }
1194    
1195        /* Failed to match */        /* Failed to match. If this is a /g or /G loop and we previously set
1196          g_notempty after a null match, this is not necessarily the end.
1197          We want to advance the start offset, and continue. Fudge the offset
1198          values to achieve this. We won't be at the end of the string - that
1199          was checked before setting g_notempty. */
1200    
1201        else        else
1202          {          {
1203          if (gmatched == 0)          if (g_notempty != 0)
1204            {            {
1205            if (count == -1) fprintf(outfile, "No match\n");            offsets[0] = start_offset;
1206              else fprintf(outfile, "Error %d\n", count);            offsets[1] = start_offset + 1;
1207              }
1208            else
1209              {
1210              if (gmatched == 0)   /* Error if no previous matches */
1211                {
1212                if (count == -1) fprintf(outfile, "No match\n");
1213                  else fprintf(outfile, "Error %d\n", count);
1214                }
1215              break;  /* Out of the /g loop */
1216            }            }
         break;  /* Out of the /g loop */  
1217          }          }
1218    
1219        /* If not /g or /G we are done */        /* If not /g or /G we are done */
1220    
1221        if (!do_g && !do_G) break;        if (!do_g && !do_G) break;
1222    
1223        /* If we have matched an empty string, set PCRE_NOTEMPTY for the next        /* If we have matched an empty string, first check to see if we are at
1224        match. This mimics what Perl's /g option does. */        the end of the subject. If so, the /g loop is over. Otherwise, mimic
1225          what Perl's /g option does. This turns out to be rather cunning. First
1226          we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1227          same point. If this fails (picked up above) we advance to the next
1228          character. */
1229    
1230        if (offsets[1] == offsets[0])        g_notempty = 0;
1231          options |= PCRE_NOTEMPTY;        if (offsets[0] == offsets[1])
1232        else          {
1233          options &= ~PCRE_NOTEMPTY;          if (offsets[0] == len) break;
1234            g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1235            }
1236    
1237        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
1238    

Legend:
Removed from v.39  
changed lines
  Added in v.55

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12