/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 96 by nigel, Fri Mar 2 13:10:43 2007 UTC revision 230 by ph10, Mon Sep 10 13:23:56 2007 UTC
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  */  */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include <config.h>
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
# Line 67  input mode under Windows. */ Line 71  input mode under Windows. */
71  #endif  #endif
72    
73    
74  #define PCRE_SPY        /* For Win32 build, import data, not export */  /* We have to include pcre_internal.h because we need the internal info for
75    displaying the results of pcre_study() and we also need to know about the
76  /* We include pcre_internal.h because we need the internal info for displaying  internal macros, structures, and other internal data values; pcretest has
77  the results of pcre_study() and we also need to know about the internal  "inside information" compared to a program that strictly follows the PCRE API.
78  macros, structures, and other internal data values; pcretest has "inside  
79  information" compared to a program that strictly follows the PCRE API. */  Although pcre_internal.h does itself include pcre.h, we explicitly include it
80    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81    appropriately for an application, not for building PCRE. */
82    
83    #include "pcre.h"
84  #include "pcre_internal.h"  #include "pcre_internal.h"
85    
86  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* We need access to the data tables that PCRE uses. So as not to have to keep
# Line 114  Makefile. */ Line 121  Makefile. */
121  #include "pcreposix.h"  #include "pcreposix.h"
122  #endif  #endif
123    
124  /* It is also possible, for the benefit of the version imported into Exim, to  /* It is also possible, for the benefit of the version currently imported into
125  build pcretest without support for UTF8 (define NOUTF8), without the interface  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
126  to the DFA matcher (NODFA), and without the doublecheck of the old "info"  interface to the DFA matcher (NODFA), and without the doublecheck of the old
127  function (define NOINFOCHECK). */  "info" function (define NOINFOCHECK). In fact, we automatically cut out the
128    UTF8 support if PCRE is built without it. */
129    
130    #ifndef SUPPORT_UTF8
131    #ifndef NOUTF8
132    #define NOUTF8
133    #endif
134    #endif
135    
136    
137  /* Other parameters */  /* Other parameters */
# Line 142  static int callout_count; Line 156  static int callout_count;
156  static int callout_extra;  static int callout_extra;
157  static int callout_fail_count;  static int callout_fail_count;
158  static int callout_fail_id;  static int callout_fail_id;
159    static int debug_lengths;
160  static int first_callout;  static int first_callout;
161  static int locale_set = 0;  static int locale_set = 0;
162  static int show_malloc;  static int show_malloc;
# Line 649  return count; Line 664  return count;
664    
665    
666  /*************************************************  /*************************************************
667    *         Case-independent strncmp() function    *
668    *************************************************/
669    
670    /*
671    Arguments:
672      s         first string
673      t         second string
674      n         number of characters to compare
675    
676    Returns:    < 0, = 0, or > 0, according to the comparison
677    */
678    
679    static int
680    strncmpic(uschar *s, uschar *t, int n)
681    {
682    while (n--)
683      {
684      int c = tolower(*s++) - tolower(*t++);
685      if (c) return c;
686      }
687    return 0;
688    }
689    
690    
691    
692    /*************************************************
693  *         Check newline indicator                *  *         Check newline indicator                *
694  *************************************************/  *************************************************/
695    
696  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes, where
697  xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
698    no match.
699    
700  Arguments:  Arguments:
701    p           points after the leading '<'    p           points after the leading '<'
# Line 665  Returns: appropriate PCRE_NEWLINE_x Line 707  Returns: appropriate PCRE_NEWLINE_x
707  static int  static int
708  check_newline(uschar *p, FILE *f)  check_newline(uschar *p, FILE *f)
709  {  {
710  if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
711  if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
712  if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
713  if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
714    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
715  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
716  return 0;  return 0;
717  }  }
# Line 840  while (argc > 1 && argv[op][0] == '-') Line 883  while (argc > 1 && argv[op][0] == '-')
883      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
884      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
885        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
886          (rc == -2)? "ANYCRLF" :
887        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
888      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
889      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
# Line 851  while (argc > 1 && argv[op][0] == '-') Line 895  while (argc > 1 && argv[op][0] == '-')
895      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %d\n", rc);
896      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
897      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
898      exit(0);      goto EXIT;
899      }      }
900    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(argv[op], "-help") == 0 ||
901             strcmp(argv[op], "--help") == 0)             strcmp(argv[op], "--help") == 0)
# Line 877  offsets = (int *)malloc(size_offsets_max Line 921  offsets = (int *)malloc(size_offsets_max
921  if (offsets == NULL)  if (offsets == NULL)
922    {    {
923    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
924      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
925    yield = 1;    yield = 1;
926    goto EXIT;    goto EXIT;
927    }    }
# Line 945  while (!done) Line 989  while (!done)
989    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
990    
991    use_utf8 = 0;    use_utf8 = 0;
992      debug_lengths = 1;
993    
994    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
995    if (extend_inputline(infile, buffer) == NULL) break;    if (extend_inputline(infile, buffer) == NULL) break;
# Line 1127  while (!done) Line 1172  while (!done)
1172        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1173        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1174        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1175          case 'Z': debug_lengths = 0; break;
1176        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1177        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1178    
# Line 1303  while (!done) Line 1349  while (!done)
1349        rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));        rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1350        rre->size = byteflip(rre->size, sizeof(rre->size));        rre->size = byteflip(rre->size, sizeof(rre->size));
1351        rre->options = byteflip(rre->options, sizeof(rre->options));        rre->options = byteflip(rre->options, sizeof(rre->options));
1352          rre->flags = byteflip(rre->flags, sizeof(rre->flags));
1353        rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));        rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1354        rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));        rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1355        rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));        rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
# Line 1328  while (!done) Line 1375  while (!done)
1375      if (do_debug)      if (do_debug)
1376        {        {
1377        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
1378        pcre_printint(re, outfile);        pcre_printint(re, outfile, debug_lengths);
1379        }        }
1380    
1381      if (do_showinfo)      if (do_showinfo)
# Line 1337  while (!done) Line 1384  while (!done)
1384  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1385        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1386  #endif  #endif
1387        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char, okpartial, jchanged,
1388            hascrorlf;
1389        int nameentrysize, namecount;        int nameentrysize, namecount;
1390        const uschar *nametable;        const uschar *nametable;
1391    
# Line 1350  while (!done) Line 1398  while (!done)
1398        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1399        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1400        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1401          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1402          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1403          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1404    
1405  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1406        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
# Line 1391  while (!done) Line 1442  while (!done)
1442            }            }
1443          }          }
1444    
1445        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1446        to fish it out via our back door */        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1447    
1448        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
1449        if (do_flip)        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
          }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
1450    
1451        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1452          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
# Line 1419  while (!done) Line 1464  while (!done)
1464            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1465            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1466    
1467          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1468    
1469        switch (get_options & PCRE_NEWLINE_BITS)        switch (get_options & PCRE_NEWLINE_BITS)
1470          {          {
1471          case PCRE_NEWLINE_CR:          case PCRE_NEWLINE_CR:
# Line 1433  while (!done) Line 1480  while (!done)
1480          fprintf(outfile, "Forced newline sequence: CRLF\n");          fprintf(outfile, "Forced newline sequence: CRLF\n");
1481          break;          break;
1482    
1483            case PCRE_NEWLINE_ANYCRLF:
1484            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1485            break;
1486    
1487          case PCRE_NEWLINE_ANY:          case PCRE_NEWLINE_ANY:
1488          fprintf(outfile, "Forced newline sequence: ANY\n");          fprintf(outfile, "Forced newline sequence: ANY\n");
1489          break;          break;
# Line 1582  while (!done) Line 1633  while (!done)
1633    for (;;)    for (;;)
1634      {      {
1635      uschar *q;      uschar *q;
1636      uschar *bptr = dbuffer;      uschar *bptr;
1637      int *use_offsets = offsets;      int *use_offsets = offsets;
1638      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1639      int callout_data = 0;      int callout_data = 0;
# Line 1638  while (!done) Line 1689  while (!done)
1689      p = buffer;      p = buffer;
1690      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1691    
1692      q = dbuffer;      bptr = q = dbuffer;
1693      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1694        {        {
1695        int i = 0;        int i = 0;
# Line 1833  while (!done) Line 1884  while (!done)
1884            if (offsets == NULL)            if (offsets == NULL)
1885              {              {
1886              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1887                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
1888              yield = 1;              yield = 1;
1889              goto EXIT;              goto EXIT;
1890              }              }
# Line 2202  while (!done) Line 2253  while (!done)
2253          }          }
2254    
2255        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2256        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2257        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2258        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2259        offset values to achieve this. We won't be at the end of the string -  
2260        that was checked before setting g_notempty. */        Complication arises in the case when the newline option is "any" or
2261          "anycrlf". If the previous match was at the end of a line terminated by
2262          CRLF, an advance of one character just passes the \r, whereas we should
2263          prefer the longer newline sequence, as does the code in pcre_exec().
2264          Fudge the offset value to achieve this.
2265    
2266          Otherwise, in the case of UTF-8 matching, the advance must be one
2267          character, not one byte. */
2268    
2269        else        else
2270          {          {
2271          if (g_notempty != 0)          if (g_notempty != 0)
2272            {            {
2273            int onechar = 1;            int onechar = 1;
2274              unsigned int obits = ((real_pcre *)re)->options;
2275            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2276            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2277                {
2278                int d;
2279                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2280                obits = (d == '\r')? PCRE_NEWLINE_CR :
2281                        (d == '\n')? PCRE_NEWLINE_LF :
2282                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2283                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2284                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2285                }
2286              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2287                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2288                  &&
2289                  start_offset < len - 1 &&
2290                  bptr[start_offset] == '\r' &&
2291                  bptr[start_offset+1] == '\n')
2292                onechar++;
2293              else if (use_utf8)
2294              {              {
2295              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2296                {                {
# Line 2249  while (!done) Line 2325  while (!done)
2325        character. */        character. */
2326    
2327        g_notempty = 0;        g_notempty = 0;
2328    
2329        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2330          {          {
2331          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;

Legend:
Removed from v.96  
changed lines
  Added in v.230

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12