/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 96 by nigel, Fri Mar 2 13:10:43 2007 UTC | revision 169 by ph10, Mon Jun 4 10:49:21 2007 UTC
# Line 67  input mode under Windows. */ Line 67  input mode under Windows. */
67  #endif  #endif
68    
69    
70  #define PCRE_SPY        /* For Win32 build, import data, not export */  /* We have to include pcre_internal.h because we need the internal info for
71    displaying the results of pcre_study() and we also need to know about the
72  /* We include pcre_internal.h because we need the internal info for displaying  internal macros, structures, and other internal data values; pcretest has
73  the results of pcre_study() and we also need to know about the internal  "inside information" compared to a program that strictly follows the PCRE API.
74  macros, structures, and other internal data values; pcretest has "inside  
75  information" compared to a program that strictly follows the PCRE API. */  Although pcre_internal.h does itself include pcre.h, we explicitly include it
76    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
77    appropriately for an application, not for building PCRE. */
78    
79    #include "pcre.h"
80  #include "pcre_internal.h"  #include "pcre_internal.h"
81    
82  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* We need access to the data tables that PCRE uses. So as not to have to keep
# Line 114  Makefile. */ Line 117  Makefile. */
117  #include "pcreposix.h"  #include "pcreposix.h"
118  #endif  #endif
119    
120  /* It is also possible, for the benefit of the version imported into Exim, to  /* It is also possible, for the benefit of the version currently imported into
121  build pcretest without support for UTF8 (define NOUTF8), without the interface  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
122  to the DFA matcher (NODFA), and without the doublecheck of the old "info"  interface to the DFA matcher (NODFA), and without the doublecheck of the old
123  function (define NOINFOCHECK). */  "info" function (define NOINFOCHECK). In fact, we automatically cut out the
124    UTF8 support if PCRE is built without it. */
125    
126    #ifndef SUPPORT_UTF8
127    #ifndef NOUTF8
128    #define NOUTF8
129    #endif
130    #endif
131    
132    
133  /* Other parameters */  /* Other parameters */
# Line 653  return count; Line 663  return count;
663  *************************************************/  *************************************************/
664    
665  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes, where
666  xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
667    no match.
668    
669  Arguments:  Arguments:
670    p           points after the leading '<'    p           points after the leading '<'
# Line 668  check_newline(uschar *p, FILE *f) Line 679  check_newline(uschar *p, FILE *f)
679  if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
680  if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
681  if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
682    if (strncmp((char *)p, "anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
683  if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
684  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
685  return 0;  return 0;
# Line 840  while (argc > 1 && argv[op][0] == '-') Line 852  while (argc > 1 && argv[op][0] == '-')
852      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
853      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
854        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
855          (rc == -2)? "ANYCRLF" :
856        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
857      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
858      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
# Line 851  while (argc > 1 && argv[op][0] == '-') Line 864  while (argc > 1 && argv[op][0] == '-')
864      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %d\n", rc);
865      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
866      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
867      exit(0);      goto EXIT;
868      }      }
869    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(argv[op], "-help") == 0 ||
870             strcmp(argv[op], "--help") == 0)             strcmp(argv[op], "--help") == 0)
# Line 877  offsets = (int *)malloc(size_offsets_max Line 890  offsets = (int *)malloc(size_offsets_max
890  if (offsets == NULL)  if (offsets == NULL)
891    {    {
892    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
893      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
894    yield = 1;    yield = 1;
895    goto EXIT;    goto EXIT;
896    }    }
# Line 937  while (!done) Line 950  while (!done)
950    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
951    int do_study = 0;    int do_study = 0;
952    int do_debug = debug;    int do_debug = debug;
953      int debug_lengths = 1;
954    int do_G = 0;    int do_G = 0;
955    int do_g = 0;    int do_g = 0;
956    int do_showinfo = showinfo;    int do_showinfo = showinfo;
# Line 1127  while (!done) Line 1141  while (!done)
1141        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1142        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1143        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1144          case 'Z': debug_lengths = 0; break;
1145        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1146        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1147    
# Line 1328  while (!done) Line 1343  while (!done)
1343      if (do_debug)      if (do_debug)
1344        {        {
1345        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
1346        pcre_printint(re, outfile);        pcre_printint(re, outfile, debug_lengths);
1347        }        }
1348    
1349      if (do_showinfo)      if (do_showinfo)
# Line 1337  while (!done) Line 1352  while (!done)
1352  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1353        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1354  #endif  #endif
1355        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char, okpartial, jchanged;
1356        int nameentrysize, namecount;        int nameentrysize, namecount;
1357        const uschar *nametable;        const uschar *nametable;
1358    
# Line 1350  while (!done) Line 1365  while (!done)
1365        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1366        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1367        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1368          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1369          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1370    
1371  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1372        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
# Line 1390  while (!done) Line 1407  while (!done)
1407            nametable += nameentrysize;            nametable += nameentrysize;
1408            }            }
1409          }          }
1410    
1411        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
       to fish it out via out back door */  
1412    
1413        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
1414        if (do_flip)        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
          }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
1415    
1416        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1417          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
# Line 1418  while (!done) Line 1428  while (!done)
1428            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1429            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1430            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1431    
1432          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1433    
1434        switch (get_options & PCRE_NEWLINE_BITS)        switch (get_options & PCRE_NEWLINE_BITS)
1435          {          {
# Line 1433  while (!done) Line 1445  while (!done)
1445          fprintf(outfile, "Forced newline sequence: CRLF\n");          fprintf(outfile, "Forced newline sequence: CRLF\n");
1446          break;          break;
1447    
1448            case PCRE_NEWLINE_ANYCRLF:
1449            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1450            break;
1451    
1452          case PCRE_NEWLINE_ANY:          case PCRE_NEWLINE_ANY:
1453          fprintf(outfile, "Forced newline sequence: ANY\n");          fprintf(outfile, "Forced newline sequence: ANY\n");
1454          break;          break;
# Line 1582  while (!done) Line 1598  while (!done)
1598    for (;;)    for (;;)
1599      {      {
1600      uschar *q;      uschar *q;
1601      uschar *bptr = dbuffer;      uschar *bptr;
1602      int *use_offsets = offsets;      int *use_offsets = offsets;
1603      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1604      int callout_data = 0;      int callout_data = 0;
# Line 1638  while (!done) Line 1654  while (!done)
1654      p = buffer;      p = buffer;
1655      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1656    
1657      q = dbuffer;      bptr = q = dbuffer;
1658      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1659        {        {
1660        int i = 0;        int i = 0;
# Line 1833  while (!done) Line 1849  while (!done)
1849            if (offsets == NULL)            if (offsets == NULL)
1850              {              {
1851              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1852                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
1853              yield = 1;              yield = 1;
1854              goto EXIT;              goto EXIT;
1855              }              }
# Line 2202  while (!done) Line 2218  while (!done)
2218          }          }
2219    
2220        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2221        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2222        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2223        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2224        offset values to achieve this. We won't be at the end of the string -  
2225        that was checked before setting g_notempty. */        Complication arises in the case when the newline option is "any" or
2226          "anycrlf". If the previous match was at the end of a line terminated by
2227          CRLF, an advance of one character just passes the \r, whereas we should
2228          prefer the longer newline sequence, as does the code in pcre_exec().
2229          Fudge the offset value to achieve this.
2230    
2231          Otherwise, in the case of UTF-8 matching, the advance must be one
2232          character, not one byte. */
2233    
2234        else        else
2235          {          {
2236          if (g_notempty != 0)          if (g_notempty != 0)
2237            {            {
2238            int onechar = 1;            int onechar = 1;
2239              unsigned int obits = ((real_pcre *)re)->options;
2240            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2241            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2242                {
2243                int d;
2244                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2245                obits = (d == '\r')? PCRE_NEWLINE_CR :
2246                        (d == '\n')? PCRE_NEWLINE_LF :
2247                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2248                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2249                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2250                }
2251              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2252                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2253                  &&
2254                  start_offset < len - 1 &&
2255                  bptr[start_offset] == '\r' &&
2256                  bptr[start_offset+1] == '\n')
2257                onechar++;
2258              else if (use_utf8)
2259              {              {
2260              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2261                {                {
# Line 2249  while (!done) Line 2290  while (!done)
2290        character. */        character. */
2291    
2292        g_notempty = 0;        g_notempty = 0;
2293    
2294        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2295          {          {
2296          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;

Legend:
Removed from v.96  
changed lines
  Added in v.169

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12