/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 93 by nigel, Sat Feb 24 21:41:42 2007 UTC revision 199 by ph10, Tue Jul 31 14:39:09 2007 UTC
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  */  */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include <config.h>
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
# Line 67  input mode under Windows. */ Line 71  input mode under Windows. */
71  #endif  #endif
72    
73    
74  #define PCRE_SPY        /* For Win32 build, import data, not export */  /* We have to include pcre_internal.h because we need the internal info for
75    displaying the results of pcre_study() and we also need to know about the
76  /* We include pcre_internal.h because we need the internal info for displaying  internal macros, structures, and other internal data values; pcretest has
77  the results of pcre_study() and we also need to know about the internal  "inside information" compared to a program that strictly follows the PCRE API.
78  macros, structures, and other internal data values; pcretest has "inside  
79  information" compared to a program that strictly follows the PCRE API. */  Although pcre_internal.h does itself include pcre.h, we explicitly include it
80    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81    appropriately for an application, not for building PCRE. */
82    
83    #include "pcre.h"
84  #include "pcre_internal.h"  #include "pcre_internal.h"
85    
86  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* We need access to the data tables that PCRE uses. So as not to have to keep
# Line 114  Makefile. */ Line 121  Makefile. */
121  #include "pcreposix.h"  #include "pcreposix.h"
122  #endif  #endif
123    
124  /* It is also possible, for the benefit of the version imported into Exim, to  /* It is also possible, for the benefit of the version currently imported into
125  build pcretest without support for UTF8 (define NOUTF8), without the interface  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
126  to the DFA matcher (NODFA), and without the doublecheck of the old "info"  interface to the DFA matcher (NODFA), and without the doublecheck of the old
127  function (define NOINFOCHECK). */  "info" function (define NOINFOCHECK). In fact, we automatically cut out the
128    UTF8 support if PCRE is built without it. */
129    
130    #ifndef SUPPORT_UTF8
131    #ifndef NOUTF8
132    #define NOUTF8
133    #endif
134    #endif
135    
136    
137  /* Other parameters */  /* Other parameters */
# Line 653  return count; Line 667  return count;
667  *************************************************/  *************************************************/
668    
669  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes, where
670  xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
671    no match.
672    
673  Arguments:  Arguments:
674    p           points after the leading '<'    p           points after the leading '<'
# Line 668  check_newline(uschar *p, FILE *f) Line 683  check_newline(uschar *p, FILE *f)
683  if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
684  if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
685  if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
686    if (strncmp((char *)p, "anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
687  if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
688  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
689  return 0;  return 0;
# Line 840  while (argc > 1 && argv[op][0] == '-') Line 856  while (argc > 1 && argv[op][0] == '-')
856      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
857      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
858        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
859          (rc == -2)? "ANYCRLF" :
860        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
861      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
862      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
# Line 851  while (argc > 1 && argv[op][0] == '-') Line 868  while (argc > 1 && argv[op][0] == '-')
868      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %d\n", rc);
869      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
870      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
871      exit(0);      goto EXIT;
872      }      }
873    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(argv[op], "-help") == 0 ||
874             strcmp(argv[op], "--help") == 0)             strcmp(argv[op], "--help") == 0)
# Line 877  offsets = (int *)malloc(size_offsets_max Line 894  offsets = (int *)malloc(size_offsets_max
894  if (offsets == NULL)  if (offsets == NULL)
895    {    {
896    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
897      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
898    yield = 1;    yield = 1;
899    goto EXIT;    goto EXIT;
900    }    }
# Line 937  while (!done) Line 954  while (!done)
954    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
955    int do_study = 0;    int do_study = 0;
956    int do_debug = debug;    int do_debug = debug;
957      int debug_lengths = 1;
958    int do_G = 0;    int do_G = 0;
959    int do_g = 0;    int do_g = 0;
960    int do_showinfo = showinfo;    int do_showinfo = showinfo;
# Line 1127  while (!done) Line 1145  while (!done)
1145        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1146        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1147        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1148          case 'Z': debug_lengths = 0; break;
1149        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1150        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1151    
# Line 1328  while (!done) Line 1347  while (!done)
1347      if (do_debug)      if (do_debug)
1348        {        {
1349        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
1350        pcre_printint(re, outfile);        pcre_printint(re, outfile, debug_lengths);
1351        }        }
1352    
1353      if (do_showinfo)      if (do_showinfo)
# Line 1337  while (!done) Line 1356  while (!done)
1356  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1357        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1358  #endif  #endif
1359        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char, okpartial, jchanged;
1360        int nameentrysize, namecount;        int nameentrysize, namecount;
1361        const uschar *nametable;        const uschar *nametable;
1362    
# Line 1350  while (!done) Line 1369  while (!done)
1369        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1370        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1371        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1372          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1373          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1374    
1375  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1376        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
# Line 1391  while (!done) Line 1412  while (!done)
1412            }            }
1413          }          }
1414    
1415        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
       to fish it out via our back door */  
1416    
1417        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
1418        if (do_flip)        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
          }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
1419    
1420        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1421          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
# Line 1419  while (!done) Line 1433  while (!done)
1433            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1434            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1435    
1436          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1437    
1438        switch (get_options & PCRE_NEWLINE_BITS)        switch (get_options & PCRE_NEWLINE_BITS)
1439          {          {
1440          case PCRE_NEWLINE_CR:          case PCRE_NEWLINE_CR:
# Line 1433  while (!done) Line 1449  while (!done)
1449          fprintf(outfile, "Forced newline sequence: CRLF\n");          fprintf(outfile, "Forced newline sequence: CRLF\n");
1450          break;          break;
1451    
1452            case PCRE_NEWLINE_ANYCRLF:
1453            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1454            break;
1455    
1456          case PCRE_NEWLINE_ANY:          case PCRE_NEWLINE_ANY:
1457          fprintf(outfile, "Forced newline sequence: ANY\n");          fprintf(outfile, "Forced newline sequence: ANY\n");
1458          break;          break;
# Line 1582  while (!done) Line 1602  while (!done)
1602    for (;;)    for (;;)
1603      {      {
1604      uschar *q;      uschar *q;
1605      uschar *bptr = dbuffer;      uschar *bptr;
1606      int *use_offsets = offsets;      int *use_offsets = offsets;
1607      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1608      int callout_data = 0;      int callout_data = 0;
# Line 1638  while (!done) Line 1658  while (!done)
1658      p = buffer;      p = buffer;
1659      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1660    
1661      q = dbuffer;      bptr = q = dbuffer;
1662      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1663        {        {
1664        int i = 0;        int i = 0;
# Line 1833  while (!done) Line 1853  while (!done)
1853            if (offsets == NULL)            if (offsets == NULL)
1854              {              {
1855              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1856                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
1857              yield = 1;              yield = 1;
1858              goto EXIT;              goto EXIT;
1859              }              }
# Line 2202  while (!done) Line 2222  while (!done)
2222          }          }
2223    
2224        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2225        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2226        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2227        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2228        offset values to achieve this. We won't be at the end of the string -  
2229        that was checked before setting g_notempty. */        Complication arises in the case when the newline option is "any" or
2230          "anycrlf". If the previous match was at the end of a line terminated by
2231          CRLF, an advance of one character just passes the \r, whereas we should
2232          prefer the longer newline sequence, as does the code in pcre_exec().
2233          Fudge the offset value to achieve this.
2234    
2235          Otherwise, in the case of UTF-8 matching, the advance must be one
2236          character, not one byte. */
2237    
2238        else        else
2239          {          {
2240          if (g_notempty != 0)          if (g_notempty != 0)
2241            {            {
2242            int onechar = 1;            int onechar = 1;
2243              unsigned int obits = ((real_pcre *)re)->options;
2244            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2245            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2246                {
2247                int d;
2248                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2249                obits = (d == '\r')? PCRE_NEWLINE_CR :
2250                        (d == '\n')? PCRE_NEWLINE_LF :
2251                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2252                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2253                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2254                }
2255              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2256                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2257                  &&
2258                  start_offset < len - 1 &&
2259                  bptr[start_offset] == '\r' &&
2260                  bptr[start_offset+1] == '\n')
2261                onechar++;
2262              else if (use_utf8)
2263              {              {
2264              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2265                {                {
# Line 2249  while (!done) Line 2294  while (!done)
2294        character. */        character. */
2295    
2296        g_notempty = 0;        g_notempty = 0;
2297    
2298        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2299          {          {
2300          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;

Legend:
Removed from v.93  
changed lines
  Added in v.199

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12