/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 96 by nigel, Fri Mar 2 13:10:43 2007 UTC | revision 211 by ph10, Thu Aug 9 09:52:43 2007 UTC
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  */  */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include <config.h>
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
# Line 67  input mode under Windows. */ Line 71  input mode under Windows. */
71  #endif  #endif
72    
73    
74  #define PCRE_SPY        /* For Win32 build, import data, not export */  /* We have to include pcre_internal.h because we need the internal info for
75    displaying the results of pcre_study() and we also need to know about the
76  /* We include pcre_internal.h because we need the internal info for displaying  internal macros, structures, and other internal data values; pcretest has
77  the results of pcre_study() and we also need to know about the internal  "inside information" compared to a program that strictly follows the PCRE API.
78  macros, structures, and other internal data values; pcretest has "inside  
79  information" compared to a program that strictly follows the PCRE API. */  Although pcre_internal.h does itself include pcre.h, we explicitly include it
80    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81    appropriately for an application, not for building PCRE. */
82    
83    #include "pcre.h"
84  #include "pcre_internal.h"  #include "pcre_internal.h"
85    
86  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* We need access to the data tables that PCRE uses. So as not to have to keep
# Line 114  Makefile. */ Line 121  Makefile. */
121  #include "pcreposix.h"  #include "pcreposix.h"
122  #endif  #endif
123    
124  /* It is also possible, for the benefit of the version imported into Exim, to  /* It is also possible, for the benefit of the version currently imported into
125  build pcretest without support for UTF8 (define NOUTF8), without the interface  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
126  to the DFA matcher (NODFA), and without the doublecheck of the old "info"  interface to the DFA matcher (NODFA), and without the doublecheck of the old
127  function (define NOINFOCHECK). */  "info" function (define NOINFOCHECK). In fact, we automatically cut out the
128    UTF8 support if PCRE is built without it. */
129    
130    #ifndef SUPPORT_UTF8
131    #ifndef NOUTF8
132    #define NOUTF8
133    #endif
134    #endif
135    
136    
137  /* Other parameters */  /* Other parameters */
# Line 142  static int callout_count; Line 156  static int callout_count;
156  static int callout_extra;  static int callout_extra;
157  static int callout_fail_count;  static int callout_fail_count;
158  static int callout_fail_id;  static int callout_fail_id;
159    static int debug_lengths;
160  static int first_callout;  static int first_callout;
161  static int locale_set = 0;  static int locale_set = 0;
162  static int show_malloc;  static int show_malloc;
# Line 653  return count; Line 668  return count;
668  *************************************************/  *************************************************/
669    
670  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes, where
671  xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
672    no match.
673    
674  Arguments:  Arguments:
675    p           points after the leading '<'    p           points after the leading '<'
# Line 668  check_newline(uschar *p, FILE *f) Line 684  check_newline(uschar *p, FILE *f)
684  if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
685  if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
686  if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
687    if (strncmp((char *)p, "anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
688  if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
689  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
690  return 0;  return 0;
# Line 840  while (argc > 1 && argv[op][0] == '-') Line 857  while (argc > 1 && argv[op][0] == '-')
857      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
858      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
859        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
860          (rc == -2)? "ANYCRLF" :
861        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
862      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
863      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
# Line 851  while (argc > 1 && argv[op][0] == '-') Line 869  while (argc > 1 && argv[op][0] == '-')
869      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %d\n", rc);
870      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
871      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
872      exit(0);      goto EXIT;
873      }      }
874    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(argv[op], "-help") == 0 ||
875             strcmp(argv[op], "--help") == 0)             strcmp(argv[op], "--help") == 0)
# Line 877  offsets = (int *)malloc(size_offsets_max Line 895  offsets = (int *)malloc(size_offsets_max
895  if (offsets == NULL)  if (offsets == NULL)
896    {    {
897    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
898      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
899    yield = 1;    yield = 1;
900    goto EXIT;    goto EXIT;
901    }    }
# Line 945  while (!done) Line 963  while (!done)
963    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
964    
965    use_utf8 = 0;    use_utf8 = 0;
966      debug_lengths = 1;
967    
968    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
969    if (extend_inputline(infile, buffer) == NULL) break;    if (extend_inputline(infile, buffer) == NULL) break;
# Line 1127  while (!done) Line 1146  while (!done)
1146        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1147        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1148        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1149          case 'Z': debug_lengths = 0; break;
1150        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1151        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1152    
# Line 1328  while (!done) Line 1348  while (!done)
1348      if (do_debug)      if (do_debug)
1349        {        {
1350        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
1351        pcre_printint(re, outfile);        pcre_printint(re, outfile, debug_lengths);
1352        }        }
1353    
1354      if (do_showinfo)      if (do_showinfo)
# Line 1337  while (!done) Line 1357  while (!done)
1357  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1358        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1359  #endif  #endif
1360        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char, okpartial, jchanged;
1361        int nameentrysize, namecount;        int nameentrysize, namecount;
1362        const uschar *nametable;        const uschar *nametable;
1363    
# Line 1350  while (!done) Line 1370  while (!done)
1370        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1371        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1372        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1373          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1374          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1375    
1376  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1377        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
# Line 1391  while (!done) Line 1413  while (!done)
1413            }            }
1414          }          }
1415    
1416        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
       to fish it out via out back door */  
1417    
1418        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
1419        if (do_flip)        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
          }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
1420    
1421        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1422          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
# Line 1419  while (!done) Line 1434  while (!done)
1434            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1435            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1436    
1437          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1438    
1439        switch (get_options & PCRE_NEWLINE_BITS)        switch (get_options & PCRE_NEWLINE_BITS)
1440          {          {
1441          case PCRE_NEWLINE_CR:          case PCRE_NEWLINE_CR:
# Line 1433  while (!done) Line 1450  while (!done)
1450          fprintf(outfile, "Forced newline sequence: CRLF\n");          fprintf(outfile, "Forced newline sequence: CRLF\n");
1451          break;          break;
1452    
1453            case PCRE_NEWLINE_ANYCRLF:
1454            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1455            break;
1456    
1457          case PCRE_NEWLINE_ANY:          case PCRE_NEWLINE_ANY:
1458          fprintf(outfile, "Forced newline sequence: ANY\n");          fprintf(outfile, "Forced newline sequence: ANY\n");
1459          break;          break;
# Line 1582  while (!done) Line 1603  while (!done)
1603    for (;;)    for (;;)
1604      {      {
1605      uschar *q;      uschar *q;
1606      uschar *bptr = dbuffer;      uschar *bptr;
1607      int *use_offsets = offsets;      int *use_offsets = offsets;
1608      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1609      int callout_data = 0;      int callout_data = 0;
# Line 1638  while (!done) Line 1659  while (!done)
1659      p = buffer;      p = buffer;
1660      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1661    
1662      q = dbuffer;      bptr = q = dbuffer;
1663      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1664        {        {
1665        int i = 0;        int i = 0;
# Line 1833  while (!done) Line 1854  while (!done)
1854            if (offsets == NULL)            if (offsets == NULL)
1855              {              {
1856              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1857                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
1858              yield = 1;              yield = 1;
1859              goto EXIT;              goto EXIT;
1860              }              }
# Line 2202  while (!done) Line 2223  while (!done)
2223          }          }
2224    
2225        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2226        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2227        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2228        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2229        offset values to achieve this. We won't be at the end of the string -  
2230        that was checked before setting g_notempty. */        Complication arises in the case when the newline option is "any" or
2231          "anycrlf". If the previous match was at the end of a line terminated by
2232          CRLF, an advance of one character just passes the \r, whereas we should
2233          prefer the longer newline sequence, as does the code in pcre_exec().
2234          Fudge the offset value to achieve this.
2235    
2236          Otherwise, in the case of UTF-8 matching, the advance must be one
2237          character, not one byte. */
2238    
2239        else        else
2240          {          {
2241          if (g_notempty != 0)          if (g_notempty != 0)
2242            {            {
2243            int onechar = 1;            int onechar = 1;
2244              unsigned int obits = ((real_pcre *)re)->options;
2245            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2246            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2247                {
2248                int d;
2249                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2250                obits = (d == '\r')? PCRE_NEWLINE_CR :
2251                        (d == '\n')? PCRE_NEWLINE_LF :
2252                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2253                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2254                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2255                }
2256              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2257                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2258                  &&
2259                  start_offset < len - 1 &&
2260                  bptr[start_offset] == '\r' &&
2261                  bptr[start_offset+1] == '\n')
2262                onechar++;
2263              else if (use_utf8)
2264              {              {
2265              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2266                {                {
# Line 2249  while (!done) Line 2295  while (!done)
2295        character. */        character. */
2296    
2297        g_notempty = 0;        g_notempty = 0;
2298    
2299        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2300          {          {
2301          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;

Legend:
Removed from v.96  
changed lines
  Added in v.211

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12