/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 96 by nigel, Fri Mar 2 13:10:43 2007 UTC | revision 143 by ph10, Mon Apr 2 10:08:14 2007 UTC
# Line 114  Makefile. */ Line 114  Makefile. */
114  #include "pcreposix.h"  #include "pcreposix.h"
115  #endif  #endif
116    
117  /* It is also possible, for the benefit of the version imported into Exim, to  /* It is also possible, for the benefit of the version currently imported into
118  build pcretest without support for UTF8 (define NOUTF8), without the interface  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
119  to the DFA matcher (NODFA), and without the doublecheck of the old "info"  interface to the DFA matcher (NODFA), and without the doublecheck of the old
120  function (define NOINFOCHECK). */  "info" function (define NOINFOCHECK). In fact, we automatically cut out the
121    UTF8 support if PCRE is built without it. */
122    
123    #ifndef SUPPORT_UTF8
124    #ifndef NOUTF8
125    #define NOUTF8
126    #endif
127    #endif
128    
129    
130  /* Other parameters */  /* Other parameters */
# Line 851  while (argc > 1 && argv[op][0] == '-') Line 858  while (argc > 1 && argv[op][0] == '-')
858      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %d\n", rc);
859      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
860      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
861      exit(0);      goto EXIT;
862      }      }
863    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(argv[op], "-help") == 0 ||
864             strcmp(argv[op], "--help") == 0)             strcmp(argv[op], "--help") == 0)
# Line 937  while (!done) Line 944  while (!done)
944    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
945    int do_study = 0;    int do_study = 0;
946    int do_debug = debug;    int do_debug = debug;
947      int debug_lengths = 1;
948    int do_G = 0;    int do_G = 0;
949    int do_g = 0;    int do_g = 0;
950    int do_showinfo = showinfo;    int do_showinfo = showinfo;
# Line 1127  while (!done) Line 1135  while (!done)
1135        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1136        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1137        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1138          case 'Z': debug_lengths = 0; break;
1139        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1140        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1141    
# Line 1328  while (!done) Line 1337  while (!done)
1337      if (do_debug)      if (do_debug)
1338        {        {
1339        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
1340        pcre_printint(re, outfile);        pcre_printint(re, outfile, debug_lengths);
1341        }        }
1342    
1343      if (do_showinfo)      if (do_showinfo)
# Line 2202  while (!done) Line 2211  while (!done)
2211          }          }
2212    
2213        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2214        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2215        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2216        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2217        offset values to achieve this. We won't be at the end of the string -  
2218        that was checked before setting g_notempty. */        Complication arises in the case when the newline option is "any".
2219          If the previous match was at the end of a line terminated by CRLF, an
2220          advance of one character just passes the \r, whereas we should prefer the
2221          longer newline sequence, as does the code in pcre_exec(). Fudge the
2222          offset value to achieve this.
2223    
2224          Otherwise, in the case of UTF-8 matching, the advance must be one
2225          character, not one byte. */
2226    
2227        else        else
2228          {          {
# Line 2214  while (!done) Line 2230  while (!done)
2230            {            {
2231            int onechar = 1;            int onechar = 1;
2232            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2233            if (use_utf8)            if ((((real_pcre *)re)->options & PCRE_NEWLINE_BITS) ==
2234                      PCRE_NEWLINE_ANY &&
2235                  start_offset < len - 1 &&
2236                  bptr[start_offset] == '\r' &&
2237                  bptr[start_offset+1] == '\n')
2238                onechar++;
2239              else if (use_utf8)
2240              {              {
2241              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2242                {                {
# Line 2249  while (!done) Line 2271  while (!done)
2271        character. */        character. */
2272    
2273        g_notempty = 0;        g_notempty = 0;
2274    
2275        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2276          {          {
2277          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;

Legend:
Removed from v.96  
changed lines
  Added in v.143

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12