/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 96 by nigel, Fri Mar 2 13:10:43 2007 UTC | revision 146 by ph10, Thu Apr 5 09:17:28 2007 UTC
# Line 67  input mode under Windows. */ Line 67  input mode under Windows. */
67  #endif  #endif
68    
69    
70  #define PCRE_SPY        /* For Win32 build, import data, not export */  /* We have to include pcre_internal.h because we need the internal info for
71    displaying the results of pcre_study() and we also need to know about the
72  /* We include pcre_internal.h because we need the internal info for displaying  internal macros, structures, and other internal data values; pcretest has
73  the results of pcre_study() and we also need to know about the internal  "inside information" compared to a program that strictly follows the PCRE API.
74  macros, structures, and other internal data values; pcretest has "inside  
75  information" compared to a program that strictly follows the PCRE API. */  Although pcre_internal.h does itself include pcre.h, we explicitly include it
76    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
77    appropriately for an application, not for building PCRE. */
78    
79    #include "pcre.h"
80  #include "pcre_internal.h"  #include "pcre_internal.h"
81    
82  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* We need access to the data tables that PCRE uses. So as not to have to keep
# Line 114  Makefile. */ Line 117  Makefile. */
117  #include "pcreposix.h"  #include "pcreposix.h"
118  #endif  #endif
119    
120  /* It is also possible, for the benefit of the version imported into Exim, to  /* It is also possible, for the benefit of the version currently imported into
121  build pcretest without support for UTF8 (define NOUTF8), without the interface  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
122  to the DFA matcher (NODFA), and without the doublecheck of the old "info"  interface to the DFA matcher (NODFA), and without the doublecheck of the old
123  function (define NOINFOCHECK). */  "info" function (define NOINFOCHECK). In fact, we automatically cut out the
124    UTF8 support if PCRE is built without it. */
125    
126    #ifndef SUPPORT_UTF8
127    #ifndef NOUTF8
128    #define NOUTF8
129    #endif
130    #endif
131    
132    
133  /* Other parameters */  /* Other parameters */
# Line 851  while (argc > 1 && argv[op][0] == '-') Line 861  while (argc > 1 && argv[op][0] == '-')
861      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %d\n", rc);
862      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
863      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
864      exit(0);      goto EXIT;
865      }      }
866    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(argv[op], "-help") == 0 ||
867             strcmp(argv[op], "--help") == 0)             strcmp(argv[op], "--help") == 0)
# Line 937  while (!done) Line 947  while (!done)
947    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
948    int do_study = 0;    int do_study = 0;
949    int do_debug = debug;    int do_debug = debug;
950      int debug_lengths = 1;
951    int do_G = 0;    int do_G = 0;
952    int do_g = 0;    int do_g = 0;
953    int do_showinfo = showinfo;    int do_showinfo = showinfo;
# Line 1127  while (!done) Line 1138  while (!done)
1138        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1139        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1140        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1141          case 'Z': debug_lengths = 0; break;
1142        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1143        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1144    
# Line 1328  while (!done) Line 1340  while (!done)
1340      if (do_debug)      if (do_debug)
1341        {        {
1342        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
1343        pcre_printint(re, outfile);        pcre_printint(re, outfile, debug_lengths);
1344        }        }
1345    
1346      if (do_showinfo)      if (do_showinfo)
# Line 2202  while (!done) Line 2214  while (!done)
2214          }          }
2215    
2216        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2217        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2218        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2219        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2220        offset values to achieve this. We won't be at the end of the string -  
2221        that was checked before setting g_notempty. */        Complication arises in the case when the newline option is "any".
2222          If the previous match was at the end of a line terminated by CRLF, an
2223          advance of one character just passes the \r, whereas we should prefer the
2224          longer newline sequence, as does the code in pcre_exec(). Fudge the
2225          offset value to achieve this.
2226    
2227          Otherwise, in the case of UTF-8 matching, the advance must be one
2228          character, not one byte. */
2229    
2230        else        else
2231          {          {
2232          if (g_notempty != 0)          if (g_notempty != 0)
2233            {            {
2234            int onechar = 1;            int onechar = 1;
2235              unsigned int obits = ((real_pcre *)re)->options;
2236            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2237            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2238                {
2239                int d;
2240                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2241                obits = (d == '\r')? PCRE_NEWLINE_CR :
2242                        (d == '\n')? PCRE_NEWLINE_LF :
2243                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2244                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2245                }
2246              if ((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY &&
2247                  start_offset < len - 1 &&
2248                  bptr[start_offset] == '\r' &&
2249                  bptr[start_offset+1] == '\n')
2250                onechar++;
2251              else if (use_utf8)
2252              {              {
2253              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2254                {                {
# Line 2249  while (!done) Line 2283  while (!done)
2283        character. */        character. */
2284    
2285        g_notempty = 0;        g_notempty = 0;
2286    
2287        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2288          {          {
2289          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;

Legend:
Removed from v.96  
changed lines
  Added in v.146

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12