/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 116 by ph10, Fri Mar 9 15:23:02 2007 UTC | revision 149 by ph10, Mon Apr 16 15:28:08 2007 UTC
# Line 67  input mode under Windows. */ Line 67  input mode under Windows. */
67  #endif  #endif
68    
69    
70  #define PCRE_SPY        /* For Win32 build, import data, not export */  /* We have to include pcre_internal.h because we need the internal info for
71    displaying the results of pcre_study() and we also need to know about the
72  /* We include pcre_internal.h because we need the internal info for displaying  internal macros, structures, and other internal data values; pcretest has
73  the results of pcre_study() and we also need to know about the internal  "inside information" compared to a program that strictly follows the PCRE API.
74  macros, structures, and other internal data values; pcretest has "inside  
75  information" compared to a program that strictly follows the PCRE API. */  Although pcre_internal.h does itself include pcre.h, we explicitly include it
76    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
77    appropriately for an application, not for building PCRE. */
78    
79    #include "pcre.h"
80  #include "pcre_internal.h"  #include "pcre_internal.h"
81    
82  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* We need access to the data tables that PCRE uses. So as not to have to keep
# Line 660  return count; Line 663  return count;
663  *************************************************/  *************************************************/
664    
665  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes, where
666  xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
667    no match.
668    
669  Arguments:  Arguments:
670    p           points after the leading '<'    p           points after the leading '<'
# Line 675  check_newline(uschar *p, FILE *f) Line 679  check_newline(uschar *p, FILE *f)
679  if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
680  if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
681  if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
682    if (strncmp((char *)p, "anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
683  if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
684  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
685  return 0;  return 0;
# Line 847  while (argc > 1 && argv[op][0] == '-') Line 852  while (argc > 1 && argv[op][0] == '-')
852      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
853      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
854        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
855          (rc == -2)? "ANYCRLF" :
856        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
857      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
858      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
# Line 858  while (argc > 1 && argv[op][0] == '-') Line 864  while (argc > 1 && argv[op][0] == '-')
864      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %d\n", rc);
865      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
866      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
867      exit(0);      goto EXIT;
868      }      }
869    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(argv[op], "-help") == 0 ||
870             strcmp(argv[op], "--help") == 0)             strcmp(argv[op], "--help") == 0)
# Line 944  while (!done) Line 950  while (!done)
950    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
951    int do_study = 0;    int do_study = 0;
952    int do_debug = debug;    int do_debug = debug;
953    int debug_lengths = 1;    int debug_lengths = 1;
954    int do_G = 0;    int do_G = 0;
955    int do_g = 0;    int do_g = 0;
956    int do_showinfo = showinfo;    int do_showinfo = showinfo;
# Line 1135  while (!done) Line 1141  while (!done)
1141        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1142        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1143        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1144        case 'Z': debug_lengths = 0;        case 'Z': debug_lengths = 0; break;
1145        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1146        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1147    
# Line 1442  while (!done) Line 1448  while (!done)
1448          fprintf(outfile, "Forced newline sequence: CRLF\n");          fprintf(outfile, "Forced newline sequence: CRLF\n");
1449          break;          break;
1450    
1451            case PCRE_NEWLINE_ANYCRLF:
1452            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1453            break;
1454    
1455          case PCRE_NEWLINE_ANY:          case PCRE_NEWLINE_ANY:
1456          fprintf(outfile, "Forced newline sequence: ANY\n");          fprintf(outfile, "Forced newline sequence: ANY\n");
1457          break;          break;
# Line 1591  while (!done) Line 1601  while (!done)
1601    for (;;)    for (;;)
1602      {      {
1603      uschar *q;      uschar *q;
1604      uschar *bptr = dbuffer;      uschar *bptr;
1605      int *use_offsets = offsets;      int *use_offsets = offsets;
1606      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1607      int callout_data = 0;      int callout_data = 0;
# Line 1647  while (!done) Line 1657  while (!done)
1657      p = buffer;      p = buffer;
1658      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1659    
1660      q = dbuffer;      bptr = q = dbuffer;
1661      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1662        {        {
1663        int i = 0;        int i = 0;
# Line 2211  while (!done) Line 2221  while (!done)
2221          }          }
2222    
2223        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2224        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2225        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2226        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2227        offset values to achieve this. We won't be at the end of the string -  
2228        that was checked before setting g_notempty. */        Complication arises in the case when the newline option is "any" or
2229          "anycrlf". If the previous match was at the end of a line terminated by
2230          CRLF, an advance of one character just passes the \r, whereas we should
2231          prefer the longer newline sequence, as does the code in pcre_exec().
2232          Fudge the offset value to achieve this.
2233    
2234          Otherwise, in the case of UTF-8 matching, the advance must be one
2235          character, not one byte. */
2236    
2237        else        else
2238          {          {
2239          if (g_notempty != 0)          if (g_notempty != 0)
2240            {            {
2241            int onechar = 1;            int onechar = 1;
2242              unsigned int obits = ((real_pcre *)re)->options;
2243            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2244            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2245                {
2246                int d;
2247                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2248                obits = (d == '\r')? PCRE_NEWLINE_CR :
2249                        (d == '\n')? PCRE_NEWLINE_LF :
2250                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2251                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2252                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2253                }
2254              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2255                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2256                  &&
2257                  start_offset < len - 1 &&
2258                  bptr[start_offset] == '\r' &&
2259                  bptr[start_offset+1] == '\n')
2260                onechar++;
2261              else if (use_utf8)
2262              {              {
2263              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2264                {                {
# Line 2258  while (!done) Line 2293  while (!done)
2293        character. */        character. */
2294    
2295        g_notempty = 0;        g_notempty = 0;
2296    
2297        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2298          {          {
2299          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;

Legend:
Removed from v.116  
changed lines
  Added in v.149

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12