/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 145 by ph10, Wed Apr 4 14:06:52 2007 UTC | revision 274 by ph10, Tue Nov 20 10:05:23 2007 UTC
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  */  */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
# Line 90  symbols to prevent clashes. */ Line 94  symbols to prevent clashes. */
94  #define _pcre_utf8_table4      utf8_table4  #define _pcre_utf8_table4      utf8_table4
95  #define _pcre_utt              utt  #define _pcre_utt              utt
96  #define _pcre_utt_size         utt_size  #define _pcre_utt_size         utt_size
97    #define _pcre_utt_names        utt_names
98  #define _pcre_OP_lengths       OP_lengths  #define _pcre_OP_lengths       OP_lengths
99    
100  #include "pcre_tables.c"  #include "pcre_tables.c"
# Line 152  static int callout_count; Line 157  static int callout_count;
157  static int callout_extra;  static int callout_extra;
158  static int callout_fail_count;  static int callout_fail_count;
159  static int callout_fail_id;  static int callout_fail_id;
160    static int debug_lengths;
161  static int first_callout;  static int first_callout;
162  static int locale_set = 0;  static int locale_set = 0;
163  static int show_malloc;  static int show_malloc;
# Line 659  return count; Line 665  return count;
665    
666    
667  /*************************************************  /*************************************************
668    *         Case-independent strncmp() function    *
669    *************************************************/
670    
671    /*
672    Arguments:
673      s         first string
674      t         second string
675      n         number of characters to compare
676    
677    Returns:    < 0, = 0, or > 0, according to the comparison
678    */
679    
680    static int
681    strncmpic(uschar *s, uschar *t, int n)
682    {
683    while (n--)
684      {
685      int c = tolower(*s++) - tolower(*t++);
686      if (c) return c;
687      }
688    return 0;
689    }
690    
691    
692    
693    /*************************************************
694  *         Check newline indicator                *  *         Check newline indicator                *
695  *************************************************/  *************************************************/
696    
697  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes, where
698  xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
699    no match.
700    
701  Arguments:  Arguments:
702    p           points after the leading '<'    p           points after the leading '<'
# Line 675  Returns: appropriate PCRE_NEWLINE_x Line 708  Returns: appropriate PCRE_NEWLINE_x
708  static int  static int
709  check_newline(uschar *p, FILE *f)  check_newline(uschar *p, FILE *f)
710  {  {
711  if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
712  if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
713  if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
714  if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
715    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
716    if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
717    if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
718  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
719  return 0;  return 0;
720  }  }
# Line 850  while (argc > 1 && argv[op][0] == '-') Line 886  while (argc > 1 && argv[op][0] == '-')
886      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
887      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
888        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
889          (rc == -2)? "ANYCRLF" :
890        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
891        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
892        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
893                                         "all Unicode newlines");
894      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
895      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
896      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
# Line 887  offsets = (int *)malloc(size_offsets_max Line 927  offsets = (int *)malloc(size_offsets_max
927  if (offsets == NULL)  if (offsets == NULL)
928    {    {
929    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
930      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
931    yield = 1;    yield = 1;
932    goto EXIT;    goto EXIT;
933    }    }
# Line 947  while (!done) Line 987  while (!done)
987    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
988    int do_study = 0;    int do_study = 0;
989    int do_debug = debug;    int do_debug = debug;
   int debug_lengths = 1;  
990    int do_G = 0;    int do_G = 0;
991    int do_g = 0;    int do_g = 0;
992    int do_showinfo = showinfo;    int do_showinfo = showinfo;
# Line 956  while (!done) Line 995  while (!done)
995    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
996    
997    use_utf8 = 0;    use_utf8 = 0;
998      debug_lengths = 1;
999    
1000    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
1001    if (extend_inputline(infile, buffer) == NULL) break;    if (extend_inputline(infile, buffer) == NULL) break;
# Line 1058  while (!done) Line 1098  while (!done)
1098    
1099    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1100      {      {
1101      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1102      goto SKIP_DATA;      goto SKIP_DATA;
1103      }      }
1104    
# Line 1312  while (!done) Line 1352  while (!done)
1352      if (do_flip)      if (do_flip)
1353        {        {
1354        real_pcre *rre = (real_pcre *)re;        real_pcre *rre = (real_pcre *)re;
1355        rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));        rre->magic_number =
1356            byteflip(rre->magic_number, sizeof(rre->magic_number));
1357        rre->size = byteflip(rre->size, sizeof(rre->size));        rre->size = byteflip(rre->size, sizeof(rre->size));
1358        rre->options = byteflip(rre->options, sizeof(rre->options));        rre->options = byteflip(rre->options, sizeof(rre->options));
1359        rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));        rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1360        rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));        rre->top_bracket =
1361        rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));          (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1362        rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));        rre->top_backref =
1363        rre->name_table_offset = byteflip(rre->name_table_offset,          (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1364          rre->first_byte =
1365            (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1366          rre->req_byte =
1367            (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1368          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1369          sizeof(rre->name_table_offset));          sizeof(rre->name_table_offset));
1370        rre->name_entry_size = byteflip(rre->name_entry_size,        rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1371          sizeof(rre->name_entry_size));          sizeof(rre->name_entry_size));
1372        rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));        rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1373            sizeof(rre->name_count));
1374    
1375        if (extra != NULL)        if (extra != NULL)
1376          {          {
# Line 1349  while (!done) Line 1396  while (!done)
1396  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1397        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1398  #endif  #endif
1399        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char, okpartial, jchanged,
1400            hascrorlf;
1401        int nameentrysize, namecount;        int nameentrysize, namecount;
1402        const uschar *nametable;        const uschar *nametable;
1403    
# Line 1362  while (!done) Line 1410  while (!done)
1410        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1411        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1412        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1413          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1414          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1415          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1416    
1417  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1418        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
# Line 1403  while (!done) Line 1454  while (!done)
1454            }            }
1455          }          }
1456    
1457        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1458        to fish it out via out back door */        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1459    
1460        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
1461        if (do_flip)        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
          }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
1462    
1463        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1464          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1465            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1466            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1467            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1468            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1469            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1470            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1471              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1472              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1473            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1474            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1475            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
# Line 1431  while (!done) Line 1478  while (!done)
1478            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1479            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1480    
1481          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1482    
1483        switch (get_options & PCRE_NEWLINE_BITS)        switch (get_options & PCRE_NEWLINE_BITS)
1484          {          {
1485          case PCRE_NEWLINE_CR:          case PCRE_NEWLINE_CR:
# Line 1445  while (!done) Line 1494  while (!done)
1494          fprintf(outfile, "Forced newline sequence: CRLF\n");          fprintf(outfile, "Forced newline sequence: CRLF\n");
1495          break;          break;
1496    
1497            case PCRE_NEWLINE_ANYCRLF:
1498            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1499            break;
1500    
1501          case PCRE_NEWLINE_ANY:          case PCRE_NEWLINE_ANY:
1502          fprintf(outfile, "Forced newline sequence: ANY\n");          fprintf(outfile, "Forced newline sequence: ANY\n");
1503          break;          break;
# Line 1549  while (!done) Line 1602  while (!done)
1602        else        else
1603          {          {
1604          uschar sbuf[8];          uschar sbuf[8];
1605          sbuf[0] = (true_size >> 24)  & 255;          sbuf[0] = (uschar)((true_size >> 24) & 255);
1606          sbuf[1] = (true_size >> 16)  & 255;          sbuf[1] = (uschar)((true_size >> 16) & 255);
1607          sbuf[2] = (true_size >>  8)  & 255;          sbuf[2] = (uschar)((true_size >>  8) & 255);
1608          sbuf[3] = (true_size)  & 255;          sbuf[3] = (uschar)((true_size) & 255);
1609    
1610          sbuf[4] = (true_study_size >> 24)  & 255;          sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1611          sbuf[5] = (true_study_size >> 16)  & 255;          sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1612          sbuf[6] = (true_study_size >>  8)  & 255;          sbuf[6] = (uschar)((true_study_size >>  8) & 255);
1613          sbuf[7] = (true_study_size)  & 255;          sbuf[7] = (uschar)((true_study_size) & 255);
1614    
1615          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
1616              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1594  while (!done) Line 1647  while (!done)
1647    for (;;)    for (;;)
1648      {      {
1649      uschar *q;      uschar *q;
1650      uschar *bptr = dbuffer;      uschar *bptr;
1651      int *use_offsets = offsets;      int *use_offsets = offsets;
1652      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1653      int callout_data = 0;      int callout_data = 0;
# Line 1650  while (!done) Line 1703  while (!done)
1703      p = buffer;      p = buffer;
1704      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1705    
1706      q = dbuffer;      bptr = q = dbuffer;
1707      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1708        {        {
1709        int i = 0;        int i = 0;
# Line 1845  while (!done) Line 1898  while (!done)
1898            if (offsets == NULL)            if (offsets == NULL)
1899              {              {
1900              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1901                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
1902              yield = 1;              yield = 1;
1903              goto EXIT;              goto EXIT;
1904              }              }
# Line 2218  while (!done) Line 2271  while (!done)
2271        to advance the start offset, and continue. We won't be at the end of the        to advance the start offset, and continue. We won't be at the end of the
2272        string - that was checked before setting g_notempty.        string - that was checked before setting g_notempty.
2273    
2274        Complication arises in the case when the newline option is "any".        Complication arises in the case when the newline option is "any" or
2275        If the previous match was at the end of a line terminated by CRLF, an        "anycrlf". If the previous match was at the end of a line terminated by
2276        advance of one character just passes the \r, whereas we should prefer the        CRLF, an advance of one character just passes the \r, whereas we should
2277        longer newline sequence, as does the code in pcre_exec(). Fudge the        prefer the longer newline sequence, as does the code in pcre_exec().
2278        offset value to achieve this.        Fudge the offset value to achieve this.
2279    
2280        Otherwise, in the case of UTF-8 matching, the advance must be one        Otherwise, in the case of UTF-8 matching, the advance must be one
2281        character, not one byte. */        character, not one byte. */
# Line 2232  while (!done) Line 2285  while (!done)
2285          if (g_notempty != 0)          if (g_notempty != 0)
2286            {            {
2287            int onechar = 1;            int onechar = 1;
2288              unsigned int obits = ((real_pcre *)re)->options;
2289            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2290            if ((((real_pcre *)re)->options & PCRE_NEWLINE_BITS) ==            if ((obits & PCRE_NEWLINE_BITS) == 0)
2291                    PCRE_NEWLINE_ANY &&              {
2292                int d;
2293                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2294                obits = (d == '\r')? PCRE_NEWLINE_CR :
2295                        (d == '\n')? PCRE_NEWLINE_LF :
2296                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2297                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2298                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2299                }
2300              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2301                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2302                  &&
2303                start_offset < len - 1 &&                start_offset < len - 1 &&
2304                bptr[start_offset] == '\r' &&                bptr[start_offset] == '\r' &&
2305                bptr[start_offset+1] == '\n')                bptr[start_offset+1] == '\n')

Legend:
Removed from v.145  
changed lines
  Added in v.274

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12