/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 107 by ph10, Wed Mar 7 11:02:28 2007 UTC revision 376 by ph10, Sun Mar 1 12:00:59 2009 UTC
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  */  */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 48  POSSIBILITY OF SUCH DAMAGE.
48  #include <locale.h>  #include <locale.h>
49  #include <errno.h>  #include <errno.h>
50    
51    #ifdef SUPPORT_LIBREADLINE
52    #ifdef HAVE_UNISTD_H
53    #include <unistd.h>
54    #endif
55    #include <readline/readline.h>
56    #include <readline/history.h>
57    #endif
58    
59    
60  /* A number of things vary for Windows builds. Originally, pcretest opened its  /* A number of things vary for Windows builds. Originally, pcretest opened its
61  input and output without "b"; then I was told that "b" was needed in some  input and output without "b"; then I was told that "b" was needed in some
# Line 59  input mode under Windows. */ Line 71  input mode under Windows. */
71  #define INPUT_MODE   "r"  #define INPUT_MODE   "r"
72  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
73    
74    #define isatty _isatty         /* This is what Windows calls them, I'm told */
75    #define fileno _fileno
76    
77  #else  #else
78  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
79  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
# Line 67  input mode under Windows. */ Line 82  input mode under Windows. */
82  #endif  #endif
83    
84    
85  #define PCRE_SPY        /* For Win32 build, import data, not export */  /* We have to include pcre_internal.h because we need the internal info for
86    displaying the results of pcre_study() and we also need to know about the
87  /* We include pcre_internal.h because we need the internal info for displaying  internal macros, structures, and other internal data values; pcretest has
88  the results of pcre_study() and we also need to know about the internal  "inside information" compared to a program that strictly follows the PCRE API.
89  macros, structures, and other internal data values; pcretest has "inside  
90  information" compared to a program that strictly follows the PCRE API. */  Although pcre_internal.h does itself include pcre.h, we explicitly include it
91    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
92    appropriately for an application, not for building PCRE. */
93    
94    #include "pcre.h"
95  #include "pcre_internal.h"  #include "pcre_internal.h"
96    
97  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* We need access to some of the data tables that PCRE uses. So as not to have
98  two copies, we include the source file here, changing the names of the external  to keep two copies, we include the source file here, changing the names of the
99  symbols to prevent clashes. */  external symbols to prevent clashes. */
100    
101    #define _pcre_ucp_gentype      ucp_gentype
102  #define _pcre_utf8_table1      utf8_table1  #define _pcre_utf8_table1      utf8_table1
103  #define _pcre_utf8_table1_size utf8_table1_size  #define _pcre_utf8_table1_size utf8_table1_size
104  #define _pcre_utf8_table2      utf8_table2  #define _pcre_utf8_table2      utf8_table2
# Line 87  symbols to prevent clashes. */ Line 106  symbols to prevent clashes. */
106  #define _pcre_utf8_table4      utf8_table4  #define _pcre_utf8_table4      utf8_table4
107  #define _pcre_utt              utt  #define _pcre_utt              utt
108  #define _pcre_utt_size         utt_size  #define _pcre_utt_size         utt_size
109    #define _pcre_utt_names        utt_names
110  #define _pcre_OP_lengths       OP_lengths  #define _pcre_OP_lengths       OP_lengths
111    
112  #include "pcre_tables.c"  #include "pcre_tables.c"
# Line 149  static int callout_count; Line 169  static int callout_count;
169  static int callout_extra;  static int callout_extra;
170  static int callout_fail_count;  static int callout_fail_count;
171  static int callout_fail_id;  static int callout_fail_id;
172    static int debug_lengths;
173  static int first_callout;  static int first_callout;
174  static int locale_set = 0;  static int locale_set = 0;
175  static int show_malloc;  static int show_malloc;
# Line 180  optimal way of handling this, but hey, t Line 201  optimal way of handling this, but hey, t
201  Arguments:  Arguments:
202    f            the file to read    f            the file to read
203    start        where in buffer to start (this *must* be within buffer)    start        where in buffer to start (this *must* be within buffer)
204      prompt       for stdin or readline()
205    
206  Returns:       pointer to the start of new data  Returns:       pointer to the start of new data
207                 could be a copy of start, or could be moved                 could be a copy of start, or could be moved
# Line 187  Returns: pointer to the start of n Line 209  Returns: pointer to the start of n
209  */  */
210    
211  static uschar *  static uschar *
212  extend_inputline(FILE *f, uschar *start)  extend_inputline(FILE *f, uschar *start, const char *prompt)
213  {  {
214  uschar *here = start;  uschar *here = start;
215    
# Line 198  for (;;) Line 220  for (;;)
220    if (rlen > 1000)    if (rlen > 1000)
221      {      {
222      int dlen;      int dlen;
223      if (fgets((char *)here, rlen,  f) == NULL)  
224        return (here == start)? NULL : start;      /* If libreadline support is required, use readline() to read a line if the
225        input is a terminal. Note that readline() removes the trailing newline, so
226        we must put it back again, to be compatible with fgets(). */
227    
228    #ifdef SUPPORT_LIBREADLINE
229        if (isatty(fileno(f)))
230          {
231          size_t len;
232          char *s = readline(prompt);
233          if (s == NULL) return (here == start)? NULL : start;
234          len = strlen(s);
235          if (len > 0) add_history(s);
236          if (len > rlen - 1) len = rlen - 1;
237          memcpy(here, s, len);
238          here[len] = '\n';
239          here[len+1] = 0;
240          free(s);
241          }
242        else
243    #endif
244    
245        /* Read the next line by normal means, prompting if the file is stdin. */
246    
247          {
248          if (f == stdin) printf(prompt);
249          if (fgets((char *)here, rlen,  f) == NULL)
250            return (here == start)? NULL : start;
251          }
252    
253      dlen = (int)strlen((char *)here);      dlen = (int)strlen((char *)here);
254      if (dlen > 0 && here[dlen - 1] == '\n') return start;      if (dlen > 0 && here[dlen - 1] == '\n') return start;
255      here += dlen;      here += dlen;
# Line 656  return count; Line 706  return count;
706    
707    
708  /*************************************************  /*************************************************
709    *         Case-independent strncmp() function    *
710    *************************************************/
711    
712    /*
713    Arguments:
714      s         first string
715      t         second string
716      n         number of characters to compare
717    
718    Returns:    < 0, = 0, or > 0, according to the comparison
719    */
720    
721    static int
722    strncmpic(uschar *s, uschar *t, int n)
723    {
724    while (n--)
725      {
726      int c = tolower(*s++) - tolower(*t++);
727      if (c) return c;
728      }
729    return 0;
730    }
731    
732    
733    
734    /*************************************************
735  *         Check newline indicator                *  *         Check newline indicator                *
736  *************************************************/  *************************************************/
737    
738  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes, where
739  xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
740    no match.
741    
742  Arguments:  Arguments:
743    p           points after the leading '<'    p           points after the leading '<'
# Line 672  Returns: appropriate PCRE_NEWLINE_x Line 749  Returns: appropriate PCRE_NEWLINE_x
749  static int  static int
750  check_newline(uschar *p, FILE *f)  check_newline(uschar *p, FILE *f)
751  {  {
752  if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
753  if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
754  if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
755  if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
756    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
757    if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
758    if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
759  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
760  return 0;  return 0;
761  }  }
# Line 689  return 0; Line 769  return 0;
769  static void  static void
770  usage(void)  usage(void)
771  {  {
772  printf("Usage:     pcretest [options] [<input> [<output>]]\n");  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
773    printf("Input and output default to stdin and stdout.\n");
774    #ifdef SUPPORT_LIBREADLINE
775    printf("If input is a terminal, readline() is used to read from it.\n");
776    #else
777    printf("This version of pcretest is not linked with readline().\n");
778    #endif
779    printf("\nOptions:\n");
780  printf("  -b       show compiled code (bytecode)\n");  printf("  -b       show compiled code (bytecode)\n");
781  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
782  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
# Line 838  while (argc > 1 && argv[op][0] == '-') Line 925  while (argc > 1 && argv[op][0] == '-')
925    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(argv[op], "-C") == 0)
926      {      {
927      int rc;      int rc;
928        unsigned long int lrc;
929      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
930      printf("Compiled with\n");      printf("Compiled with\n");
931      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
# Line 847  while (argc > 1 && argv[op][0] == '-') Line 935  while (argc > 1 && argv[op][0] == '-')
935      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
936      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
937        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
938          (rc == -2)? "ANYCRLF" :
939        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
940        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
941        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
942                                         "all Unicode newlines");
943      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
944      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
945      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
946      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
947      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
948      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
949      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
950      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %ld\n", lrc);
951      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
952      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
953      exit(0);      goto EXIT;
954      }      }
955    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(argv[op], "-help") == 0 ||
956             strcmp(argv[op], "--help") == 0)             strcmp(argv[op], "--help") == 0)
# Line 884  offsets = (int *)malloc(size_offsets_max Line 976  offsets = (int *)malloc(size_offsets_max
976  if (offsets == NULL)  if (offsets == NULL)
977    {    {
978    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
979      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
980    yield = 1;    yield = 1;
981    goto EXIT;    goto EXIT;
982    }    }
# Line 952  while (!done) Line 1044  while (!done)
1044    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
1045    
1046    use_utf8 = 0;    use_utf8 = 0;
1047      debug_lengths = 1;
1048    
1049    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (extend_inputline(infile, buffer) == NULL) break;  
1050    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1051    fflush(outfile);    fflush(outfile);
1052    
# Line 1054  while (!done) Line 1146  while (!done)
1146    
1147    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1148      {      {
1149      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1150      goto SKIP_DATA;      goto SKIP_DATA;
1151      }      }
1152    
# Line 1070  while (!done) Line 1162  while (!done)
1162        pp++;        pp++;
1163        }        }
1164      if (*pp != 0) break;      if (*pp != 0) break;
1165      if (infile == stdin) printf("    > ");      if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     if ((pp = extend_inputline(infile, pp)) == NULL)  
1166        {        {
1167        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1168        done = 1;        done = 1;
# Line 1134  while (!done) Line 1225  while (!done)
1225        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1226        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1227        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1228          case 'Z': debug_lengths = 0; break;
1229        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1230        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1231    
# Line 1162  while (!done) Line 1254  while (!done)
1254    
1255        case '<':        case '<':
1256          {          {
1257          int x = check_newline(pp, outfile);          if (strncmp((char *)pp, "JS>", 3) == 0)
1258          if (x == 0) goto SKIP_DATA;            {
1259          options |= x;            options |= PCRE_JAVASCRIPT_COMPAT;
1260          while (*pp++ != '>');            pp += 3;
1261              }
1262            else
1263              {
1264              int x = check_newline(pp, outfile);
1265              if (x == 0) goto SKIP_DATA;
1266              options |= x;
1267              while (*pp++ != '>');
1268              }
1269          }          }
1270        break;        break;
1271    
# Line 1244  while (!done) Line 1344  while (!done)
1344          {          {
1345          for (;;)          for (;;)
1346            {            {
1347            if (extend_inputline(infile, buffer) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1348              {              {
1349              done = 1;              done = 1;
1350              goto CONTINUE;              goto CONTINUE;
# Line 1307  while (!done) Line 1407  while (!done)
1407      if (do_flip)      if (do_flip)
1408        {        {
1409        real_pcre *rre = (real_pcre *)re;        real_pcre *rre = (real_pcre *)re;
1410        rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));        rre->magic_number =
1411            byteflip(rre->magic_number, sizeof(rre->magic_number));
1412        rre->size = byteflip(rre->size, sizeof(rre->size));        rre->size = byteflip(rre->size, sizeof(rre->size));
1413        rre->options = byteflip(rre->options, sizeof(rre->options));        rre->options = byteflip(rre->options, sizeof(rre->options));
1414        rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));        rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1415        rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));        rre->top_bracket =
1416        rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));          (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1417        rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));        rre->top_backref =
1418        rre->name_table_offset = byteflip(rre->name_table_offset,          (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1419          rre->first_byte =
1420            (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1421          rre->req_byte =
1422            (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1423          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1424          sizeof(rre->name_table_offset));          sizeof(rre->name_table_offset));
1425        rre->name_entry_size = byteflip(rre->name_entry_size,        rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1426          sizeof(rre->name_entry_size));          sizeof(rre->name_entry_size));
1427        rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));        rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1428            sizeof(rre->name_count));
1429    
1430        if (extra != NULL)        if (extra != NULL)
1431          {          {
# Line 1335  while (!done) Line 1442  while (!done)
1442      if (do_debug)      if (do_debug)
1443        {        {
1444        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
1445        pcre_printint(re, outfile);        pcre_printint(re, outfile, debug_lengths);
1446        }        }
1447    
1448      if (do_showinfo)      if (do_showinfo)
# Line 1344  while (!done) Line 1451  while (!done)
1451  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1452        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1453  #endif  #endif
1454        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char, okpartial, jchanged,
1455            hascrorlf;
1456        int nameentrysize, namecount;        int nameentrysize, namecount;
1457        const uschar *nametable;        const uschar *nametable;
1458    
# Line 1357  while (!done) Line 1465  while (!done)
1465        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1466        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1467        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1468          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1469          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1470          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1471    
1472  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1473        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
# Line 1398  while (!done) Line 1509  while (!done)
1509            }            }
1510          }          }
1511    
1512        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1513        to fish it out via out back door */        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1514    
1515        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
1516        if (do_flip)        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
          }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
1517    
1518        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1519          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1520            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1521            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1522            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1523            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1524            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1525            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1526              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1527              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1528            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1529            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1530            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
# Line 1426  while (!done) Line 1533  while (!done)
1533            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1534            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1535    
1536          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1537    
1538        switch (get_options & PCRE_NEWLINE_BITS)        switch (get_options & PCRE_NEWLINE_BITS)
1539          {          {
1540          case PCRE_NEWLINE_CR:          case PCRE_NEWLINE_CR:
# Line 1440  while (!done) Line 1549  while (!done)
1549          fprintf(outfile, "Forced newline sequence: CRLF\n");          fprintf(outfile, "Forced newline sequence: CRLF\n");
1550          break;          break;
1551    
1552            case PCRE_NEWLINE_ANYCRLF:
1553            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1554            break;
1555    
1556          case PCRE_NEWLINE_ANY:          case PCRE_NEWLINE_ANY:
1557          fprintf(outfile, "Forced newline sequence: ANY\n");          fprintf(outfile, "Forced newline sequence: ANY\n");
1558          break;          break;
# Line 1544  while (!done) Line 1657  while (!done)
1657        else        else
1658          {          {
1659          uschar sbuf[8];          uschar sbuf[8];
1660          sbuf[0] = (true_size >> 24)  & 255;          sbuf[0] = (uschar)((true_size >> 24) & 255);
1661          sbuf[1] = (true_size >> 16)  & 255;          sbuf[1] = (uschar)((true_size >> 16) & 255);
1662          sbuf[2] = (true_size >>  8)  & 255;          sbuf[2] = (uschar)((true_size >>  8) & 255);
1663          sbuf[3] = (true_size)  & 255;          sbuf[3] = (uschar)((true_size) & 255);
1664    
1665          sbuf[4] = (true_study_size >> 24)  & 255;          sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1666          sbuf[5] = (true_study_size >> 16)  & 255;          sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1667          sbuf[6] = (true_study_size >>  8)  & 255;          sbuf[6] = (uschar)((true_study_size >>  8) & 255);
1668          sbuf[7] = (true_study_size)  & 255;          sbuf[7] = (uschar)((true_study_size) & 255);
1669    
1670          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
1671              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1589  while (!done) Line 1702  while (!done)
1702    for (;;)    for (;;)
1703      {      {
1704      uschar *q;      uschar *q;
1705      uschar *bptr = dbuffer;      uschar *bptr;
1706      int *use_offsets = offsets;      int *use_offsets = offsets;
1707      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1708      int callout_data = 0;      int callout_data = 0;
# Line 1626  while (!done) Line 1739  while (!done)
1739      len = 0;      len = 0;
1740      for (;;)      for (;;)
1741        {        {
1742        if (infile == stdin) printf("data> ");        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
       if (extend_inputline(infile, buffer + len) == NULL)  
1743          {          {
1744          if (len > 0) break;          if (len > 0) break;
1745          done = 1;          done = 1;
# Line 1645  while (!done) Line 1757  while (!done)
1757      p = buffer;      p = buffer;
1758      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1759    
1760      q = dbuffer;      bptr = q = dbuffer;
1761      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1762        {        {
1763        int i = 0;        int i = 0;
# Line 1695  while (!done) Line 1807  while (!done)
1807              {              {
1808              unsigned char buff8[8];              unsigned char buff8[8];
1809              int ii, utn;              int ii, utn;
1810              utn = ord2utf8(c, buff8);              if (use_utf8)
1811              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];                {
1812              c = buff8[ii];   /* Last byte */                utn = ord2utf8(c, buff8);
1813                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1814                  c = buff8[ii];   /* Last byte */
1815                  }
1816                else
1817                 {
1818                 if (c > 255)
1819                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1820                     "UTF-8 mode is not enabled.\n"
1821                     "** Truncation will probably give the wrong result.\n", c);
1822                 }
1823              p = pt + 1;              p = pt + 1;
1824              break;              break;
1825              }              }
# Line 1840  while (!done) Line 1962  while (!done)
1962            if (offsets == NULL)            if (offsets == NULL)
1963              {              {
1964              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1965                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
1966              yield = 1;              yield = 1;
1967              goto EXIT;              goto EXIT;
1968              }              }
# Line 1907  while (!done) Line 2029  while (!done)
2029      *q = 0;      *q = 0;
2030      len = q - dbuffer;      len = q - dbuffer;
2031    
2032        /* Move the data to the end of the buffer so that a read over the end of
2033        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2034        we are using the POSIX interface, we must include the terminating zero. */
2035    
2036    #if !defined NOPOSIX
2037        if (posix || do_posix)
2038          {
2039          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2040          bptr += buffer_size - len - 1;
2041          }
2042        else
2043    #endif
2044          {
2045          memmove(bptr + buffer_size - len, bptr, len);
2046          bptr += buffer_size - len;
2047          }
2048    
2049      if ((all_use_dfa || use_dfa) && find_match_limit)      if ((all_use_dfa || use_dfa) && find_match_limit)
2050        {        {
2051        printf("**Match limit not relevant for DFA matching: ignored\n");        printf("**Match limit not relevant for DFA matching: ignored\n");
# Line 2209  while (!done) Line 2348  while (!done)
2348          }          }
2349    
2350        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2351        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2352        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2353        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2354        offset values to achieve this. We won't be at the end of the string -  
2355        that was checked before setting g_notempty. */        Complication arises in the case when the newline option is "any" or
2356          "anycrlf". If the previous match was at the end of a line terminated by
2357          CRLF, an advance of one character just passes the \r, whereas we should
2358          prefer the longer newline sequence, as does the code in pcre_exec().
2359          Fudge the offset value to achieve this.
2360    
2361          Otherwise, in the case of UTF-8 matching, the advance must be one
2362          character, not one byte. */
2363    
2364        else        else
2365          {          {
2366          if (g_notempty != 0)          if (g_notempty != 0)
2367            {            {
2368            int onechar = 1;            int onechar = 1;
2369              unsigned int obits = ((real_pcre *)re)->options;
2370            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2371            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2372                {
2373                int d;
2374                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2375                obits = (d == '\r')? PCRE_NEWLINE_CR :
2376                        (d == '\n')? PCRE_NEWLINE_LF :
2377                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2378                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2379                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2380                }
2381              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2382                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2383                  &&
2384                  start_offset < len - 1 &&
2385                  bptr[start_offset] == '\r' &&
2386                  bptr[start_offset+1] == '\n')
2387                onechar++;
2388              else if (use_utf8)
2389              {              {
2390              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2391                {                {
# Line 2256  while (!done) Line 2420  while (!done)
2420        character. */        character. */
2421    
2422        g_notempty = 0;        g_notempty = 0;
2423    
2424        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2425          {          {
2426          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;

Legend:
Removed from v.107  
changed lines
  Added in v.376

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12