/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 25 by nigel, Sat Feb 24 21:38:45 2007 UTC revision 49 by nigel, Sat Feb 24 21:39:33 2007 UTC
# Line 12  Line 12 
12  /* Use the internal info for displaying the results of pcre_study(). */  /* Use the internal info for displaying the results of pcre_study(). */
13    
14  #include "internal.h"  #include "internal.h"
15    
16    /* It is possible to compile this test program without including support for
17    testing the POSIX interface, though this is not available via the standard
18    Makefile. */
19    
20    #if !defined NOPOSIX
21  #include "pcreposix.h"  #include "pcreposix.h"
22    #endif
23    
24  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
25  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 22  Line 29 
29  #endif  #endif
30  #endif  #endif
31    
32  #define LOOPREPEAT 10000  #define LOOPREPEAT 20000
33    
34    
35  static FILE *outfile;  static FILE *outfile;
36  static int log_store = 0;  static int log_store = 0;
37    static size_t gotten_store;
38    
39    
40    
/* Tables used by the UTF-8 conversion functions below. Entry i describes a
character that is encoded in i+1 bytes:

  utf8_table1[i]   largest character value that fits in i+1 bytes
  utf8_table2[i]   marker bits that are set in the first byte
  utf8_table3[i]   mask that extracts the data bits from the first byte */

static const int utf8_table1[] = {
  0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};

static const int utf8_table2[] = {
  0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};

static const int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};


/*************************************************
*       Convert character value to UTF-8         *
*************************************************/

/* This function takes an integer value in the range 0 - 0x7fffffff
and encodes it as a UTF-8 character in 1 to 6 bytes. The most significant
bits of the value go into the first byte and each following byte carries
the next six bits, which is the layout UTF-8 requires.

Arguments:
  cvalue     the character value
  buffer     pointer to buffer for result - at least 6 bytes long

Returns:     number of characters placed in the buffer
             -1 if input character is negative
             0 if input character is positive but too big (only when
             int is longer than 32 bits)
*/

static int
ord2utf8(int cvalue, unsigned char *buffer)
{
const int ntable = (int)(sizeof(utf8_table1)/sizeof(utf8_table1[0]));
int i, j;

if (cvalue < 0) return -1;

/* Find the shortest encoding that can hold the value; i ends up as the
number of continuation bytes. */

for (i = 0; i < ntable; i++)
  if (cvalue <= utf8_table1[i]) break;
if (i >= ntable) return 0;

/* The low-order bits belong in the LAST byte, so fill in the continuation
bytes from the end backwards. What is left over afterwards is exactly the
set of bits that belongs in the first byte. */

buffer += i;
for (j = i; j > 0; j--)
  {
  *buffer-- = (unsigned char)(0x80 | (cvalue & 0x3f));
  cvalue >>= 6;
  }
*buffer = (unsigned char)(utf8_table2[i] | cvalue);
return i + 1;
}


/*************************************************
*            Convert UTF-8 string to value       *
*************************************************/

/* This function takes one or more bytes that represents a UTF-8 character,
and returns the value of the character. It is the inverse of ord2utf8():
the first byte supplies the most significant bits.

Argument:
  buffer   a pointer to the byte vector
  vptr     a pointer to an int to receive the value

Returns:   >  0 => the number of bytes consumed
           -6 to 0 => malformed UTF-8 character at offset = (-return)

*vptr is written only when the return value is greater than zero.
*/

int
utf82ord(unsigned char *buffer, int *vptr)
{
const int ntable = (int)(sizeof(utf8_table1)/sizeof(utf8_table1[0]));
int c = *buffer++;
int d = c;
int i, j, s;

/* Count the leading one bits of the first byte */

for (i = -1; i < 6; i++)               /* i is number of additional bytes */
  {
  if ((d & 0x80) == 0) break;
  d <<= 1;
  }

if (i == -1) { *vptr = c; return 1; }  /* ascii character */
if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */

/* i now has a value in the range 1-5. The data bits of the first byte are
the top bits of the result; each continuation byte adds six more bits
below them. */

s = 6*i;
d = (c & utf8_table3[i]) << s;

for (j = 0; j < i; j++)
  {
  c = *buffer++;
  if ((c & 0xc0) != 0x80) return -(j+1);
  s -= 6;
  d |= (c & 0x3f) << s;
  }

/* Check that encoding was the correct unique one: a value that would have
fitted into fewer bytes is an over-long form and is rejected. */

for (j = 0; j < ntable; j++)
  if (d <= utf8_table1[j]) break;
if (j != i) return -(i+1);

/* Valid value */

*vptr = d;
return i+1;
}
142    
143    
144    
145    
146    
147    
# Line 41  static const char *OP_names[] = { Line 156  static const char *OP_names[] = {
156    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
157    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
158    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
159    "class", "Ref",    "class", "Ref", "Recurse",
160    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
161    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
162    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
163  };  };
164    
165    
166  static void print_internals(pcre *re, FILE *outfile)  static void print_internals(pcre *re)
167  {  {
168  unsigned char *code = ((real_pcre *)re)->code;  unsigned char *code = ((real_pcre *)re)->code;
169    
# Line 257  for(;;) Line 372  for(;;)
372    
373    
374    
375  /* Character string printing function. */  /* Character string printing function. A "normal" and a UTF-8 version. */
376    
377  static void pchars(unsigned char *p, int length)  static void pchars(unsigned char *p, int length, int utf8)
378  {  {
379  int c;  int c;
380  while (length-- > 0)  while (length-- > 0)
381      {
382      if (utf8)
383        {
384        int rc = utf82ord(p, &c);
385        if (rc > 0)
386          {
387          length -= rc - 1;
388          p += rc;
389          if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);
390            else fprintf(outfile, "\\x{%02x}", c);
391          continue;
392          }
393        }
394    
395       /* Not UTF-8, or malformed UTF-8  */
396    
397    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
398      else fprintf(outfile, "\\x%02x", c);      else fprintf(outfile, "\\x%02x", c);
399      }
400  }  }
401    
402    
# Line 274  compiled re. */ Line 406  compiled re. */
406    
407  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
408  {  {
409  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  gotten_store = size;
410    if (log_store)
411      fprintf(outfile, "Memory allocation (code space): %d\n",
412        (int)((int)size - offsetof(real_pcre, code[0])));
413  return malloc(size);  return malloc(size);
414  }  }
415    
416    
417    
418    
419    /* Get one piece of information from the pcre_fullinfo() function.
       The four arguments are passed straight through to pcre_fullinfo(). If
       it returns a negative value, that value and the requested option number
       are written to outfile. Nothing is returned to the caller, so a failure
       shows up only in the output; whether *ptr has been set in that case
       depends on pcre_fullinfo() and cannot be told from here. */
420    
421    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
422    {
423    int rc;
424    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
425      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
426    }
427    
428    
429    
430    
431  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
432  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
433  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 292  int study_options = 0; Line 440  int study_options = 0;
440  int op = 1;  int op = 1;
441  int timeit = 0;  int timeit = 0;
442  int showinfo = 0;  int showinfo = 0;
443    int showstore = 0;
444  int posix = 0;  int posix = 0;
445  int debug = 0;  int debug = 0;
446  int done = 0;  int done = 0;
# Line 306  outfile = stdout; Line 455  outfile = stdout;
455    
456  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
457    {    {
458    if (strcmp(argv[op], "-s") == 0) log_store = 1;    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
459        showstore = 1;
460    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
461    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
462    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
# Line 362  while (!done) Line 512  while (!done)
512    {    {
513    pcre *re = NULL;    pcre *re = NULL;
514    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
515    
516    #if !defined NOPOSIX  /* There are still compilers that require no indent */
517    regex_t preg;    regex_t preg;
518      int do_posix = 0;
519    #endif
520    
521    const char *error;    const char *error;
522    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
523    unsigned const char *tables = NULL;    unsigned const char *tables = NULL;
524    int do_study = 0;    int do_study = 0;
525    int do_debug = debug;    int do_debug = debug;
526      int do_G = 0;
527      int do_g = 0;
528    int do_showinfo = showinfo;    int do_showinfo = showinfo;
529    int do_posix = 0;    int do_showrest = 0;
530      int utf8 = 0;
531    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
532    
533    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
# Line 385  while (!done) Line 543  while (!done)
543    
544    delimiter = *p++;    delimiter = *p++;
545    
546    if (isalnum(delimiter))    if (isalnum(delimiter) || delimiter == '\\')
547      {      {
548      fprintf(outfile, "** Delimiter must not be alphameric\n");      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
549      goto SKIP_DATA;      goto SKIP_DATA;
550      }      }
551    
# Line 395  while (!done) Line 553  while (!done)
553    
554    for(;;)    for(;;)
555      {      {
556      while (*pp != 0 && *pp != delimiter) pp++;      while (*pp != 0)
557          {
558          if (*pp == '\\' && pp[1] != 0) pp++;
559            else if (*pp == delimiter) break;
560          pp++;
561          }
562      if (*pp != 0) break;      if (*pp != 0) break;
563    
564      len = sizeof(buffer) - (pp - buffer);      len = sizeof(buffer) - (pp - buffer);
# Line 415  while (!done) Line 578  while (!done)
578      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
579      }      }
580    
581      /* If the first character after the delimiter is backslash, make
582      the pattern end with backslash. This is purely to provide a way
583      of testing for the error message when a pattern ends with backslash. */
584    
585      if (pp[1] == '\\') *pp++ = '\\';
586    
587    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter */
588    
589    *pp++ = 0;    *pp++ = 0;
# Line 423  while (!done) Line 592  while (!done)
592    
593    options = 0;    options = 0;
594    study_options = 0;    study_options = 0;
595      log_store = showstore;  /* default from command line */
596    
597    while (*pp != 0)    while (*pp != 0)
598      {      {
599      switch (*pp++)      switch (*pp++)
600        {        {
601          case 'g': do_g = 1; break;
602        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
603        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
604        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
605        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
606    
607          case '+': do_showrest = 1; break;
608        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
609        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
610        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
611          case 'G': do_G = 1; break;
612        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
613          case 'M': log_store = 1; break;
614    
615    #if !defined NOPOSIX
616        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
617    #endif
618    
619        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
620        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
621        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
622          case '8': options |= PCRE_UTF8; utf8 = 1; break;
623    
624        case 'L':        case 'L':
625        ppp = pp;        ppp = pp;
# Line 465  while (!done) Line 645  while (!done)
645    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
646    local character tables. */    local character tables. */
647    
648    #if !defined NOPOSIX
649    if (posix || do_posix)    if (posix || do_posix)
650      {      {
651      int rc;      int rc;
# Line 487  while (!done) Line 668  while (!done)
668    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
669    
670    else    else
671    #endif  /* !defined NOPOSIX */
672    
673      {      {
674      if (timeit)      if (timeit)
675        {        {
# Line 499  while (!done) Line 682  while (!done)
682          if (re != NULL) free(re);          if (re != NULL) free(re);
683          }          }
684        time_taken = clock() - start_time;        time_taken = clock() - start_time;
685        fprintf(outfile, "Compile time %.2f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
686          ((double)time_taken)/(4 * CLOCKS_PER_SEC));          ((double)time_taken * 1000.0) /
687            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
688        }        }
689    
690      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 530  while (!done) Line 714  while (!done)
714        goto CONTINUE;        goto CONTINUE;
715        }        }
716    
717      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required. There are now two
718        info-returning functions. The old one has a limited interface and
719        returns only limited data. Check that it agrees with the newer one. */
720    
721      if (do_showinfo)      if (do_showinfo)
722        {        {
723        int first_char, count;        int old_first_char, old_options, old_count;
724          int count, backrefmax, first_char, need_char;
725          size_t size;
726    
727          if (do_debug) print_internals(re);
728    
729          new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
730          new_info(re, NULL, PCRE_INFO_SIZE, &size);
731          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
732          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
733          new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);
734          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
735    
736        if (do_debug) print_internals(re, outfile);        old_count = pcre_info(re, &old_options, &old_first_char);
   
       count = pcre_info(re, &options, &first_char);  
737        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
738          "Error %d while reading info\n", count);          "Error %d from pcre_info()\n", count);
739        else        else
740          {          {
741          fprintf(outfile, "Identifying subpattern count = %d\n", count);          if (old_count != count) fprintf(outfile,
742          if (options == 0) fprintf(outfile, "No options\n");            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
743            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",              old_count);
744              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
745              ((options & PCRE_CASELESS) != 0)? " caseless" : "",          if (old_first_char != first_char) fprintf(outfile,
746              ((options & PCRE_EXTENDED) != 0)? " extended" : "",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
747              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",              first_char, old_first_char);
748              ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
749              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",          if (old_options != options) fprintf(outfile,
750              ((options & PCRE_EXTRA) != 0)? " extra" : "",            "Options disagreement: pcre_fullinfo=%d pcre_info=%d\n", options,
751              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");              old_options);
752          if (first_char == -1)          }
753            {  
754            fprintf(outfile, "First char at start or follows \\n\n");        if (size != gotten_store) fprintf(outfile,
755            }          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
756          else if (first_char < 0)          size, gotten_store);
757            {  
758            fprintf(outfile, "No first char\n");        fprintf(outfile, "Capturing subpattern count = %d\n", count);
759            }        if (backrefmax > 0)
760            fprintf(outfile, "Max back reference = %d\n", backrefmax);
761          if (options == 0) fprintf(outfile, "No options\n");
762            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
763              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",
764              ((options & PCRE_CASELESS) != 0)? " caseless" : "",
765              ((options & PCRE_EXTENDED) != 0)? " extended" : "",
766              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",
767              ((options & PCRE_DOTALL) != 0)? " dotall" : "",
768              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
769              ((options & PCRE_EXTRA) != 0)? " extra" : "",
770              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
771              ((options & PCRE_UTF8) != 0)? " utf8" : "");
772    
773          if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
774            fprintf(outfile, "Case state changes\n");
775    
776          if (first_char == -1)
777            {
778            fprintf(outfile, "First char at start or follows \\n\n");
779            }
780          else if (first_char < 0)
781            {
782            fprintf(outfile, "No first char\n");
783            }
784          else
785            {
786            if (isprint(first_char))
787              fprintf(outfile, "First char = \'%c\'\n", first_char);
788          else          else
789            {            fprintf(outfile, "First char = %d\n", first_char);
790            if (isprint(first_char))          }
791              fprintf(outfile, "First char = \'%c\'\n", first_char);  
792            else        if (need_char < 0)
793              fprintf(outfile, "First char = %d\n", first_char);          {
794            }          fprintf(outfile, "No need char\n");
795            }
796          else
797            {
798            if (isprint(need_char))
799              fprintf(outfile, "Need char = \'%c\'\n", need_char);
800            else
801              fprintf(outfile, "Need char = %d\n", need_char);
802          }          }
803        }        }
804    
# Line 586  while (!done) Line 816  while (!done)
816            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
817          time_taken = clock() - start_time;          time_taken = clock() - start_time;
818          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
819          fprintf(outfile, "  Study time %.2f milliseconds\n",          fprintf(outfile, "  Study time %.3f milliseconds\n",
820            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            ((double)time_taken * 1000.0)/
821              ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
822          }          }
823    
824        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 596  while (!done) Line 827  while (!done)
827        else if (extra == NULL)        else if (extra == NULL)
828          fprintf(outfile, "Study returned NULL\n");          fprintf(outfile, "Study returned NULL\n");
829    
       /* This looks at internal information. A bit kludgy to do it this  
       way, but it is useful for testing. */  
   
830        else if (do_showinfo)        else if (do_showinfo)
831          {          {
832          real_pcre_extra *xx = (real_pcre_extra *)extra;          uschar *start_bits = NULL;
833          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
834            if (start_bits == NULL)
835            fprintf(outfile, "No starting character set\n");            fprintf(outfile, "No starting character set\n");
836          else          else
837            {            {
# Line 611  while (!done) Line 840  while (!done)
840            fprintf(outfile, "Starting character set: ");            fprintf(outfile, "Starting character set: ");
841            for (i = 0; i < 256; i++)            for (i = 0; i < 256; i++)
842              {              {
843              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if ((start_bits[i/8] & (1<<(i%8))) != 0)
844                {                {
845                if (c > 75)                if (c > 75)
846                  {                  {
# Line 641  while (!done) Line 870  while (!done)
870    for (;;)    for (;;)
871      {      {
872      unsigned char *q;      unsigned char *q;
873        unsigned char *bptr = dbuffer;
874      int count, c;      int count, c;
875        int copystrings = 0;
876        int getstrings = 0;
877        int getlist = 0;
878        int gmatched = 0;
879        int start_offset = 0;
880        int g_notempty = 0;
881      int offsets[45];      int offsets[45];
882      int size_offsets = sizeof(offsets)/sizeof(int);      int size_offsets = sizeof(offsets)/sizeof(int);
883    
884      options = 0;      options = 0;
885    
886      if (infile == stdin) printf("  data> ");      if (infile == stdin) printf("data> ");
887      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
888        {        {
889        done = 1;        done = 1;
# Line 687  while (!done) Line 923  while (!done)
923          break;          break;
924    
925          case 'x':          case 'x':
926    
927            /* Handle \x{..} specially - new Perl thing for utf8 */
928    
929            if (*p == '{')
930              {
931              unsigned char *pt = p;
932              c = 0;
933              while (isxdigit(*(++pt)))
934                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
935              if (*pt == '}')
936                {
937                unsigned char buffer[8];
938                int ii, utn;
939                utn = ord2utf8(c, buffer);
940                for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];
941                c = buffer[ii];   /* Last byte */
942                p = pt + 1;
943                break;
944                }
945              /* Not correct form; fall through */
946              }
947    
948            /* Ordinary \x */
949    
950          c = 0;          c = 0;
951          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
952            {            {
# Line 707  while (!done) Line 967  while (!done)
967          options |= PCRE_NOTBOL;          options |= PCRE_NOTBOL;
968          continue;          continue;
969    
970            case 'C':
971            while(isdigit(*p)) n = n * 10 + *p++ - '0';
972            copystrings |= 1 << n;
973            continue;
974    
975            case 'G':
976            while(isdigit(*p)) n = n * 10 + *p++ - '0';
977            getstrings |= 1 << n;
978            continue;
979    
980            case 'L':
981            getlist = 1;
982            continue;
983    
984            case 'N':
985            options |= PCRE_NOTEMPTY;
986            continue;
987    
988          case 'O':          case 'O':
989          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
990          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;
# Line 724  while (!done) Line 1002  while (!done)
1002      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1003      support timing. */      support timing. */
1004    
1005    #if !defined NOPOSIX
1006      if (posix || do_posix)      if (posix || do_posix)
1007        {        {
1008        int rc;        int rc;
1009        int eflags = 0;        int eflags = 0;
1010        regmatch_t pmatch[30];        regmatch_t pmatch[sizeof(offsets)/sizeof(int)];
1011        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1012        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1013    
1014        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags);
         pmatch, eflags);  
1015    
1016        if (rc != 0)        if (rc != 0)
1017          {          {
# Line 743  while (!done) Line 1021  while (!done)
1021        else        else
1022          {          {
1023          size_t i;          size_t i;
1024          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < size_offsets; i++)
1025            {            {
1026            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1027              {              {
1028              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1029              pchars(dbuffer + pmatch[i].rm_so,              pchars(dbuffer + pmatch[i].rm_so,
1030                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, utf8);
1031              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1032                if (i == 0 && do_showrest)
1033                  {
1034                  fprintf(outfile, " 0+ ");
1035                  pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);
1036                  fprintf(outfile, "\n");
1037                  }
1038              }              }
1039            }            }
1040          }          }
1041        }        }
1042    
1043      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
1044    
1045      else      else
1046    #endif  /* !defined NOPOSIX */
1047    
1048        for (;; gmatched++)    /* Loop for /g or /G */
1049        {        {
1050        if (timeit)        if (timeit)
1051          {          {
1052          register int i;          register int i;
1053          clock_t time_taken;          clock_t time_taken;
1054          clock_t start_time = clock();          clock_t start_time = clock();
1055          for (i = 0; i < 4000; i++)          for (i = 0; i < LOOPREPEAT; i++)
1056            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,            count = pcre_exec(re, extra, (char *)bptr, len,
1057              size_offsets);              start_offset, options | g_notempty, offsets, size_offsets);
1058          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1059          fprintf(outfile, "Execute time %.2f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1060            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            ((double)time_taken * 1000.0)/
1061              ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
1062          }          }
1063    
1064        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        count = pcre_exec(re, extra, (char *)bptr, len,
1065          size_offsets);          start_offset, options | g_notempty, offsets, size_offsets);
1066    
1067        if (count == 0)        if (count == 0)
1068          {          {
# Line 782  while (!done) Line 1070  while (!done)
1070          count = size_offsets/3;          count = size_offsets/3;
1071          }          }
1072    
1073          /* Matched */
1074    
1075        if (count >= 0)        if (count >= 0)
1076          {          {
1077          int i;          int i;
1078          count *= 2;          for (i = 0; i < count * 2; i += 2)
         for (i = 0; i < count; i += 2)  
1079            {            {
1080            if (offsets[i] < 0)            if (offsets[i] < 0)
1081              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
1082            else            else
1083              {              {
1084              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1085              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              pchars(bptr + offsets[i], offsets[i+1] - offsets[i], utf8);
1086              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1087                if (i == 0)
1088                  {
1089                  if (do_showrest)
1090                    {
1091                    fprintf(outfile, " 0+ ");
1092                    pchars(bptr + offsets[i+1], len - offsets[i+1], utf8);
1093                    fprintf(outfile, "\n");
1094                    }
1095                  }
1096                }
1097              }
1098    
1099            for (i = 0; i < 32; i++)
1100              {
1101              if ((copystrings & (1 << i)) != 0)
1102                {
1103                char copybuffer[16];
1104                int rc = pcre_copy_substring((char *)bptr, offsets, count,
1105                  i, copybuffer, sizeof(copybuffer));
1106                if (rc < 0)
1107                  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1108                else
1109                  fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1110                }
1111              }
1112    
1113            for (i = 0; i < 32; i++)
1114              {
1115              if ((getstrings & (1 << i)) != 0)
1116                {
1117                const char *substring;
1118                int rc = pcre_get_substring((char *)bptr, offsets, count,
1119                  i, &substring);
1120                if (rc < 0)
1121                  fprintf(outfile, "get substring %d failed %d\n", i, rc);
1122                else
1123                  {
1124                  fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1125                  /* free((void *)substring); */
1126                  pcre_free_substring(substring);
1127                  }
1128                }
1129              }
1130    
1131            if (getlist)
1132              {
1133              const char **stringlist;
1134              int rc = pcre_get_substring_list((char *)bptr, offsets, count,
1135                &stringlist);
1136              if (rc < 0)
1137                fprintf(outfile, "get substring list failed %d\n", rc);
1138              else
1139                {
1140                for (i = 0; i < count; i++)
1141                  fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1142                if (stringlist[i] != NULL)
1143                  fprintf(outfile, "string list not terminated by NULL\n");
1144                /* free((void *)stringlist); */
1145                pcre_free_substring_list(stringlist);
1146              }              }
1147            }            }
1148          }          }
1149    
1150          /* Failed to match. If this is a /g or /G loop and we previously set
1151          g_notempty after a null match, this is not necessarily the end.
1152          We want to advance the start offset, and continue. Fudge the offset
1153          values to achieve this. We won't be at the end of the string - that
1154          was checked before setting g_notempty. */
1155    
1156        else        else
1157          {          {
1158          if (count == -1) fprintf(outfile, "No match\n");          if (g_notempty != 0)
1159            else fprintf(outfile, "Error %d\n", count);            {
1160              offsets[0] = start_offset;
1161              offsets[1] = start_offset + 1;
1162              }
1163            else
1164              {
1165              if (gmatched == 0)   /* Error if no previous matches */
1166                {
1167                if (count == -1) fprintf(outfile, "No match\n");
1168                  else fprintf(outfile, "Error %d\n", count);
1169                }
1170              break;  /* Out of the /g loop */
1171              }
1172            }
1173    
1174          /* If not /g or /G we are done */
1175    
1176          if (!do_g && !do_G) break;
1177    
1178          /* If we have matched an empty string, first check to see if we are at
1179          the end of the subject. If so, the /g loop is over. Otherwise, mimic
1180          what Perl's /g option does. This turns out to be rather cunning. First
1181          we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1182          same point. If this fails (picked up above) we advance to the next
1183          character. */
1184    
1185          g_notempty = 0;
1186          if (offsets[0] == offsets[1])
1187            {
1188            if (offsets[0] == len) break;
1189            g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1190          }          }
1191        }  
1192      }        /* For /g, update the start offset, leaving the rest alone */
1193    
1194          if (do_g) start_offset = offsets[1];
1195    
1196          /* For /G, update the pointer and length */
1197    
1198          else
1199            {
1200            bptr += offsets[1];
1201            len -= offsets[1];
1202            }
1203          }  /* End of loop for /g and /G */
1204        }    /* End of loop for data lines */
1205    
1206    CONTINUE:    CONTINUE:
1207    
1208    #if !defined NOPOSIX
1209    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1210    #endif
1211    
1212    if (re != NULL) free(re);    if (re != NULL) free(re);
1213    if (extra != NULL) free(extra);    if (extra != NULL) free(extra);
1214    if (tables != NULL)    if (tables != NULL)

Legend:
Removed from v.25  
changed lines
  Added in v.49

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12