/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 37 by nigel, Sat Feb 24 21:39:09 2007 UTC revision 47 by nigel, Sat Feb 24 21:39:29 2007 UTC
# Line 34  Makefile. */ Line 34  Makefile. */
34    
35  static FILE *outfile;  static FILE *outfile;
36  static int log_store = 0;  static int log_store = 0;
37    static size_t gotten_store;
38    
39    
40    
# Line 48  static const char *OP_names[] = { Line 49  static const char *OP_names[] = {
49    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
50    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
51    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
52    "class", "Ref",    "class", "Ref", "Recurse",
53    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
54    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
55    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
# Line 281  compiled re. */ Line 282  compiled re. */
282    
283  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
284  {  {
285    gotten_store = size;
286  if (log_store)  if (log_store)
287    fprintf(outfile, "Memory allocation (code space): %d\n",    fprintf(outfile, "Memory allocation (code space): %d\n",
288      (int)((int)size - offsetof(real_pcre, code[0])));      (int)((int)size - offsetof(real_pcre, code[0])));
# Line 289  return malloc(size); Line 291  return malloc(size);
291    
292    
293    
294    
295    /* Get one piece of information from the pcre_fullinfo() function */
296    
297    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
298    {
299    int rc;
300    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
301      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
302    }
303    
304    
305    
306    
307  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
308  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
309  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 376  while (!done) Line 391  while (!done)
391    
392  #if !defined NOPOSIX  /* There are still compilers that require no indent */  #if !defined NOPOSIX  /* There are still compilers that require no indent */
393    regex_t preg;    regex_t preg;
394      int do_posix = 0;
395  #endif  #endif
396    
397    const char *error;    const char *error;
# Line 387  while (!done) Line 403  while (!done)
403    int do_g = 0;    int do_g = 0;
404    int do_showinfo = showinfo;    int do_showinfo = showinfo;
405    int do_showrest = 0;    int do_showrest = 0;
   int do_posix = 0;  
406    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
407    
408    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
# Line 573  while (!done) Line 588  while (!done)
588        goto CONTINUE;        goto CONTINUE;
589        }        }
590    
591      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required. There are now two
592        info-returning functions. The old one has a limited interface and
593        returns only limited data. Check that it agrees with the newer one. */
594    
595      if (do_showinfo)      if (do_showinfo)
596        {        {
597        int first_char, count;        int old_first_char, old_options, old_count;
598          int count, backrefmax, first_char, need_char;
599          size_t size;
600    
601        if (do_debug) print_internals(re);        if (do_debug) print_internals(re);
602    
603        count = pcre_info(re, &options, &first_char);        new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
604          new_info(re, NULL, PCRE_INFO_SIZE, &size);
605          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
606          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
607          new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);
608          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
609    
610          old_count = pcre_info(re, &old_options, &old_first_char);
611        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
612          "Error %d while reading info\n", count);          "Error %d from pcre_info()\n", count);
613        else        else
614          {          {
615          fprintf(outfile, "Identifying subpattern count = %d\n", count);          if (old_count != count) fprintf(outfile,
616          if (options == 0) fprintf(outfile, "No options\n");            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
617            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",              old_count);
618              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
619              ((options & PCRE_CASELESS) != 0)? " caseless" : "",          if (old_first_char != first_char) fprintf(outfile,
620              ((options & PCRE_EXTENDED) != 0)? " extended" : "",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
621              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",              first_char, old_first_char);
622              ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
623              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",          if (old_options != options) fprintf(outfile,
624              ((options & PCRE_EXTRA) != 0)? " extra" : "",            "Options disagreement: pcre_fullinfo=%d pcre_info=%d\n", options,
625              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");              old_options);
626            }
         if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)  
           fprintf(outfile, "Case state changes\n");  
627    
628          if (first_char == -1)        if (size != gotten_store) fprintf(outfile,
629            {          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
630            fprintf(outfile, "First char at start or follows \\n\n");          size, gotten_store);
631            }  
632          else if (first_char < 0)        fprintf(outfile, "Capturing subpattern count = %d\n", count);
633            {        if (backrefmax > 0)
634            fprintf(outfile, "No first char\n");          fprintf(outfile, "Max back reference = %d\n", backrefmax);
635            }        if (options == 0) fprintf(outfile, "No options\n");
636            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",
637              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",
638              ((options & PCRE_CASELESS) != 0)? " caseless" : "",
639              ((options & PCRE_EXTENDED) != 0)? " extended" : "",
640              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",
641              ((options & PCRE_DOTALL) != 0)? " dotall" : "",
642              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
643              ((options & PCRE_EXTRA) != 0)? " extra" : "",
644              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");
645    
646          if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
647            fprintf(outfile, "Case state changes\n");
648    
649          if (first_char == -1)
650            {
651            fprintf(outfile, "First char at start or follows \\n\n");
652            }
653          else if (first_char < 0)
654            {
655            fprintf(outfile, "No first char\n");
656            }
657          else
658            {
659            if (isprint(first_char))
660              fprintf(outfile, "First char = \'%c\'\n", first_char);
661          else          else
662            {            fprintf(outfile, "First char = %d\n", first_char);
663            if (isprint(first_char))          }
             fprintf(outfile, "First char = \'%c\'\n", first_char);  
           else  
             fprintf(outfile, "First char = %d\n", first_char);  
           }  
664    
665          if (((((real_pcre *)re)->options) & PCRE_REQCHSET) != 0)        if (need_char < 0)
666            {          {
667            int req_char = ((real_pcre *)re)->req_char;          fprintf(outfile, "No need char\n");
668            if (isprint(req_char))          }
669              fprintf(outfile, "Req char = \'%c\'\n", req_char);        else
670            else          {
671              fprintf(outfile, "Req char = %d\n", req_char);          if (isprint(need_char))
672            }            fprintf(outfile, "Need char = \'%c\'\n", need_char);
673          else fprintf(outfile, "No req char\n");          else
674              fprintf(outfile, "Need char = %d\n", need_char);
675          }          }
676        }        }
677    
# Line 654  while (!done) Line 700  while (!done)
700        else if (extra == NULL)        else if (extra == NULL)
701          fprintf(outfile, "Study returned NULL\n");          fprintf(outfile, "Study returned NULL\n");
702    
       /* This looks at internal information. A bit kludgy to do it this  
       way, but it is useful for testing. */  
   
703        else if (do_showinfo)        else if (do_showinfo)
704          {          {
705          real_pcre_extra *xx = (real_pcre_extra *)extra;          uschar *start_bits = NULL;
706          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
707            if (start_bits == NULL)
708            fprintf(outfile, "No starting character set\n");            fprintf(outfile, "No starting character set\n");
709          else          else
710            {            {
# Line 669  while (!done) Line 713  while (!done)
713            fprintf(outfile, "Starting character set: ");            fprintf(outfile, "Starting character set: ");
714            for (i = 0; i < 256; i++)            for (i = 0; i < 256; i++)
715              {              {
716              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if ((start_bits[i/8] & (1<<(i%8))) != 0)
717                {                {
718                if (c > 75)                if (c > 75)
719                  {                  {
# Line 704  while (!done) Line 748  while (!done)
748      int copystrings = 0;      int copystrings = 0;
749      int getstrings = 0;      int getstrings = 0;
750      int getlist = 0;      int getlist = 0;
751        int gmatched = 0;
752      int start_offset = 0;      int start_offset = 0;
753        int g_notempty = 0;
754      int offsets[45];      int offsets[45];
755      int size_offsets = sizeof(offsets)/sizeof(int);      int size_offsets = sizeof(offsets)/sizeof(int);
756    
# Line 810  while (!done) Line 856  while (!done)
856        {        {
857        int rc;        int rc;
858        int eflags = 0;        int eflags = 0;
859        regmatch_t pmatch[30];        regmatch_t pmatch[sizeof(offsets)/sizeof(int)];
860        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
861        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
862    
863        rc = regexec(&preg, (const char *)bptr,        rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags);
         sizeof(pmatch)/sizeof(regmatch_t), pmatch, eflags);  
864    
865        if (rc != 0)        if (rc != 0)
866          {          {
# Line 825  while (!done) Line 870  while (!done)
870        else        else
871          {          {
872          size_t i;          size_t i;
873          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < size_offsets; i++)
874            {            {
875            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
876              {              {
# Line 849  while (!done) Line 894  while (!done)
894      else      else
895  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
896    
897      for (;;)      for (;; gmatched++)    /* Loop for /g or /G */
898        {        {
899        if (timeit)        if (timeit)
900          {          {
# Line 858  while (!done) Line 903  while (!done)
903          clock_t start_time = clock();          clock_t start_time = clock();
904          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < LOOPREPEAT; i++)
905            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
906              (do_g? start_offset : 0), options, offsets, size_offsets);              start_offset, options | g_notempty, offsets, size_offsets);
907          time_taken = clock() - start_time;          time_taken = clock() - start_time;
908          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
909            ((double)time_taken * 1000.0)/            ((double)time_taken * 1000.0)/
# Line 866  while (!done) Line 911  while (!done)
911          }          }
912    
913        count = pcre_exec(re, extra, (char *)bptr, len,        count = pcre_exec(re, extra, (char *)bptr, len,
914          (do_g? start_offset : 0), options, offsets, size_offsets);          start_offset, options | g_notempty, offsets, size_offsets);
915    
916        if (count == 0)        if (count == 0)
917          {          {
# Line 874  while (!done) Line 919  while (!done)
919          count = size_offsets/3;          count = size_offsets/3;
920          }          }
921    
922          /* Matched */
923    
924        if (count >= 0)        if (count >= 0)
925          {          {
926          int i;          int i;
# Line 888  while (!done) Line 935  while (!done)
935              fprintf(outfile, "\n");              fprintf(outfile, "\n");
936              if (i == 0)              if (i == 0)
937                {                {
               start_offset = offsets[1];  
938                if (do_showrest)                if (do_showrest)
939                  {                  {
940                  fprintf(outfile, " 0+ ");                  fprintf(outfile, " 0+ ");
# Line 946  while (!done) Line 992  while (!done)
992              free((void *)stringlist);              free((void *)stringlist);
993              }              }
994            }            }
   
995          }          }
996    
997          /* Failed to match. If this is a /g or /G loop and we previously set
998          g_notempty after a null match, this is not necessarily the end.
999          We want to advance the start offset, and continue. Fudge the offset
1000          values to achieve this. We won't be at the end of the string - that
1001          was checked before setting g_notempty. */
1002    
1003        else        else
1004          {          {
1005          if (start_offset == 0)          if (g_notempty != 0)
1006            {            {
1007            if (count == -1) fprintf(outfile, "No match\n");            offsets[0] = start_offset;
1008              else fprintf(outfile, "Error %d\n", count);            offsets[1] = start_offset + 1;
1009              }
1010            else
1011              {
1012              if (gmatched == 0)   /* Error if no previous matches */
1013                {
1014                if (count == -1) fprintf(outfile, "No match\n");
1015                  else fprintf(outfile, "Error %d\n", count);
1016                }
1017              break;  /* Out of the /g loop */
1018            }            }
         start_offset = -1;  
1019          }          }
1020    
1021        if ((!do_g && !do_G) || start_offset <= 0) break;        /* If not /g or /G we are done */
1022        if (do_G)  
1023          if (!do_g && !do_G) break;
1024    
1025          /* If we have matched an empty string, first check to see if we are at
1026          the end of the subject. If so, the /g loop is over. Otherwise, mimic
1027          what Perl's /g options does. This turns out to be rather cunning. First
1028          we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1029          same point. If this fails (picked up above) we advance to the next
1030          character. */
1031    
1032          g_notempty = 0;
1033          if (offsets[0] == offsets[1])
1034          {          {
1035          bptr += start_offset;          if (offsets[0] == len) break;
1036          len -= start_offset;          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1037          }          }
1038        }  
1039      }        /* For /g, update the start offset, leaving the rest alone */
1040    
1041          if (do_g) start_offset = offsets[1];
1042    
1043          /* For /G, update the pointer and length */
1044    
1045          else
1046            {
1047            bptr += offsets[1];
1048            len -= offsets[1];
1049            }
1050          }  /* End of loop for /g and /G */
1051        }    /* End of loop for data lines */
1052    
1053    CONTINUE:    CONTINUE:
1054    

Legend:
Removed from v.37  
changed lines
  Added in v.47

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12