/[pcre]/code/tags/pcre-8.11/pcretest.c
ViewVC logotype

Diff of /code/tags/pcre-8.11/pcretest.c

Parent Directory | Revision Log | View Patch

revision 76 by nigel, Sat Feb 24 21:40:37 2007 UTC | revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather untidy in places.  been extended and consequently is now rather, er, *very* untidy in places.
8    
9  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
10  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 44  POSSIBILITY OF SUCH DAMAGE.
44  #include <locale.h>  #include <locale.h>
45  #include <errno.h>  #include <errno.h>
46    
 /* We need the internal info for displaying the results of pcre_study(). Also  
 for getting the opcodes for showing compiled code. */  
   
47  #define PCRE_SPY        /* For Win32 build, import data, not export */  #define PCRE_SPY        /* For Win32 build, import data, not export */
48  #include "internal.h"  
49    /* We need the internal info for displaying the results of pcre_study() and
50    other internal data; pcretest also uses some of the fixed tables, and generally
51    has "inside information" compared to a program that strictly follows the PCRE
52    API. */
53    
54    #include "pcre_internal.h"
55    
56    
57  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
58  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 87  static size_t gotten_store; Line 91  static size_t gotten_store;
91  static uschar *pbuffer = NULL;  static uschar *pbuffer = NULL;
92    
93    
 static const int utf8_table1[] = {  
   0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};  
   
 static const int utf8_table2[] = {  
   0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};  
   
 static const int utf8_table3[] = {  
   0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  
   
   
   
 /*************************************************  
 *         Print compiled regex                   *  
 *************************************************/  
   
 /* The code for doing this is held in a separate file that is also included in  
 pcre.c when it is compiled with the debug switch. It defines a function called  
 print_internals(), which uses a table of opcode lengths defined by the macro  
 OP_LENGTHS, whose name must be OP_lengths. It also uses a table that translates  
 Unicode property names to numbers; this is kept in a separate file. */  
   
 static uschar OP_lengths[] = { OP_LENGTHS };  
   
 #include "ucp.h"  
 #include "ucptypetable.c"  
 #include "printint.c"  
   
   
94    
95  /*************************************************  /*************************************************
96  *          Read number from string               *  *          Read number from string               *
# Line 143  return(result); Line 119  return(result);
119    
120    
121    
 /*************************************************  
 *       Convert character value to UTF-8         *  
 *************************************************/  
   
 /* This function takes an integer value in the range 0 - 0x7fffffff  
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   buffer     pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
              -1 if input character is negative  
              0 if input character is positive but too big (only when  
              int is longer than 32 bits)  
 */  
   
 static int  
 ord2utf8(int cvalue, unsigned char *buffer)  
 {  
 register int i, j;  
 for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)  
   if (cvalue <= utf8_table1[i]) break;  
 if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  
 if (cvalue < 0) return -1;  
   
 buffer += i;  
 for (j = i; j > 0; j--)  
  {  
  *buffer-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *buffer = utf8_table2[i] | cvalue;  
 return i + 1;  
 }  
   
122    
123  /*************************************************  /*************************************************
124  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
# Line 214  if (i == 0 || i == 6) return 0; / Line 154  if (i == 0 || i == 6) return 0; /
154  /* i now has a value in the range 1-5 */  /* i now has a value in the range 1-5 */
155    
156  s = 6*i;  s = 6*i;
157  d = (c & utf8_table3[i]) << s;  d = (c & _pcre_utf8_table3[i]) << s;
158    
159  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
160    {    {
# Line 226  for (j = 0; j < i; j++) Line 166  for (j = 0; j < i; j++)
166    
167  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
168    
169  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)  for (j = 0; j < _pcre_utf8_table1_size; j++)
170    if (d <= utf8_table1[j]) break;    if (d <= _pcre_utf8_table1[j]) break;
171  if (j != i) return -(i+1);  if (j != i) return -(i+1);
172    
173  /* Valid value */  /* Valid value */
# Line 403  static void *new_malloc(size_t size) Line 343  static void *new_malloc(size_t size)
343  void *block = malloc(size);  void *block = malloc(size);
344  gotten_store = size;  gotten_store = size;
345  if (show_malloc)  if (show_malloc)
346    fprintf(outfile, "malloc       %3d %p\n", size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
347  return block;  return block;
348  }  }
349    
# Line 421  static void *stack_malloc(size_t size) Line 361  static void *stack_malloc(size_t size)
361  {  {
362  void *block = malloc(size);  void *block = malloc(size);
363  if (show_malloc)  if (show_malloc)
364    fprintf(outfile, "stack_malloc %3d %p\n", size, block);    fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
365  return block;  return block;
366  }  }
367    
# Line 484  int showinfo = 0; Line 424  int showinfo = 0;
424  int showstore = 0;  int showstore = 0;
425  int size_offsets = 45;  int size_offsets = 45;
426  int size_offsets_max;  int size_offsets_max;
427  int *offsets;  int *offsets = NULL;
428  #if !defined NOPOSIX  #if !defined NOPOSIX
429  int posix = 0;  int posix = 0;
430  #endif  #endif
431  int debug = 0;  int debug = 0;
432  int done = 0;  int done = 0;
433    int all_use_dfa = 0;
434    int yield = 0;
435    
436  unsigned char *buffer;  unsigned char *buffer;
437  unsigned char *dbuffer;  unsigned char *dbuffer;
# Line 522  while (argc > 1 && argv[op][0] == '-') Line 464  while (argc > 1 && argv[op][0] == '-')
464    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
465    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
466    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
467      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
468    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
469        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
470          *endptr == 0))          *endptr == 0))
# Line 558  while (argc > 1 && argv[op][0] == '-') Line 501  while (argc > 1 && argv[op][0] == '-')
501      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
502      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
503      printf("  -C     show PCRE compile-time options and exit\n");      printf("  -C     show PCRE compile-time options and exit\n");
504      printf("  -d     debug: show compiled code; implies -i\n"      printf("  -d     debug: show compiled code; implies -i\n");
505             "  -i     show information about compiled pattern\n"      printf("  -dfa   force DFA matching for all subjects\n");
506        printf("  -i     show information about compiled pattern\n"
507             "  -m     output memory used information\n"             "  -m     output memory used information\n"
508             "  -o <n> set size of offsets vector to <n>\n");             "  -o <n> set size of offsets vector to <n>\n");
509  #if !defined NOPOSIX  #if !defined NOPOSIX
# Line 567  while (argc > 1 && argv[op][0] == '-') Line 511  while (argc > 1 && argv[op][0] == '-')
511  #endif  #endif
512      printf("  -s     output store (memory) used information\n"      printf("  -s     output store (memory) used information\n"
513             "  -t     time compilation and execution\n");             "  -t     time compilation and execution\n");
514      return 1;      yield = 1;
515        goto EXIT;
516      }      }
517    op++;    op++;
518    argc--;    argc--;
# Line 581  if (offsets == NULL) Line 526  if (offsets == NULL)
526    {    {
527    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
528      size_offsets_max * sizeof(int));      size_offsets_max * sizeof(int));
529    return 1;    yield = 1;
530      goto EXIT;
531    }    }
532    
533  /* Sort out the input and output files */  /* Sort out the input and output files */
# Line 592  if (argc > 1) Line 538  if (argc > 1)
538    if (infile == NULL)    if (infile == NULL)
539      {      {
540      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
541      return 1;      yield = 1;
542        goto EXIT;
543      }      }
544    }    }
545    
# Line 602  if (argc > 2) Line 549  if (argc > 2)
549    if (outfile == NULL)    if (outfile == NULL)
550      {      {
551      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
552      return 1;      yield = 1;
553        goto EXIT;
554      }      }
555    }    }
556    
# Line 802  while (!done) Line 750  while (!done)
750      {      {
751      switch (*pp++)      switch (*pp++)
752        {        {
753          case 'f': options |= PCRE_FIRSTLINE; break;
754        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
755        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
756        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
# Line 831  while (!done) Line 780  while (!done)
780    
781        case 'L':        case 'L':
782        ppp = pp;        ppp = pp;
783        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows */
784          while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
785        *ppp = 0;        *ppp = 0;
786        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
787          {          {
# Line 849  while (!done) Line 799  while (!done)
799        *pp = 0;        *pp = 0;
800        break;        break;
801    
802        case '\n': case ' ': break;        case '\r':                      /* So that it works in Windows */
803          case '\n':
804          case ' ':
805          break;
806    
807        default:        default:
808        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
# Line 869  while (!done) Line 822  while (!done)
822    
823      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
824      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
825        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
826      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
827    
828      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1016  while (!done) Line 970  while (!done)
970        if (do_debug)        if (do_debug)
971          {          {
972          fprintf(outfile, "------------------------------------------------------------------\n");          fprintf(outfile, "------------------------------------------------------------------\n");
973          print_internals(re, outfile);          _pcre_printint(re, outfile);
974          }          }
975    
976        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
# Line 1049  while (!done) Line 1003  while (!done)
1003    
1004        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1005          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1006          size, regex_gotten_store);          (int)size, (int)regex_gotten_store);
1007    
1008        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1009        if (backrefmax > 0)        if (backrefmax > 0)
# Line 1080  while (!done) Line 1034  while (!done)
1034          fprintf(outfile, "Partial matching not supported\n");          fprintf(outfile, "Partial matching not supported\n");
1035    
1036        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1037          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",
1038            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1039            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1040            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1041            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1042              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1043            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1044            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1045            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
# Line 1222  while (!done) Line 1177  while (!done)
1177            }            }
1178          fclose(f);          fclose(f);
1179          }          }
1180    
1181          new_free(re);
1182          if (extra != NULL) new_free(extra);
1183          if (tables != NULL) new_free((void *)tables);
1184        continue;  /* With next regex */        continue;  /* With next regex */
1185        }        }
1186      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1244  while (!done) Line 1203  while (!done)
1203      int gmatched = 0;      int gmatched = 0;
1204      int start_offset = 0;      int start_offset = 0;
1205      int g_notempty = 0;      int g_notempty = 0;
1206        int use_dfa = 0;
1207    
1208      options = 0;      options = 0;
1209    
# Line 1309  while (!done) Line 1269  while (!done)
1269              {              {
1270              unsigned char buff8[8];              unsigned char buff8[8];
1271              int ii, utn;              int ii, utn;
1272              utn = ord2utf8(c, buff8);              utn = _pcre_ord2utf8(c, buff8);
1273              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1274              c = buff8[ii];   /* Last byte */              c = buff8[ii];   /* Last byte */
1275              p = pt + 1;              p = pt + 1;
# Line 1397  while (!done) Line 1357  while (!done)
1357            }            }
1358          continue;          continue;
1359    
1360            case 'D':
1361            if (posix || do_posix)
1362              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1363            else
1364              use_dfa = 1;
1365            continue;
1366    
1367            case 'F':
1368            options |= PCRE_DFA_SHORTEST;
1369            continue;
1370    
1371          case 'G':          case 'G':
1372          if (isdigit(*p))          if (isdigit(*p))
1373            {            {
# Line 1439  while (!done) Line 1410  while (!done)
1410              {              {
1411              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1412                size_offsets_max * sizeof(int));                size_offsets_max * sizeof(int));
1413              return 1;              yield = 1;
1414                goto EXIT;
1415              }              }
1416            }            }
1417          use_size_offsets = n;          use_size_offsets = n;
# Line 1450  while (!done) Line 1422  while (!done)
1422          options |= PCRE_PARTIAL;          options |= PCRE_PARTIAL;
1423          continue;          continue;
1424    
1425            case 'R':
1426            options |= PCRE_DFA_RESTART;
1427            continue;
1428    
1429          case 'S':          case 'S':
1430          show_malloc = 1;          show_malloc = 1;
1431          continue;          continue;
# Line 1467  while (!done) Line 1443  while (!done)
1443      *q = 0;      *q = 0;
1444      len = q - dbuffer;      len = q - dbuffer;
1445    
1446        if ((all_use_dfa || use_dfa) && find_match_limit)
1447          {
1448          printf("**Match limit not relevant for DFA matching: ignored\n");
1449          find_match_limit = 0;
1450          }
1451    
1452      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1453      support timing or playing with the match limit or callout data. */      support timing or playing with the match limit or callout data. */
1454    
# Line 1524  while (!done) Line 1506  while (!done)
1506          register int i;          register int i;
1507          clock_t time_taken;          clock_t time_taken;
1508          clock_t start_time = clock();          clock_t start_time = clock();
1509    
1510            if (all_use_dfa || use_dfa)
1511              {
1512              int workspace[1000];
1513              for (i = 0; i < LOOPREPEAT; i++)
1514                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1515                  options | g_notempty, use_offsets, use_size_offsets, workspace,
1516                  sizeof(workspace)/sizeof(int));
1517              }
1518            else
1519    
1520          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < LOOPREPEAT; i++)
1521            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
1522              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1523    
1524          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1525          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1526            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
# Line 1597  while (!done) Line 1591  while (!done)
1591        /* The normal case is just to do the match once, with the default        /* The normal case is just to do the match once, with the default
1592        value of match_limit. */        value of match_limit. */
1593    
1594        else        else if (all_use_dfa || use_dfa)
1595          {          {
1596          count = pcre_exec(re, extra, (char *)bptr, len,          int workspace[1000];
1597            start_offset, options | g_notempty, use_offsets, use_size_offsets);          count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1598              options | g_notempty, use_offsets, use_size_offsets, workspace,
1599              sizeof(workspace)/sizeof(int));
1600            if (count == 0)
1601              {
1602              fprintf(outfile, "Matched, but too many subsidiary matches\n");
1603              count = use_size_offsets/2;
1604              }
1605          }          }
1606    
1607        if (count == 0)        else
1608          {          {
1609          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
1610          count = use_size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
1611            if (count == 0)
1612              {
1613              fprintf(outfile, "Matched, but too many substrings\n");
1614              count = use_size_offsets/3;
1615              }
1616          }          }
1617    
1618        /* Matched */        /* Matched */
# Line 1692  while (!done) Line 1698  while (!done)
1698    
1699        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
1700          {          {
1701          fprintf(outfile, "Partial match\n");          fprintf(outfile, "Partial match");
1702            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
1703              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
1704                bptr + use_offsets[0]);
1705            fprintf(outfile, "\n");
1706          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
1707          }          }
1708    
# Line 1770  while (!done) Line 1780  while (!done)
1780    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1781  #endif  #endif
1782    
1783    if (re != NULL) free(re);    if (re != NULL) new_free(re);
1784    if (extra != NULL) free(extra);    if (extra != NULL) new_free(extra);
1785    if (tables != NULL)    if (tables != NULL)
1786      {      {
1787      free((void *)tables);      new_free((void *)tables);
1788      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
1789      }      }
1790    }    }
1791    
1792  if (infile == stdin) fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
1793  return 0;  
1794    EXIT:
1795    
1796    if (infile != NULL && infile != stdin) fclose(infile);
1797    if (outfile != NULL && outfile != stdout) fclose(outfile);
1798    
1799    free(buffer);
1800    free(dbuffer);
1801    free(pbuffer);
1802    free(offsets);
1803    
1804    return yield;
1805  }  }
1806    
1807  /* End */  /* End of pcretest.c */

Legend:
Removed from v.76  
changed lines
  Added in v.77

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12