/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 3 by nigel, Sat Feb 24 21:38:01 2007 UTC | revision 55 by nigel, Sat Feb 24 21:39:46 2007 UTC
# Line 7  Line 7 
7  #include <string.h>  #include <string.h>
8  #include <stdlib.h>  #include <stdlib.h>
9  #include <time.h>  #include <time.h>
10    #include <locale.h>
11    
12  /* Use the internal info for displaying the results of pcre_study(). */  /* Use the internal info for displaying the results of pcre_study(). */
13    
14  #include "internal.h"  #include "internal.h"
15    
16    /* It is possible to compile this test program without including support for
17    testing the POSIX interface, though this is not available via the standard
18    Makefile. */
19    
20    #if !defined NOPOSIX
21  #include "pcreposix.h"  #include "pcreposix.h"
22    #endif
23    
24  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
25  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 21  Line 29 
29  #endif  #endif
30  #endif  #endif
31    
32    #define LOOPREPEAT 20000
33    
34    
35  static FILE *outfile;  static FILE *outfile;
36  static int log_store = 0;  static int log_store = 0;
37    static size_t gotten_store;
38    
39    
40    
41    static int utf8_table1[] = {
42      0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};
43    
44    static int utf8_table2[] = {
45      0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
46    
47    static int utf8_table3[] = {
48      0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
49    
50    
51    /*************************************************
52    *       Convert character value to UTF-8         *
53    *************************************************/
54    
55    /* This function takes an integer value in the range 0 - 0x7fffffff
56    and encodes it as a UTF-8 character in 0 to 6 bytes.
57    
58    Arguments:
59      cvalue     the character value
60      buffer     pointer to buffer for result - at least 6 bytes long
61    
62    Returns:     number of characters placed in the buffer
63                 -1 if input character is negative
64                 0 if input character is positive but too big (only when
65                 int is longer than 32 bits)
66    */
67    
68    static int
69    ord2utf8(int cvalue, unsigned char *buffer)
70    {
71    register int i, j;
72    for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
73      if (cvalue <= utf8_table1[i]) break;
74    if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
75    if (cvalue < 0) return -1;
76    *buffer++ = utf8_table2[i] | (cvalue & utf8_table3[i]);
77    cvalue >>= 6 - i;
78    for (j = 0; j < i; j++)
79      {
80      *buffer++ = 0x80 | (cvalue & 0x3f);
81      cvalue >>= 6;
82      }
83    return i + 1;
84    }
85    
86    
87    /*************************************************
88    *            Convert UTF-8 string to value       *
89    *************************************************/
90    
91    /* This function takes one or more bytes that represents a UTF-8 character,
92    and returns the value of the character.
93    
94    Argument:
95      buffer   a pointer to the byte vector
96      vptr     a pointer to an int to receive the value
97    
98    Returns:   >  0 => the number of bytes consumed
99               -6 to 0 => malformed UTF-8 character at offset = (-return)
100    */
101    
102    int
103    utf82ord(unsigned char *buffer, int *vptr)
104    {
105    int c = *buffer++;
106    int d = c;
107    int i, j, s;
108    
109    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
110      {
111      if ((d & 0x80) == 0) break;
112      d <<= 1;
113      }
114    
115    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
116    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
117    
118    /* i now has a value in the range 1-5 */
119    
120    d = c & utf8_table3[i];
121    s = 6 - i;
122    
123    for (j = 0; j < i; j++)
124      {
125      c = *buffer++;
126      if ((c & 0xc0) != 0x80) return -(j+1);
127      d |= (c & 0x3f) << s;
128      s += 6;
129      }
130    
131    /* Check that encoding was the correct unique one */
132    
133    for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
134      if (d <= utf8_table1[j]) break;
135    if (j != i) return -(i+1);
136    
137    /* Valid value */
138    
139    *vptr = d;
140    return i+1;
141    }
142    
143    
144    
145    
146    
147    
148  /* Debugging function to print the internal form of the regex. This is the same  /* Debugging function to print the internal form of the regex. This is the same
149  code as contained in pcre.c under the DEBUG macro. */  code as contained in pcre.c under the DEBUG macro. */
150    
151  static char *OP_names[] = { "End", "\\A", "\\B", "\\b", "\\D", "\\d",  static const char *OP_names[] = {
152    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",    "End", "\\A", "\\B", "\\b", "\\D", "\\d",
153    "not",    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
154      "Opt", "^", "$", "Any", "chars", "not",
155    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
156    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
157    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
158    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
159    "class", "Ref",    "class", "Ref", "Recurse",
160    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
161    "Brazero", "Braminzero", "Bra"    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
162      "Brazero", "Braminzero", "Branumber", "Bra"
163  };  };
164    
165    
# Line 47  static void print_internals(pcre *re) Line 167  static void print_internals(pcre *re)
167  {  {
168  unsigned char *code = ((real_pcre *)re)->code;  unsigned char *code = ((real_pcre *)re)->code;
169    
170  printf("------------------------------------------------------------------\n");  fprintf(outfile, "------------------------------------------------------------------\n");
171    
172  for(;;)  for(;;)
173    {    {
174    int c;    int c;
175    int charlength;    int charlength;
176    
177    printf("%3d ", code - ((real_pcre *)re)->code);    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));
178    
179    if (*code >= OP_BRA)    if (*code >= OP_BRA)
180      {      {
181      printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);      if (*code - OP_BRA > EXTRACT_BASIC_MAX)
182          fprintf(outfile, "%3d Bra extra", (code[1] << 8) + code[2]);
183        else
184          fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
185      code += 2;      code += 2;
186      }      }
187    
188    else switch(*code)    else switch(*code)
189      {      {
190      case OP_END:      case OP_END:
191      printf("    %s\n", OP_names[*code]);      fprintf(outfile, "    %s\n", OP_names[*code]);
192      printf("------------------------------------------------------------------\n");      fprintf(outfile, "------------------------------------------------------------------\n");
193      return;      return;
194    
195        case OP_OPT:
196        fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);
197        code++;
198        break;
199    
200      case OP_CHARS:      case OP_CHARS:
201      charlength = *(++code);      charlength = *(++code);
202      printf("%3d ", charlength);      fprintf(outfile, "%3d ", charlength);
203      while (charlength-- > 0)      while (charlength-- > 0)
204        if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);        if (isprint(c = *(++code))) fprintf(outfile, "%c", c);
205            else fprintf(outfile, "\\x%02x", c);
206      break;      break;
207    
208      case OP_KETRMAX:      case OP_KETRMAX:
# Line 82  for(;;) Line 211  for(;;)
211      case OP_KET:      case OP_KET:
212      case OP_ASSERT:      case OP_ASSERT:
213      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
214        case OP_ASSERTBACK:
215        case OP_ASSERTBACK_NOT:
216      case OP_ONCE:      case OP_ONCE:
217      printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);      case OP_COND:
218        case OP_BRANUMBER:
219        case OP_REVERSE:
220        case OP_CREF:
221        fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
222      code += 2;      code += 2;
223      break;      break;
224    
# Line 100  for(;;) Line 235  for(;;)
235      case OP_TYPEQUERY:      case OP_TYPEQUERY:
236      case OP_TYPEMINQUERY:      case OP_TYPEMINQUERY:
237      if (*code >= OP_TYPESTAR)      if (*code >= OP_TYPESTAR)
238        printf("    %s", OP_names[code[1]]);        fprintf(outfile, "    %s", OP_names[code[1]]);
239      else if (isprint(c = code[1])) printf("    %c", c);      else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);
240        else printf("    \\x%02x", c);        else fprintf(outfile, "    \\x%02x", c);
241      printf("%s", OP_names[*code++]);      fprintf(outfile, "%s", OP_names[*code++]);
242      break;      break;
243    
244      case OP_EXACT:      case OP_EXACT:
245      case OP_UPTO:      case OP_UPTO:
246      case OP_MINUPTO:      case OP_MINUPTO:
247      if (isprint(c = code[3])) printf("    %c{", c);      if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);
248        else printf("    \\x%02x{", c);        else fprintf(outfile, "    \\x%02x{", c);
249      if (*code != OP_EXACT) printf(",");      if (*code != OP_EXACT) fprintf(outfile, ",");
250      printf("%d}", (code[1] << 8) + code[2]);      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
251      if (*code == OP_MINUPTO) printf("?");      if (*code == OP_MINUPTO) fprintf(outfile, "?");
252      code += 3;      code += 3;
253      break;      break;
254    
255      case OP_TYPEEXACT:      case OP_TYPEEXACT:
256      case OP_TYPEUPTO:      case OP_TYPEUPTO:
257      case OP_TYPEMINUPTO:      case OP_TYPEMINUPTO:
258      printf("    %s{", OP_names[code[3]]);      fprintf(outfile, "    %s{", OP_names[code[3]]);
259      if (*code != OP_TYPEEXACT) printf(",");      if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");
260      printf("%d}", (code[1] << 8) + code[2]);      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
261      if (*code == OP_TYPEMINUPTO) printf("?");      if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");
262      code += 3;      code += 3;
263      break;      break;
264    
265      case OP_NOT:      case OP_NOT:
266      if (isprint(c = *(++code))) printf("    [^%c]", c);      if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);
267        else printf("    [^\\x%02x]", c);        else fprintf(outfile, "    [^\\x%02x]", c);
268      break;      break;
269    
270      case OP_NOTSTAR:      case OP_NOTSTAR:
# Line 138  for(;;) Line 273  for(;;)
273      case OP_NOTMINPLUS:      case OP_NOTMINPLUS:
274      case OP_NOTQUERY:      case OP_NOTQUERY:
275      case OP_NOTMINQUERY:      case OP_NOTMINQUERY:
276      if (isprint(c = code[1])) printf("    [^%c]", c);      if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);
277        else printf("    [^\\x%02x]", c);        else fprintf(outfile, "    [^\\x%02x]", c);
278      printf("%s", OP_names[*code++]);      fprintf(outfile, "%s", OP_names[*code++]);
279      break;      break;
280    
281      case OP_NOTEXACT:      case OP_NOTEXACT:
282      case OP_NOTUPTO:      case OP_NOTUPTO:
283      case OP_NOTMINUPTO:      case OP_NOTMINUPTO:
284      if (isprint(c = code[3])) printf("    [^%c]{", c);      if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);
285        else printf("    [^\\x%02x]{", c);        else fprintf(outfile, "    [^\\x%02x]{", c);
286      if (*code != OP_NOTEXACT) printf(",");      if (*code != OP_NOTEXACT) fprintf(outfile, ",");
287      printf("%d}", (code[1] << 8) + code[2]);      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
288      if (*code == OP_NOTMINUPTO) printf("?");      if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");
289      code += 3;      code += 3;
290      break;      break;
291    
292      case OP_REF:      case OP_REF:
293      printf("    \\%d", *(++code));      fprintf(outfile, "    \\%d", (code[1] << 8) | code[2]);
294      break;      code += 3;
295        goto CLASS_REF_REPEAT;
296    
297      case OP_CLASS:      case OP_CLASS:
298        {        {
299        int i, min, max;        int i, min, max;
   
300        code++;        code++;
301        printf("    [");        fprintf(outfile, "    [");
302    
303        for (i = 0; i < 256; i++)        for (i = 0; i < 256; i++)
304          {          {
# Line 172  for(;;) Line 307  for(;;)
307            int j;            int j;
308            for (j = i+1; j < 256; j++)            for (j = i+1; j < 256; j++)
309              if ((code[j/8] & (1 << (j&7))) == 0) break;              if ((code[j/8] & (1 << (j&7))) == 0) break;
310            if (i == '-' || i == ']') printf("\\");            if (i == '-' || i == ']') fprintf(outfile, "\\");
311            if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);            if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);
312            if (--j > i)            if (--j > i)
313              {              {
314              printf("-");              fprintf(outfile, "-");
315              if (j == '-' || j == ']') printf("\\");              if (j == '-' || j == ']') fprintf(outfile, "\\");
316              if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);              if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);
317              }              }
318            i = j;            i = j;
319            }            }
320          }          }
321        printf("]");        fprintf(outfile, "]");
322        code += 32;        code += 32;
323    
324          CLASS_REF_REPEAT:
325    
326        switch(*code)        switch(*code)
327          {          {
328          case OP_CRSTAR:          case OP_CRSTAR:
# Line 194  for(;;) Line 331  for(;;)
331          case OP_CRMINPLUS:          case OP_CRMINPLUS:
332          case OP_CRQUERY:          case OP_CRQUERY:
333          case OP_CRMINQUERY:          case OP_CRMINQUERY:
334          printf("%s", OP_names[*code]);          fprintf(outfile, "%s", OP_names[*code]);
335          break;          break;
336    
337          case OP_CRRANGE:          case OP_CRRANGE:
338          case OP_CRMINRANGE:          case OP_CRMINRANGE:
339          min = (code[1] << 8) + code[2];          min = (code[1] << 8) + code[2];
340          max = (code[3] << 8) + code[4];          max = (code[3] << 8) + code[4];
341          if (max == 0) printf("{%d,}", min);          if (max == 0) fprintf(outfile, "{%d,}", min);
342          else printf("{%d,%d}", min, max);          else fprintf(outfile, "{%d,%d}", min, max);
343          if (*code == OP_CRMINRANGE) printf("?");          if (*code == OP_CRMINRANGE) fprintf(outfile, "?");
344          code += 4;          code += 4;
345          break;          break;
346    
# Line 216  for(;;) Line 353  for(;;)
353      /* Anything else is just a one-node item */      /* Anything else is just a one-node item */
354    
355      default:      default:
356      printf("    %s", OP_names[*code]);      fprintf(outfile, "    %s", OP_names[*code]);
357      break;      break;
358      }      }
359    
360    code++;    code++;
361    printf("\n");    fprintf(outfile, "\n");
362    }    }
363  }  }
364    
365    
366    
367  /* Character string printing function. */  /* Character string printing function. A "normal" and a UTF-8 version. */
368    
369  static void pchars(unsigned char *p, int length)  static void pchars(unsigned char *p, int length, int utf8)
370  {  {
371  int c;  int c;
372  while (length-- > 0)  while (length-- > 0)
373      {
374      if (utf8)
375        {
376        int rc = utf82ord(p, &c);
377        if (rc > 0)
378          {
379          length -= rc - 1;
380          p += rc;
381          if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);
382            else fprintf(outfile, "\\x{%02x}", c);
383          continue;
384          }
385        }
386    
387       /* Not UTF-8, or malformed UTF-8  */
388    
389    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
390      else fprintf(outfile, "\\x%02x", c);      else fprintf(outfile, "\\x%02x", c);
391      }
392  }  }
393    
394    
# Line 244  compiled re. */ Line 398  compiled re. */
398    
399  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
400  {  {
401  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  gotten_store = size;
402    if (log_store)
403      fprintf(outfile, "Memory allocation (code space): %d\n",
404        (int)((int)size - offsetof(real_pcre, code[0])));
405  return malloc(size);  return malloc(size);
406  }  }
407    
408    
409    
410    
411    /* Get one piece of information from the pcre_fullinfo() function */
412    
413    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
414    {
415    int rc;
416    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
417      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
418    }
419    
420    
421    
422    
423  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
424  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
425  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 262  int study_options = 0; Line 432  int study_options = 0;
432  int op = 1;  int op = 1;
433  int timeit = 0;  int timeit = 0;
434  int showinfo = 0;  int showinfo = 0;
435    int showstore = 0;
436    int size_offsets = 45;
437    int size_offsets_max;
438    int *offsets;
439    #if !defined NOPOSIX
440  int posix = 0;  int posix = 0;
441    #endif
442  int debug = 0;  int debug = 0;
443    int done = 0;
444  unsigned char buffer[30000];  unsigned char buffer[30000];
445  unsigned char dbuffer[1024];  unsigned char dbuffer[1024];
446    
# Line 275  outfile = stdout; Line 452  outfile = stdout;
452    
453  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
454    {    {
455    if (strcmp(argv[op], "-s") == 0) log_store = 1;    char *endptr;
456    
457      if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
458        showstore = 1;
459    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
460    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
461    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
462      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
463          ((size_offsets = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
464        {
465        op++;
466        argc--;
467        }
468    #if !defined NOPOSIX
469    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
470    #endif
471    else    else
472      {      {
473      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
474        printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
475        printf("  -d     debug: show compiled code; implies -i\n"
476               "  -i     show information about compiled pattern\n"
477               "  -o <n> set size of offsets vector to <n>\n");
478    #if !defined NOPOSIX
479        printf("  -p     use POSIX interface\n");
480    #endif
481        printf("  -s     output store information\n"
482               "  -t     time compilation and execution\n");
483      return 1;      return 1;
484      }      }
485    op++;    op++;
486    argc--;    argc--;
487    }    }
488    
489    /* Get the store for the offsets vector, and remember what it was */
490    
491    size_offsets_max = size_offsets;
492    offsets = malloc(size_offsets_max * sizeof(int));
493    if (offsets == NULL)
494      {
495      printf("** Failed to get %d bytes of memory for offsets vector\n",
496        size_offsets_max * sizeof(int));
497      return 1;
498      }
499    
500  /* Sort out the input and output files */  /* Sort out the input and output files */
501    
502  if (argc > 1)  if (argc > 1)
# Line 315  if (argc > 2) Line 523  if (argc > 2)
523    
524  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
525    
526  /* Heading line, then prompt for first re if stdin */  /* Heading line, then prompt for first regex if stdin */
527    
 fprintf(outfile, "Testing Perl-Compatible Regular Expressions\n");  
528  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  fprintf(outfile, "PCRE version %s\n\n", pcre_version());
529    
530  /* Main loop */  /* Main loop */
531    
532  for (;;)  while (!done)
533    {    {
534    pcre *re = NULL;    pcre *re = NULL;
535    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
536    
537    #if !defined NOPOSIX  /* There are still compilers that require no indent */
538    regex_t preg;    regex_t preg;
   char *error;  
   unsigned char *p, *pp;  
   int do_study = 0;  
   int do_debug = 0;  
539    int do_posix = 0;    int do_posix = 0;
540    #endif
541    
542      const char *error;
543      unsigned char *p, *pp, *ppp;
544      const unsigned char *tables = NULL;
545      int do_study = 0;
546      int do_debug = debug;
547      int do_G = 0;
548      int do_g = 0;
549      int do_showinfo = showinfo;
550      int do_showrest = 0;
551      int utf8 = 0;
552    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
553    
554    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
555    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
556    if (infile != stdin) fprintf(outfile, (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
557    
558    p = buffer;    p = buffer;
559    while (isspace(*p)) p++;    while (isspace(*p)) p++;
# Line 347  for (;;) Line 564  for (;;)
564    
565    delimiter = *p++;    delimiter = *p++;
566    
567    if (isalnum(delimiter))    if (isalnum(delimiter) || delimiter == '\\')
568      {      {
569      fprintf(outfile, "** Delimiter must not be alphameric\n");      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
570      goto SKIP_DATA;      goto SKIP_DATA;
571      }      }
572    
# Line 357  for (;;) Line 574  for (;;)
574    
575    for(;;)    for(;;)
576      {      {
577      while (*pp != 0 && *pp != delimiter) pp++;      while (*pp != 0)
578          {
579          if (*pp == '\\' && pp[1] != 0) pp++;
580            else if (*pp == delimiter) break;
581          pp++;
582          }
583      if (*pp != 0) break;      if (*pp != 0) break;
584    
585      len = sizeof(buffer) - (pp - buffer);      len = sizeof(buffer) - (pp - buffer);
# Line 371  for (;;) Line 593  for (;;)
593      if (fgets((char *)pp, len, infile) == NULL)      if (fgets((char *)pp, len, infile) == NULL)
594        {        {
595        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
596        goto END_OFF;        done = 1;
597          goto CONTINUE;
598        }        }
599      if (infile != stdin) fprintf(outfile, (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
600      }      }
601    
602      /* If the first character after the delimiter is backslash, make
603      the pattern end with backslash. This is purely to provide a way
604      of testing for the error message when a pattern ends with backslash. */
605    
606      if (pp[1] == '\\') *pp++ = '\\';
607    
608    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter */
609    
610    *pp++ = 0;    *pp++ = 0;
# Line 384  for (;;) Line 613  for (;;)
613    
614    options = 0;    options = 0;
615    study_options = 0;    study_options = 0;
616      log_store = showstore;  /* default from command line */
617    
618    while (*pp != 0)    while (*pp != 0)
619      {      {
620      switch (*pp++)      switch (*pp++)
621        {        {
622          case 'g': do_g = 1; break;
623        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
624        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
625        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
626        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
627    
628          case '+': do_showrest = 1; break;
629        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
630        case 'D': do_debug = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
631        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
632          case 'G': do_G = 1; break;
633          case 'I': do_showinfo = 1; break;
634          case 'M': log_store = 1; break;
635    
636    #if !defined NOPOSIX
637        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
638    #endif
639    
640        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
641        case 'I': study_options |= PCRE_CASELESS; break;        case 'U': options |= PCRE_UNGREEDY; break;
642        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
643          case '8': options |= PCRE_UTF8; utf8 = 1; break;
644    
645          case 'L':
646          ppp = pp;
647          while (*ppp != '\n' && *ppp != ' ') ppp++;
648          *ppp = 0;
649          if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
650            {
651            fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
652            goto SKIP_DATA;
653            }
654          tables = pcre_maketables();
655          pp = ppp;
656          break;
657    
658        case '\n': case ' ': break;        case '\n': case ' ': break;
659        default:        default:
660        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
# Line 406  for (;;) Line 662  for (;;)
662        }        }
663      }      }
664    
665    /* Handle compiing via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
666    timing, showing, or debugging options. */    timing, showing, or debugging options, nor the ability to pass over
667      local character tables. */
668    
669    #if !defined NOPOSIX
670    if (posix || do_posix)    if (posix || do_posix)
671      {      {
672      int rc;      int rc;
# Line 431  for (;;) Line 689  for (;;)
689    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
690    
691    else    else
692    #endif  /* !defined NOPOSIX */
693    
694      {      {
695      if (timeit)      if (timeit)
696        {        {
697        register int i;        register int i;
698        clock_t time_taken;        clock_t time_taken;
699        clock_t start_time = clock();        clock_t start_time = clock();
700        for (i = 0; i < 4000; i++)        for (i = 0; i < LOOPREPEAT; i++)
701          {          {
702          re = pcre_compile((char *)p, options, &error, &erroroffset);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
703          if (re != NULL) free(re);          if (re != NULL) free(re);
704          }          }
705        time_taken = clock() - start_time;        time_taken = clock() - start_time;
706        fprintf(outfile, "Compile time %.2f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
707          ((double)time_taken)/(4 * CLOCKS_PER_SEC));          ((double)time_taken * 1000.0) /
708            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
709        }        }
710    
711      re = pcre_compile((char *)p, options, &error, &erroroffset);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
712    
713      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
714      if non-interactive. */      if non-interactive. */
# Line 461  for (;;) Line 722  for (;;)
722          for (;;)          for (;;)
723            {            {
724            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
725              goto END_OFF;              {
726                done = 1;
727                goto CONTINUE;
728                }
729            len = (int)strlen((char *)buffer);            len = (int)strlen((char *)buffer);
730            while (len > 0 && isspace(buffer[len-1])) len--;            while (len > 0 && isspace(buffer[len-1])) len--;
731            if (len == 0) break;            if (len == 0) break;
732            }            }
733          fprintf(outfile, "\n");          fprintf(outfile, "\n");
734          }          }
735        continue;        goto CONTINUE;
736        }        }
737    
738      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required. There are now two
739        info-returning functions. The old one has a limited interface and
740        returns only limited data. Check that it agrees with the newer one. */
741    
742      if (showinfo || do_debug)      if (do_showinfo)
743        {        {
744        int first_char, count;        unsigned long int get_options;
745          int old_first_char, old_options, old_count;
746        if (debug || do_debug) print_internals(re);        int count, backrefmax, first_char, need_char;
747          size_t size;
748    
749          if (do_debug) print_internals(re);
750    
751          new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
752          new_info(re, NULL, PCRE_INFO_SIZE, &size);
753          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
754          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
755          new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);
756          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
757    
758        count = pcre_info(re, &options, &first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
759        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
760          "Error %d while reading info\n", count);          "Error %d from pcre_info()\n", count);
761        else        else
762          {          {
763          fprintf(outfile, "Identifying subpattern count = %d\n", count);          if (old_count != count) fprintf(outfile,
764          if (options == 0) fprintf(outfile, "No options\n");            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
765            else fprintf(outfile, "Options:%s%s%s%s%s%s%s\n",              old_count);
766              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
767              ((options & PCRE_CASELESS) != 0)? " caseless" : "",          if (old_first_char != first_char) fprintf(outfile,
768              ((options & PCRE_EXTENDED) != 0)? " extended" : "",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
769              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",              first_char, old_first_char);
770              ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
771              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",          if (old_options != (int)get_options) fprintf(outfile,
772              ((options & PCRE_EXTRA) != 0)? " extra" : "");            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
773          if (first_char == -1)              get_options, old_options);
774            {          }
775            fprintf(outfile, "First char at start or follows \\n\n");  
776            }        if (size != gotten_store) fprintf(outfile,
777          else if (first_char < 0)          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
778            {          size, gotten_store);
779            fprintf(outfile, "No first char\n");  
780            }        fprintf(outfile, "Capturing subpattern count = %d\n", count);
781          if (backrefmax > 0)
782            fprintf(outfile, "Max back reference = %d\n", backrefmax);
783          if (get_options == 0) fprintf(outfile, "No options\n");
784            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
785              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
786              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
787              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
788              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
789              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
790              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
791              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
792              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
793              ((get_options & PCRE_UTF8) != 0)? " utf8" : "");
794    
795          if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
796            fprintf(outfile, "Case state changes\n");
797    
798          if (first_char == -1)
799            {
800            fprintf(outfile, "First char at start or follows \\n\n");
801            }
802          else if (first_char < 0)
803            {
804            fprintf(outfile, "No first char\n");
805            }
806          else
807            {
808            if (isprint(first_char))
809              fprintf(outfile, "First char = \'%c\'\n", first_char);
810          else          else
811            {            fprintf(outfile, "First char = %d\n", first_char);
812            if (isprint(first_char))          }
813              fprintf(outfile, "First char = \'%c\'\n", first_char);  
814            else        if (need_char < 0)
815              fprintf(outfile, "First char = %d\n", first_char);          {
816            }          fprintf(outfile, "No need char\n");
817            }
818          else
819            {
820            if (isprint(need_char))
821              fprintf(outfile, "Need char = \'%c\'\n", need_char);
822            else
823              fprintf(outfile, "Need char = %d\n", need_char);
824          }          }
825        }        }
826    
# Line 522  for (;;) Line 834  for (;;)
834          register int i;          register int i;
835          clock_t time_taken;          clock_t time_taken;
836          clock_t start_time = clock();          clock_t start_time = clock();
837          for (i = 0; i < 4000; i++)          for (i = 0; i < LOOPREPEAT; i++)
838            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
839          time_taken = clock() - start_time;          time_taken = clock() - start_time;
840          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
841          fprintf(outfile, "  Study time %.2f milliseconds\n",          fprintf(outfile, "  Study time %.3f milliseconds\n",
842            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            ((double)time_taken * 1000.0)/
843              ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
844          }          }
845    
846        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 536  for (;;) Line 849  for (;;)
849        else if (extra == NULL)        else if (extra == NULL)
850          fprintf(outfile, "Study returned NULL\n");          fprintf(outfile, "Study returned NULL\n");
851    
852        /* This looks at internal information. A bit kludgy to do it this        else if (do_showinfo)
       way, but it is useful for testing. */  
   
       else if (showinfo || do_debug)  
853          {          {
854          real_pcre_extra *xx = (real_pcre_extra *)extra;          uschar *start_bits = NULL;
855          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
856            if (start_bits == NULL)
857            fprintf(outfile, "No starting character set\n");            fprintf(outfile, "No starting character set\n");
858          else          else
859            {            {
# Line 551  for (;;) Line 862  for (;;)
862            fprintf(outfile, "Starting character set: ");            fprintf(outfile, "Starting character set: ");
863            for (i = 0; i < 256; i++)            for (i = 0; i < 256; i++)
864              {              {
865              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if ((start_bits[i/8] & (1<<(i%8))) != 0)
866                {                {
867                if (c > 75)                if (c > 75)
868                  {                  {
# Line 580  for (;;) Line 891  for (;;)
891    
892    for (;;)    for (;;)
893      {      {
894      unsigned char *pp;      unsigned char *q;
895        unsigned char *bptr = dbuffer;
896        int use_size_offsets = size_offsets;
897      int count, c;      int count, c;
898      int offsets[30];      int copystrings = 0;
899      int size_offsets = sizeof(offsets)/sizeof(int);      int getstrings = 0;
900        int getlist = 0;
901        int gmatched = 0;
902        int start_offset = 0;
903        int g_notempty = 0;
904    
905      options = 0;      options = 0;
906    
907      if (infile == stdin) printf("  data> ");      if (infile == stdin) printf("data> ");
908      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) goto END_OFF;      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
909      if (infile != stdin) fprintf(outfile, (char *)buffer);        {
910          done = 1;
911          goto CONTINUE;
912          }
913        if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
914    
915      len = (int)strlen((char *)buffer);      len = (int)strlen((char *)buffer);
916      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
# Line 599  for (;;) Line 920  for (;;)
920      p = buffer;      p = buffer;
921      while (isspace(*p)) p++;      while (isspace(*p)) p++;
922    
923      pp = dbuffer;      q = dbuffer;
924      while ((c = *p++) != 0)      while ((c = *p++) != 0)
925        {        {
926        int i = 0;        int i = 0;
# Line 623  for (;;) Line 944  for (;;)
944          break;          break;
945    
946          case 'x':          case 'x':
947    
948            /* Handle \x{..} specially - new Perl thing for utf8 */
949    
950            if (*p == '{')
951              {
952              unsigned char *pt = p;
953              c = 0;
954              while (isxdigit(*(++pt)))
955                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
956              if (*pt == '}')
957                {
958                unsigned char buffer[8];
959                int ii, utn;
960                utn = ord2utf8(c, buffer);
961                for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];
962                c = buffer[ii];   /* Last byte */
963                p = pt + 1;
964                break;
965                }
966              /* Not correct form; fall through */
967              }
968    
969            /* Ordinary \x */
970    
971          c = 0;          c = 0;
972          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
973            {            {
# Line 643  for (;;) Line 988  for (;;)
988          options |= PCRE_NOTBOL;          options |= PCRE_NOTBOL;
989          continue;          continue;
990    
991          case 'E':          case 'C':
992          options |= PCRE_DOLLAR_ENDONLY;          while(isdigit(*p)) n = n * 10 + *p++ - '0';
993            copystrings |= 1 << n;
994          continue;          continue;
995    
996          case 'I':          case 'G':
997          options |= PCRE_CASELESS;          while(isdigit(*p)) n = n * 10 + *p++ - '0';
998            getstrings |= 1 << n;
999          continue;          continue;
1000    
1001          case 'M':          case 'L':
1002          options |= PCRE_MULTILINE;          getlist = 1;
1003          continue;          continue;
1004    
1005          case 'S':          case 'N':
1006          options |= PCRE_DOTALL;          options |= PCRE_NOTEMPTY;
1007          continue;          continue;
1008    
1009          case 'O':          case 'O':
1010          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1011          if (n <= sizeof(offsets)/sizeof(int)) size_offsets = n;          if (n > size_offsets_max)
1012              {
1013    
1014    if (offsets != NULL)
1015    
1016              free(offsets);
1017              size_offsets_max = n;
1018              offsets = malloc(size_offsets_max * sizeof(int));
1019              if (offsets == NULL)
1020                {
1021                printf("** Failed to get %d bytes of memory for offsets vector\n",
1022                  size_offsets_max * sizeof(int));
1023                return 1;
1024                }
1025              }
1026            use_size_offsets = n;
1027    
1028    if (n == 0)
1029      {
1030      free(offsets);
1031      offsets = NULL;
1032      size_offsets_max = 0;
1033      }
1034    
1035          continue;          continue;
1036    
1037          case 'Z':          case 'Z':
1038          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1039          continue;          continue;
1040          }          }
1041        *pp++ = c;        *q++ = c;
1042        }        }
1043      *pp = 0;      *q = 0;
1044      len = pp - dbuffer;      len = q - dbuffer;
1045    
1046      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1047      support timing. */      support timing. */
1048    
1049    #if !defined NOPOSIX
1050      if (posix || do_posix)      if (posix || do_posix)
1051        {        {
1052        int rc;        int rc;
1053        int eflags = 0;        int eflags = 0;
1054        regmatch_t pmatch[30];        regmatch_t *pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);
1055        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1056        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1057    
1058        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
         pmatch, eflags);  
1059    
1060        if (rc != 0)        if (rc != 0)
1061          {          {
# Line 694  for (;;) Line 1064  for (;;)
1064          }          }
1065        else        else
1066          {          {
1067          int i;          size_t i;
1068          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < use_size_offsets; i++)
1069            {            {
1070            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1071              {              {
1072              fprintf(outfile, "%2d: ", i);              fprintf(outfile, "%2d: ", (int)i);
1073              pchars(dbuffer + pmatch[i].rm_so,              pchars(dbuffer + pmatch[i].rm_so,
1074                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, utf8);
1075              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1076                if (i == 0 && do_showrest)
1077                  {
1078                  fprintf(outfile, " 0+ ");
1079                  pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);
1080                  fprintf(outfile, "\n");
1081                  }
1082              }              }
1083            }            }
1084          }          }
1085          free(pmatch);
1086        }        }
1087    
1088      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
1089    
1090      else      else
1091    #endif  /* !defined NOPOSIX */
1092    
1093        for (;; gmatched++)    /* Loop for /g or /G */
1094        {        {
1095        if (timeit)        if (timeit)
1096          {          {
1097          register int i;          register int i;
1098          clock_t time_taken;          clock_t time_taken;
1099          clock_t start_time = clock();          clock_t start_time = clock();
1100          for (i = 0; i < 4000; i++)          for (i = 0; i < LOOPREPEAT; i++)
1101            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,            count = pcre_exec(re, extra, (char *)bptr, len,
1102              size_offsets);              start_offset, options | g_notempty, offsets, use_size_offsets);
1103          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1104          fprintf(outfile, "Execute time %.2f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1105            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            ((double)time_taken * 1000.0)/
1106              ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
1107          }          }
1108    
1109        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        count = pcre_exec(re, extra, (char *)bptr, len,
1110          size_offsets);          start_offset, options | g_notempty, offsets, use_size_offsets);
1111    
1112        if (count == 0)        if (count == 0)
1113          {          {
1114          fprintf(outfile, "Matched, but too many substrings\n");          fprintf(outfile, "Matched, but too many substrings\n");
1115          count = size_offsets/2;          count = use_size_offsets/3;
1116          }          }
1117    
1118          /* Matched */
1119    
1120        if (count >= 0)        if (count >= 0)
1121          {          {
1122          int i;          int i;
1123          count *= 2;          for (i = 0; i < count * 2; i += 2)
         for (i = 0; i < count; i += 2)  
1124            {            {
1125            if (offsets[i] < 0)            if (offsets[i] < 0)
1126              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
1127            else            else
1128              {              {
1129              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1130              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              pchars(bptr + offsets[i], offsets[i+1] - offsets[i], utf8);
1131              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1132                if (i == 0)
1133                  {
1134                  if (do_showrest)
1135                    {
1136                    fprintf(outfile, " 0+ ");
1137                    pchars(bptr + offsets[i+1], len - offsets[i+1], utf8);
1138                    fprintf(outfile, "\n");
1139                    }
1140                  }
1141                }
1142              }
1143    
1144            for (i = 0; i < 32; i++)
1145              {
1146              if ((copystrings & (1 << i)) != 0)
1147                {
1148                char copybuffer[16];
1149                int rc = pcre_copy_substring((char *)bptr, offsets, count,
1150                  i, copybuffer, sizeof(copybuffer));
1151                if (rc < 0)
1152                  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1153                else
1154                  fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1155                }
1156              }
1157    
1158            for (i = 0; i < 32; i++)
1159              {
1160              if ((getstrings & (1 << i)) != 0)
1161                {
1162                const char *substring;
1163                int rc = pcre_get_substring((char *)bptr, offsets, count,
1164                  i, &substring);
1165                if (rc < 0)
1166                  fprintf(outfile, "get substring %d failed %d\n", i, rc);
1167                else
1168                  {
1169                  fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1170                  /* free((void *)substring); */
1171                  pcre_free_substring(substring);
1172                  }
1173                }
1174              }
1175    
1176            if (getlist)
1177              {
1178              const char **stringlist;
1179              int rc = pcre_get_substring_list((char *)bptr, offsets, count,
1180                &stringlist);
1181              if (rc < 0)
1182                fprintf(outfile, "get substring list failed %d\n", rc);
1183              else
1184                {
1185                for (i = 0; i < count; i++)
1186                  fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1187                if (stringlist[i] != NULL)
1188                  fprintf(outfile, "string list not terminated by NULL\n");
1189                /* free((void *)stringlist); */
1190                pcre_free_substring_list(stringlist);
1191                }
1192              }
1193            }
1194    
1195          /* Failed to match. If this is a /g or /G loop and we previously set
1196          g_notempty after a null match, this is not necessarily the end.
1197          We want to advance the start offset, and continue. Fudge the offset
1198          values to achieve this. We won't be at the end of the string - that
1199          was checked before setting g_notempty. */
1200    
1201          else
1202            {
1203            if (g_notempty != 0)
1204              {
1205              offsets[0] = start_offset;
1206              offsets[1] = start_offset + 1;
1207              }
1208            else
1209              {
1210              if (gmatched == 0)   /* Error if no previous matches */
1211                {
1212                if (count == -1) fprintf(outfile, "No match\n");
1213                  else fprintf(outfile, "Error %d\n", count);
1214              }              }
1215              break;  /* Out of the /g loop */
1216            }            }
1217          }          }
1218    
1219          /* If not /g or /G we are done */
1220    
1221          if (!do_g && !do_G) break;
1222    
1223          /* If we have matched an empty string, first check to see if we are at
1224          the end of the subject. If so, the /g loop is over. Otherwise, mimic
1225          what Perl's /g options does. This turns out to be rather cunning. First
1226          we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1227          same point. If this fails (picked up above) we advance to the next
1228          character. */
1229    
1230          g_notempty = 0;
1231          if (offsets[0] == offsets[1])
1232            {
1233            if (offsets[0] == len) break;
1234            g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1235            }
1236    
1237          /* For /g, update the start offset, leaving the rest alone */
1238    
1239          if (do_g) start_offset = offsets[1];
1240    
1241          /* For /G, update the pointer and length */
1242    
1243        else        else
1244          {          {
1245          if (count == -1) fprintf(outfile, "No match\n");          bptr += offsets[1];
1246            else fprintf(outfile, "Error %d\n", count);          len -= offsets[1];
1247          }          }
1248        }        }  /* End of loop for /g and /G */
1249      }      }    /* End of loop for data lines */
1250    
1251      CONTINUE:
1252    
1253    #if !defined NOPOSIX
1254    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1255    #endif
1256    
1257    if (re != NULL) free(re);    if (re != NULL) free(re);
1258    if (extra != NULL) free(extra);    if (extra != NULL) free(extra);
1259      if (tables != NULL)
1260        {
1261        free((void *)tables);
1262        setlocale(LC_CTYPE, "C");
1263        }
1264    }    }
1265    
 END_OFF:  
1266  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1267  return 0;  return 0;
1268  }  }

Legend:
Removed from v.3  
changed lines
  Added in v.55

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12