/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 7 by nigel, Sat Feb 24 21:38:09 2007 UTC | revision 49 by nigel, Sat Feb 24 21:39:33 2007 UTC
# Line 7  Line 7 
7  #include <string.h>  #include <string.h>
8  #include <stdlib.h>  #include <stdlib.h>
9  #include <time.h>  #include <time.h>
10    #include <locale.h>
11    
12  /* Use the internal info for displaying the results of pcre_study(). */  /* Use the internal info for displaying the results of pcre_study(). */
13    
14  #include "internal.h"  #include "internal.h"
15    
16    /* It is possible to compile this test program without including support for
17    testing the POSIX interface, though this is not available via the standard
18    Makefile. */
19    
20    #if !defined NOPOSIX
21  #include "pcreposix.h"  #include "pcreposix.h"
22    #endif
23    
24  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
25  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 21  Line 29 
29  #endif  #endif
30  #endif  #endif
31    
32    #define LOOPREPEAT 20000
33    
34    
35  static FILE *outfile;  static FILE *outfile;
36  static int log_store = 0;  static int log_store = 0;
37    static size_t gotten_store;
38    
39    
40    
41    static int utf8_table1[] = {   /* largest value that fits in (index + 1) bytes; searched by ord2utf8() and utf82ord() */
42      0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};
43    
44    static int utf8_table2[] = {   /* bits OR-ed into the first byte by ord2utf8(); index = number of additional bytes */
45      0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
46    
47    static int utf8_table3[] = {   /* mask selecting the value bits held in the first byte; index = number of additional bytes */
48      0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
49    
50    
51    /*************************************************
52    *       Convert character value to UTF-8         *
53    *************************************************/
54    
55    /* This function takes an integer value in the range 0 - 0x7fffffff
56    and encodes it as a UTF-8 character in 1 to 6 bytes.
57    
58    Arguments:
59      cvalue     the character value
60      buffer     pointer to buffer for result - at least 6 bytes long
61    
62    Returns:     number of bytes placed in the buffer (1 to 6)
63                 -1 if input character is negative (nothing is written)
64                 0 if input character is positive but too big (only when
65                 int is longer than 32 bits; nothing is written)
66    NOTE(review): the first byte receives the low-order bits of cvalue and later bytes the higher bits; standard UTF-8 stores the high-order bits first. utf82ord() decodes this same layout - confirm it is intended before changing either function. */
67    
68    static int
69    ord2utf8(int cvalue, unsigned char *buffer)
70    {
71    register int i, j;
72    for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)   /* find the first table limit that holds cvalue */
73      if (cvalue <= utf8_table1[i]) break;
74    if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;   /* no limit was large enough */
75    if (cvalue < 0) return -1;   /* a negative value stops the search at i == 0, so it always reaches this check */
76    *buffer++ = utf8_table2[i] | (cvalue & utf8_table3[i]);   /* first byte: marker bits plus the value bits kept by the mask */
77    cvalue >>= 6 - i;   /* drop the bits just stored (the mask keeps 6 - i bits when i >= 1) */
78    for (j = 0; j < i; j++)   /* i additional bytes follow the first */
79      {
80      *buffer++ = 0x80 | (cvalue & 0x3f);   /* each further byte carries the next 6 bits of what remains */
81      cvalue >>= 6;
82      }
83    return i + 1;   /* first byte plus i additional bytes */
84    }
85    
86    
87    /*************************************************
88    *            Convert UTF-8 string to value       *
89    *************************************************/
90    
91    /* This function takes one or more bytes that represent a UTF-8 character,
92    and returns the value of the character.
93    
94    Arguments:
95      buffer   a pointer to the byte vector
96      vptr     a pointer to an int to receive the value (written only on success)
97    
98    Returns:   >  0 => the number of bytes consumed
99               -6 to 0 => malformed UTF-8 character at offset = (-return)
100    */
101    
102    int
103    utf82ord(unsigned char *buffer, int *vptr)
104    {
105    int c = *buffer++;
106    int d = c;   /* working copy of the first byte, shifted left while counting its leading 1 bits */
107    int i, j, s;
108    
109    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
110      {
111      if ((d & 0x80) == 0) break;
112      d <<= 1;
113      }
114    
115    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
116    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
117    
118    /* i now has a value in the range 1-5 */
119    
120    d = c & utf8_table3[i];   /* value bits carried by the first byte */
121    s = 6 - i;   /* shift for the next byte's bits; matches the shift used by ord2utf8() */
122    
123    for (j = 0; j < i; j++)
124      {
125      c = *buffer++;
126      if ((c & 0xc0) != 0x80) return -(j+1);   /* top two bits are not 10; j+1 is this byte's offset */
127      d |= (c & 0x3f) << s;
128      s += 6;
129      }
130    
131    /* Check that encoding was the correct unique one */
132    
133    for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)   /* same table search as ord2utf8() */
134      if (d <= utf8_table1[j]) break;
135    if (j != i) return -(i+1);   /* the value belongs to a different length than the one used */
136    
137    /* Valid value */
138    
139    *vptr = d;
140    return i+1;
141    }
142    
143    
144    
145    
146    
147    
# Line 32  code as contained in pcre.c under the DE Line 150  code as contained in pcre.c under the DE
150    
151  static const char *OP_names[] = {  static const char *OP_names[] = {
152    "End", "\\A", "\\B", "\\b", "\\D", "\\d",    "End", "\\A", "\\B", "\\b", "\\D", "\\d",
153    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
154    "not",    "Opt", "^", "$", "Any", "chars", "not",
155    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
156    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
157    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
158    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
159    "class", "Ref",    "class", "Ref", "Recurse",
160    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
161      "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
162    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
163  };  };
164    
# Line 48  static void print_internals(pcre *re) Line 167  static void print_internals(pcre *re)
167  {  {
168  unsigned char *code = ((real_pcre *)re)->code;  unsigned char *code = ((real_pcre *)re)->code;
169    
170  printf("------------------------------------------------------------------\n");  fprintf(outfile, "------------------------------------------------------------------\n");
171    
172  for(;;)  for(;;)
173    {    {
174    int c;    int c;
175    int charlength;    int charlength;
176    
177    printf("%3d ", code - ((real_pcre *)re)->code);    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));
178    
179    if (*code >= OP_BRA)    if (*code >= OP_BRA)
180      {      {
181      printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);      fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
182      code += 2;      code += 2;
183      }      }
184    
185    else switch(*code)    else switch(*code)
186      {      {
187      case OP_END:      case OP_END:
188      printf("    %s\n", OP_names[*code]);      fprintf(outfile, "    %s\n", OP_names[*code]);
189      printf("------------------------------------------------------------------\n");      fprintf(outfile, "------------------------------------------------------------------\n");
190      return;      return;
191    
192        case OP_OPT:
193        fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);
194        code++;
195        break;
196    
197        case OP_COND:
198        fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);
199        code += 2;
200        break;
201    
202        case OP_CREF:
203        fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);
204        code++;
205        break;
206    
207      case OP_CHARS:      case OP_CHARS:
208      charlength = *(++code);      charlength = *(++code);
209      printf("%3d ", charlength);      fprintf(outfile, "%3d ", charlength);
210      while (charlength-- > 0)      while (charlength-- > 0)
211        if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);        if (isprint(c = *(++code))) fprintf(outfile, "%c", c);
212            else fprintf(outfile, "\\x%02x", c);
213      break;      break;
214    
215      case OP_KETRMAX:      case OP_KETRMAX:
# Line 83  for(;;) Line 218  for(;;)
218      case OP_KET:      case OP_KET:
219      case OP_ASSERT:      case OP_ASSERT:
220      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
221        case OP_ASSERTBACK:
222        case OP_ASSERTBACK_NOT:
223      case OP_ONCE:      case OP_ONCE:
224      printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);      fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
225        code += 2;
226        break;
227    
228        case OP_REVERSE:
229        fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
230      code += 2;      code += 2;
231      break;      break;
232    
# Line 101  for(;;) Line 243  for(;;)
243      case OP_TYPEQUERY:      case OP_TYPEQUERY:
244      case OP_TYPEMINQUERY:      case OP_TYPEMINQUERY:
245      if (*code >= OP_TYPESTAR)      if (*code >= OP_TYPESTAR)
246        printf("    %s", OP_names[code[1]]);        fprintf(outfile, "    %s", OP_names[code[1]]);
247      else if (isprint(c = code[1])) printf("    %c", c);      else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);
248        else printf("    \\x%02x", c);        else fprintf(outfile, "    \\x%02x", c);
249      printf("%s", OP_names[*code++]);      fprintf(outfile, "%s", OP_names[*code++]);
250      break;      break;
251    
252      case OP_EXACT:      case OP_EXACT:
253      case OP_UPTO:      case OP_UPTO:
254      case OP_MINUPTO:      case OP_MINUPTO:
255      if (isprint(c = code[3])) printf("    %c{", c);      if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);
256        else printf("    \\x%02x{", c);        else fprintf(outfile, "    \\x%02x{", c);
257      if (*code != OP_EXACT) printf(",");      if (*code != OP_EXACT) fprintf(outfile, ",");
258      printf("%d}", (code[1] << 8) + code[2]);      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
259      if (*code == OP_MINUPTO) printf("?");      if (*code == OP_MINUPTO) fprintf(outfile, "?");
260      code += 3;      code += 3;
261      break;      break;
262    
263      case OP_TYPEEXACT:      case OP_TYPEEXACT:
264      case OP_TYPEUPTO:      case OP_TYPEUPTO:
265      case OP_TYPEMINUPTO:      case OP_TYPEMINUPTO:
266      printf("    %s{", OP_names[code[3]]);      fprintf(outfile, "    %s{", OP_names[code[3]]);
267      if (*code != OP_TYPEEXACT) printf(",");      if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");
268      printf("%d}", (code[1] << 8) + code[2]);      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
269      if (*code == OP_TYPEMINUPTO) printf("?");      if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");
270      code += 3;      code += 3;
271      break;      break;
272    
273      case OP_NOT:      case OP_NOT:
274      if (isprint(c = *(++code))) printf("    [^%c]", c);      if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);
275        else printf("    [^\\x%02x]", c);        else fprintf(outfile, "    [^\\x%02x]", c);
276      break;      break;
277    
278      case OP_NOTSTAR:      case OP_NOTSTAR:
# Line 139  for(;;) Line 281  for(;;)
281      case OP_NOTMINPLUS:      case OP_NOTMINPLUS:
282      case OP_NOTQUERY:      case OP_NOTQUERY:
283      case OP_NOTMINQUERY:      case OP_NOTMINQUERY:
284      if (isprint(c = code[1])) printf("    [^%c]", c);      if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);
285        else printf("    [^\\x%02x]", c);        else fprintf(outfile, "    [^\\x%02x]", c);
286      printf("%s", OP_names[*code++]);      fprintf(outfile, "%s", OP_names[*code++]);
287      break;      break;
288    
289      case OP_NOTEXACT:      case OP_NOTEXACT:
290      case OP_NOTUPTO:      case OP_NOTUPTO:
291      case OP_NOTMINUPTO:      case OP_NOTMINUPTO:
292      if (isprint(c = code[3])) printf("    [^%c]{", c);      if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);
293        else printf("    [^\\x%02x]{", c);        else fprintf(outfile, "    [^\\x%02x]{", c);
294      if (*code != OP_NOTEXACT) printf(",");      if (*code != OP_NOTEXACT) fprintf(outfile, ",");
295      printf("%d}", (code[1] << 8) + code[2]);      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
296      if (*code == OP_NOTMINUPTO) printf("?");      if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");
297      code += 3;      code += 3;
298      break;      break;
299    
300      case OP_REF:      case OP_REF:
301      printf("    \\%d", *(++code));      fprintf(outfile, "    \\%d", *(++code));
302      break;      code++;
303        goto CLASS_REF_REPEAT;
304    
305      case OP_CLASS:      case OP_CLASS:
306        {        {
307        int i, min, max;        int i, min, max;
   
308        code++;        code++;
309        printf("    [");        fprintf(outfile, "    [");
310    
311        for (i = 0; i < 256; i++)        for (i = 0; i < 256; i++)
312          {          {
# Line 173  for(;;) Line 315  for(;;)
315            int j;            int j;
316            for (j = i+1; j < 256; j++)            for (j = i+1; j < 256; j++)
317              if ((code[j/8] & (1 << (j&7))) == 0) break;              if ((code[j/8] & (1 << (j&7))) == 0) break;
318            if (i == '-' || i == ']') printf("\\");            if (i == '-' || i == ']') fprintf(outfile, "\\");
319            if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);            if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);
320            if (--j > i)            if (--j > i)
321              {              {
322              printf("-");              fprintf(outfile, "-");
323              if (j == '-' || j == ']') printf("\\");              if (j == '-' || j == ']') fprintf(outfile, "\\");
324              if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);              if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);
325              }              }
326            i = j;            i = j;
327            }            }
328          }          }
329        printf("]");        fprintf(outfile, "]");
330        code += 32;        code += 32;
331    
332          CLASS_REF_REPEAT:
333    
334        switch(*code)        switch(*code)
335          {          {
336          case OP_CRSTAR:          case OP_CRSTAR:
# Line 195  for(;;) Line 339  for(;;)
339          case OP_CRMINPLUS:          case OP_CRMINPLUS:
340          case OP_CRQUERY:          case OP_CRQUERY:
341          case OP_CRMINQUERY:          case OP_CRMINQUERY:
342          printf("%s", OP_names[*code]);          fprintf(outfile, "%s", OP_names[*code]);
343          break;          break;
344    
345          case OP_CRRANGE:          case OP_CRRANGE:
346          case OP_CRMINRANGE:          case OP_CRMINRANGE:
347          min = (code[1] << 8) + code[2];          min = (code[1] << 8) + code[2];
348          max = (code[3] << 8) + code[4];          max = (code[3] << 8) + code[4];
349          if (max == 0) printf("{%d,}", min);          if (max == 0) fprintf(outfile, "{%d,}", min);
350          else printf("{%d,%d}", min, max);          else fprintf(outfile, "{%d,%d}", min, max);
351          if (*code == OP_CRMINRANGE) printf("?");          if (*code == OP_CRMINRANGE) fprintf(outfile, "?");
352          code += 4;          code += 4;
353          break;          break;
354    
# Line 217  for(;;) Line 361  for(;;)
361      /* Anything else is just a one-node item */      /* Anything else is just a one-node item */
362    
363      default:      default:
364      printf("    %s", OP_names[*code]);      fprintf(outfile, "    %s", OP_names[*code]);
365      break;      break;
366      }      }
367    
368    code++;    code++;
369    printf("\n");    fprintf(outfile, "\n");
370    }    }
371  }  }
372    
373    
374    
375  /* Character string printing function. */  /* Character string printing function. A "normal" and a UTF-8 version. */
376    
377  static void pchars(unsigned char *p, int length)  static void pchars(unsigned char *p, int length, int utf8)
378  {  {
379  int c;  int c;
380  while (length-- > 0)  while (length-- > 0)
381      {
382      if (utf8)
383        {
384        int rc = utf82ord(p, &c);
385        if (rc > 0)
386          {
387          length -= rc - 1;
388          p += rc;
389          if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);
390            else fprintf(outfile, "\\x{%02x}", c);
391          continue;
392          }
393        }
394    
395       /* Not UTF-8, or malformed UTF-8  */
396    
397    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
398      else fprintf(outfile, "\\x%02x", c);      else fprintf(outfile, "\\x%02x", c);
399      }
400  }  }
401    
402    
# Line 245  compiled re. */ Line 406  compiled re. */
406    
407  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
408  {  {
409  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  gotten_store = size;
410    if (log_store)
411      fprintf(outfile, "Memory allocation (code space): %d\n",
412        (int)((int)size - offsetof(real_pcre, code[0])));
413  return malloc(size);  return malloc(size);
414  }  }
415    
416    
417    
418    
419    /* Get one piece of information from the pcre_fullinfo() function. The arguments are passed straight through; a negative return code is reported on outfile and nothing is returned to the caller. */
420    
421    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
422    {
423    int rc;
424    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)   /* negative return code is treated as an error */
425      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);   /* reports both the code and the option requested */
426    }
427    
428    
429    
430    
431  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
432  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
433  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 263  int study_options = 0; Line 440  int study_options = 0;
440  int op = 1;  int op = 1;
441  int timeit = 0;  int timeit = 0;
442  int showinfo = 0;  int showinfo = 0;
443    int showstore = 0;
444  int posix = 0;  int posix = 0;
445  int debug = 0;  int debug = 0;
446    int done = 0;
447  unsigned char buffer[30000];  unsigned char buffer[30000];
448  unsigned char dbuffer[1024];  unsigned char dbuffer[1024];
449    
# Line 276  outfile = stdout; Line 455  outfile = stdout;
455    
456  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
457    {    {
458    if (strcmp(argv[op], "-s") == 0) log_store = 1;    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
459        showstore = 1;
460    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
461    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
462    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
# Line 284  while (argc > 1 && argv[op][0] == '-') Line 464  while (argc > 1 && argv[op][0] == '-')
464    else    else
465      {      {
466      printf("*** Unknown option %s\n", argv[op]);      printf("*** Unknown option %s\n", argv[op]);
467        printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");
468        printf("  -d   debug: show compiled code; implies -i\n"
469               "  -i   show information about compiled pattern\n"
470               "  -p   use POSIX interface\n"
471               "  -s   output store information\n"
472               "  -t   time compilation and execution\n");
473      return 1;      return 1;
474      }      }
475    op++;    op++;
# Line 316  if (argc > 2) Line 502  if (argc > 2)
502    
503  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
504    
505  /* Heading line, then prompt for first re if stdin */  /* Heading line, then prompt for first regex if stdin */
506    
 fprintf(outfile, "Testing Perl-Compatible Regular Expressions\n");  
507  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  fprintf(outfile, "PCRE version %s\n\n", pcre_version());
508    
509  /* Main loop */  /* Main loop */
510    
511  for (;;)  while (!done)
512    {    {
513    pcre *re = NULL;    pcre *re = NULL;
514    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
515    
516    #if !defined NOPOSIX  /* There are still compilers that require no indent */
517    regex_t preg;    regex_t preg;
518      int do_posix = 0;
519    #endif
520    
521    const char *error;    const char *error;
522    unsigned char *p, *pp;    unsigned char *p, *pp, *ppp;
523      unsigned const char *tables = NULL;
524    int do_study = 0;    int do_study = 0;
525    int do_debug = 0;    int do_debug = debug;
526    int do_posix = 0;    int do_G = 0;
527      int do_g = 0;
528      int do_showinfo = showinfo;
529      int do_showrest = 0;
530      int utf8 = 0;
531    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
532    
533    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
534    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
535    if (infile != stdin) fprintf(outfile, (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
536    
537    p = buffer;    p = buffer;
538    while (isspace(*p)) p++;    while (isspace(*p)) p++;
# Line 348  for (;;) Line 543  for (;;)
543    
544    delimiter = *p++;    delimiter = *p++;
545    
546    if (isalnum(delimiter))    if (isalnum(delimiter) || delimiter == '\\')
547      {      {
548      fprintf(outfile, "** Delimiter must not be alphameric\n");      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
549      goto SKIP_DATA;      goto SKIP_DATA;
550      }      }
551    
# Line 358  for (;;) Line 553  for (;;)
553    
554    for(;;)    for(;;)
555      {      {
556      while (*pp != 0 && *pp != delimiter) pp++;      while (*pp != 0)
557          {
558          if (*pp == '\\' && pp[1] != 0) pp++;
559            else if (*pp == delimiter) break;
560          pp++;
561          }
562      if (*pp != 0) break;      if (*pp != 0) break;
563    
564      len = sizeof(buffer) - (pp - buffer);      len = sizeof(buffer) - (pp - buffer);
# Line 372  for (;;) Line 572  for (;;)
572      if (fgets((char *)pp, len, infile) == NULL)      if (fgets((char *)pp, len, infile) == NULL)
573        {        {
574        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
575        goto END_OFF;        done = 1;
576          goto CONTINUE;
577        }        }
578      if (infile != stdin) fprintf(outfile, (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
579      }      }
580    
581      /* If the first character after the delimiter is backslash, make
582      the pattern end with backslash. This is purely to provide a way
583      of testing for the error message when a pattern ends with backslash. */
584    
585      if (pp[1] == '\\') *pp++ = '\\';
586    
587    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter */
588    
589    *pp++ = 0;    *pp++ = 0;
# Line 385  for (;;) Line 592  for (;;)
592    
593    options = 0;    options = 0;
594    study_options = 0;    study_options = 0;
595      log_store = showstore;  /* default from command line */
596    
597    while (*pp != 0)    while (*pp != 0)
598      {      {
599      switch (*pp++)      switch (*pp++)
600        {        {
601          case 'g': do_g = 1; break;
602        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
603        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
604        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
605        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
606    
607          case '+': do_showrest = 1; break;
608        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
609        case 'D': do_debug = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
610        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
611          case 'G': do_G = 1; break;
612          case 'I': do_showinfo = 1; break;
613          case 'M': log_store = 1; break;
614    
615    #if !defined NOPOSIX
616        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
617    #endif
618    
619        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
620        case 'I': study_options |= PCRE_CASELESS; break;        case 'U': options |= PCRE_UNGREEDY; break;
621        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
622          case '8': options |= PCRE_UTF8; utf8 = 1; break;
623    
624          case 'L':
625          ppp = pp;
626          while (*ppp != '\n' && *ppp != ' ') ppp++;
627          *ppp = 0;
628          if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
629            {
630            fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
631            goto SKIP_DATA;
632            }
633          tables = pcre_maketables();
634          pp = ppp;
635          break;
636    
637        case '\n': case ' ': break;        case '\n': case ' ': break;
638        default:        default:
639        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
# Line 407  for (;;) Line 641  for (;;)
641        }        }
642      }      }
643    
644    /* Handle compiing via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
645    timing, showing, or debugging options. */    timing, showing, or debugging options, nor the ability to pass over
646      local character tables. */
647    
648    #if !defined NOPOSIX
649    if (posix || do_posix)    if (posix || do_posix)
650      {      {
651      int rc;      int rc;
# Line 432  for (;;) Line 668  for (;;)
668    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
669    
670    else    else
671    #endif  /* !defined NOPOSIX */
672    
673      {      {
674      if (timeit)      if (timeit)
675        {        {
676        register int i;        register int i;
677        clock_t time_taken;        clock_t time_taken;
678        clock_t start_time = clock();        clock_t start_time = clock();
679        for (i = 0; i < 4000; i++)        for (i = 0; i < LOOPREPEAT; i++)
680          {          {
681          re = pcre_compile((char *)p, options, &error, &erroroffset);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
682          if (re != NULL) free(re);          if (re != NULL) free(re);
683          }          }
684        time_taken = clock() - start_time;        time_taken = clock() - start_time;
685        fprintf(outfile, "Compile time %.2f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
686          ((double)time_taken)/(4 * CLOCKS_PER_SEC));          ((double)time_taken * 1000.0) /
687            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
688        }        }
689    
690      re = pcre_compile((char *)p, options, &error, &erroroffset);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
691    
692      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
693      if non-interactive. */      if non-interactive. */
# Line 462  for (;;) Line 701  for (;;)
701          for (;;)          for (;;)
702            {            {
703            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
704              goto END_OFF;              {
705                done = 1;
706                goto CONTINUE;
707                }
708            len = (int)strlen((char *)buffer);            len = (int)strlen((char *)buffer);
709            while (len > 0 && isspace(buffer[len-1])) len--;            while (len > 0 && isspace(buffer[len-1])) len--;
710            if (len == 0) break;            if (len == 0) break;
711            }            }
712          fprintf(outfile, "\n");          fprintf(outfile, "\n");
713          }          }
714        continue;        goto CONTINUE;
715        }        }
716    
717      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required. There are now two
718        info-returning functions. The old one has a limited interface and
719        returns only limited data. Check that it agrees with the newer one. */
720    
721      if (showinfo || do_debug)      if (do_showinfo)
722        {        {
723        int first_char, count;        int old_first_char, old_options, old_count;
724          int count, backrefmax, first_char, need_char;
725        if (debug || do_debug) print_internals(re);        size_t size;
726    
727          if (do_debug) print_internals(re);
728    
729          new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
730          new_info(re, NULL, PCRE_INFO_SIZE, &size);
731          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
732          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
733          new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);
734          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
735    
736        count = pcre_info(re, &options, &first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
737        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
738          "Error %d while reading info\n", count);          "Error %d from pcre_info()\n", count);
739        else        else
740          {          {
741          fprintf(outfile, "Identifying subpattern count = %d\n", count);          if (old_count != count) fprintf(outfile,
742          if (options == 0) fprintf(outfile, "No options\n");            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
743            else fprintf(outfile, "Options:%s%s%s%s%s%s%s\n",              old_count);
744              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
745              ((options & PCRE_CASELESS) != 0)? " caseless" : "",          if (old_first_char != first_char) fprintf(outfile,
746              ((options & PCRE_EXTENDED) != 0)? " extended" : "",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
747              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",              first_char, old_first_char);
748              ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
749              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",          if (old_options != options) fprintf(outfile,
750              ((options & PCRE_EXTRA) != 0)? " extra" : "");            "Options disagreement: pcre_fullinfo=%d pcre_info=%d\n", options,
751          if (first_char == -1)              old_options);
752            {          }
753            fprintf(outfile, "First char at start or follows \\n\n");  
754            }        if (size != gotten_store) fprintf(outfile,
755          else if (first_char < 0)          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
756            {          size, gotten_store);
757            fprintf(outfile, "No first char\n");  
758            }        fprintf(outfile, "Capturing subpattern count = %d\n", count);
759          if (backrefmax > 0)
760            fprintf(outfile, "Max back reference = %d\n", backrefmax);
761          if (options == 0) fprintf(outfile, "No options\n");
762            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
763              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",
764              ((options & PCRE_CASELESS) != 0)? " caseless" : "",
765              ((options & PCRE_EXTENDED) != 0)? " extended" : "",
766              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",
767              ((options & PCRE_DOTALL) != 0)? " dotall" : "",
768              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
769              ((options & PCRE_EXTRA) != 0)? " extra" : "",
770              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
771              ((options & PCRE_UTF8) != 0)? " utf8" : "");
772    
773          if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
774            fprintf(outfile, "Case state changes\n");
775    
776          if (first_char == -1)
777            {
778            fprintf(outfile, "First char at start or follows \\n\n");
779            }
780          else if (first_char < 0)
781            {
782            fprintf(outfile, "No first char\n");
783            }
784          else
785            {
786            if (isprint(first_char))
787              fprintf(outfile, "First char = \'%c\'\n", first_char);
788          else          else
789            {            fprintf(outfile, "First char = %d\n", first_char);
790            if (isprint(first_char))          }
791              fprintf(outfile, "First char = \'%c\'\n", first_char);  
792            else        if (need_char < 0)
793              fprintf(outfile, "First char = %d\n", first_char);          {
794            }          fprintf(outfile, "No need char\n");
795            }
796          else
797            {
798            if (isprint(need_char))
799              fprintf(outfile, "Need char = \'%c\'\n", need_char);
800            else
801              fprintf(outfile, "Need char = %d\n", need_char);
802          }          }
803        }        }
804    
# Line 523  for (;;) Line 812  for (;;)
812          register int i;          register int i;
813          clock_t time_taken;          clock_t time_taken;
814          clock_t start_time = clock();          clock_t start_time = clock();
815          for (i = 0; i < 4000; i++)          for (i = 0; i < LOOPREPEAT; i++)
816            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
817          time_taken = clock() - start_time;          time_taken = clock() - start_time;
818          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
819          fprintf(outfile, "  Study time %.2f milliseconds\n",          fprintf(outfile, "  Study time %.3f milliseconds\n",
820            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            ((double)time_taken * 1000.0)/
821              ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
822          }          }
823    
824        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 537  for (;;) Line 827  for (;;)
827        else if (extra == NULL)        else if (extra == NULL)
828          fprintf(outfile, "Study returned NULL\n");          fprintf(outfile, "Study returned NULL\n");
829    
830        /* This looks at internal information. A bit kludgy to do it this        else if (do_showinfo)
       way, but it is useful for testing. */  
   
       else if (showinfo || do_debug)  
831          {          {
832          real_pcre_extra *xx = (real_pcre_extra *)extra;          uschar *start_bits = NULL;
833          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
834            if (start_bits == NULL)
835            fprintf(outfile, "No starting character set\n");            fprintf(outfile, "No starting character set\n");
836          else          else
837            {            {
# Line 552  for (;;) Line 840  for (;;)
840            fprintf(outfile, "Starting character set: ");            fprintf(outfile, "Starting character set: ");
841            for (i = 0; i < 256; i++)            for (i = 0; i < 256; i++)
842              {              {
843              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if ((start_bits[i/8] & (1<<(i%8))) != 0)
844                {                {
845                if (c > 75)                if (c > 75)
846                  {                  {
# Line 581  for (;;) Line 869  for (;;)
869    
870    for (;;)    for (;;)
871      {      {
872      unsigned char *pp;      unsigned char *q;
873        unsigned char *bptr = dbuffer;
874      int count, c;      int count, c;
875      int offsets[30];      int copystrings = 0;
876        int getstrings = 0;
877        int getlist = 0;
878        int gmatched = 0;
879        int start_offset = 0;
880        int g_notempty = 0;
881        int offsets[45];
882      int size_offsets = sizeof(offsets)/sizeof(int);      int size_offsets = sizeof(offsets)/sizeof(int);
883    
884      options = 0;      options = 0;
885    
886      if (infile == stdin) printf("  data> ");      if (infile == stdin) printf("data> ");
887      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) goto END_OFF;      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
888      if (infile != stdin) fprintf(outfile, (char *)buffer);        {
889          done = 1;
890          goto CONTINUE;
891          }
892        if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
893    
894      len = (int)strlen((char *)buffer);      len = (int)strlen((char *)buffer);
895      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
# Line 600  for (;;) Line 899  for (;;)
899      p = buffer;      p = buffer;
900      while (isspace(*p)) p++;      while (isspace(*p)) p++;
901    
902      pp = dbuffer;      q = dbuffer;
903      while ((c = *p++) != 0)      while ((c = *p++) != 0)
904        {        {
905        int i = 0;        int i = 0;
# Line 624  for (;;) Line 923  for (;;)
923          break;          break;
924    
925          case 'x':          case 'x':
926    
927            /* Handle \x{..} specially - new Perl thing for utf8 */
928    
929            if (*p == '{')
930              {
931              unsigned char *pt = p;
932              c = 0;
933              while (isxdigit(*(++pt)))
934                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
935              if (*pt == '}')
936                {
937                unsigned char buffer[8];
938                int ii, utn;
939                utn = ord2utf8(c, buffer);
940                for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];
941                c = buffer[ii];   /* Last byte */
942                p = pt + 1;
943                break;
944                }
945              /* Not correct form; fall through */
946              }
947    
948            /* Ordinary \x */
949    
950          c = 0;          c = 0;
951          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
952            {            {
# Line 644  for (;;) Line 967  for (;;)
967          options |= PCRE_NOTBOL;          options |= PCRE_NOTBOL;
968          continue;          continue;
969    
970          case 'E':          case 'C':
971          options |= PCRE_DOLLAR_ENDONLY;          while(isdigit(*p)) n = n * 10 + *p++ - '0';
972            copystrings |= 1 << n;
973          continue;          continue;
974    
975          case 'I':          case 'G':
976          options |= PCRE_CASELESS;          while(isdigit(*p)) n = n * 10 + *p++ - '0';
977            getstrings |= 1 << n;
978          continue;          continue;
979    
980          case 'M':          case 'L':
981          options |= PCRE_MULTILINE;          getlist = 1;
982          continue;          continue;
983    
984          case 'S':          case 'N':
985          options |= PCRE_DOTALL;          options |= PCRE_NOTEMPTY;
986          continue;          continue;
987    
988          case 'O':          case 'O':
989          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
990          if (n <= (int)sizeof(offsets)/sizeof(int)) size_offsets = n;          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;
991          continue;          continue;
992    
993          case 'Z':          case 'Z':
994          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
995          continue;          continue;
996          }          }
997        *pp++ = c;        *q++ = c;
998        }        }
999      *pp = 0;      *q = 0;
1000      len = pp - dbuffer;      len = q - dbuffer;
1001    
1002      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1003      support timing. */      support timing. */
1004    
1005    #if !defined NOPOSIX
1006      if (posix || do_posix)      if (posix || do_posix)
1007        {        {
1008        int rc;        int rc;
1009        int eflags = 0;        int eflags = 0;
1010        regmatch_t pmatch[30];        regmatch_t pmatch[sizeof(offsets)/sizeof(int)];
1011        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1012        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1013    
1014        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags);
         pmatch, eflags);  
1015    
1016        if (rc != 0)        if (rc != 0)
1017          {          {
# Line 696  for (;;) Line 1021  for (;;)
1021        else        else
1022          {          {
1023          size_t i;          size_t i;
1024          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < size_offsets; i++)
1025            {            {
1026            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1027              {              {
1028              fprintf(outfile, "%2d: ", i);              fprintf(outfile, "%2d: ", (int)i);
1029              pchars(dbuffer + pmatch[i].rm_so,              pchars(dbuffer + pmatch[i].rm_so,
1030                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, utf8);
1031              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1032                if (i == 0 && do_showrest)
1033                  {
1034                  fprintf(outfile, " 0+ ");
1035                  pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);
1036                  fprintf(outfile, "\n");
1037                  }
1038              }              }
1039            }            }
1040          }          }
1041        }        }
1042    
1043      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
1044    
1045      else      else
1046    #endif  /* !defined NOPOSIX */
1047    
1048        for (;; gmatched++)    /* Loop for /g or /G */
1049        {        {
1050        if (timeit)        if (timeit)
1051          {          {
1052          register int i;          register int i;
1053          clock_t time_taken;          clock_t time_taken;
1054          clock_t start_time = clock();          clock_t start_time = clock();
1055          for (i = 0; i < 4000; i++)          for (i = 0; i < LOOPREPEAT; i++)
1056            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,            count = pcre_exec(re, extra, (char *)bptr, len,
1057              size_offsets);              start_offset, options | g_notempty, offsets, size_offsets);
1058          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1059          fprintf(outfile, "Execute time %.2f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1060            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            ((double)time_taken * 1000.0)/
1061              ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
1062          }          }
1063    
1064        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        count = pcre_exec(re, extra, (char *)bptr, len,
1065          size_offsets);          start_offset, options | g_notempty, offsets, size_offsets);
1066    
1067        if (count == 0)        if (count == 0)
1068          {          {
1069          fprintf(outfile, "Matched, but too many substrings\n");          fprintf(outfile, "Matched, but too many substrings\n");
1070          count = size_offsets/2;          count = size_offsets/3;
1071          }          }
1072    
1073          /* Matched */
1074    
1075        if (count >= 0)        if (count >= 0)
1076          {          {
1077          int i;          int i;
1078          count *= 2;          for (i = 0; i < count * 2; i += 2)
         for (i = 0; i < count; i += 2)  
1079            {            {
1080            if (offsets[i] < 0)            if (offsets[i] < 0)
1081              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
1082            else            else
1083              {              {
1084              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1085              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              pchars(bptr + offsets[i], offsets[i+1] - offsets[i], utf8);
1086              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1087                if (i == 0)
1088                  {
1089                  if (do_showrest)
1090                    {
1091                    fprintf(outfile, " 0+ ");
1092                    pchars(bptr + offsets[i+1], len - offsets[i+1], utf8);
1093                    fprintf(outfile, "\n");
1094                    }
1095                  }
1096                }
1097              }
1098    
1099            for (i = 0; i < 32; i++)
1100              {
1101              if ((copystrings & (1 << i)) != 0)
1102                {
1103                char copybuffer[16];
1104                int rc = pcre_copy_substring((char *)bptr, offsets, count,
1105                  i, copybuffer, sizeof(copybuffer));
1106                if (rc < 0)
1107                  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1108                else
1109                  fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1110                }
1111              }
1112    
1113            for (i = 0; i < 32; i++)
1114              {
1115              if ((getstrings & (1 << i)) != 0)
1116                {
1117                const char *substring;
1118                int rc = pcre_get_substring((char *)bptr, offsets, count,
1119                  i, &substring);
1120                if (rc < 0)
1121                  fprintf(outfile, "get substring %d failed %d\n", i, rc);
1122                else
1123                  {
1124                  fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1125                  /* free((void *)substring); */
1126                  pcre_free_substring(substring);
1127                  }
1128                }
1129              }
1130    
1131            if (getlist)
1132              {
1133              const char **stringlist;
1134              int rc = pcre_get_substring_list((char *)bptr, offsets, count,
1135                &stringlist);
1136              if (rc < 0)
1137                fprintf(outfile, "get substring list failed %d\n", rc);
1138              else
1139                {
1140                for (i = 0; i < count; i++)
1141                  fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1142                if (stringlist[i] != NULL)
1143                  fprintf(outfile, "string list not terminated by NULL\n");
1144                /* free((void *)stringlist); */
1145                pcre_free_substring_list(stringlist);
1146                }
1147              }
1148            }
1149    
1150          /* Failed to match. If this is a /g or /G loop and we previously set
1151          g_notempty after a null match, this is not necessarily the end.
1152          We want to advance the start offset, and continue. Fudge the offset
1153          values to achieve this. We won't be at the end of the string - that
1154          was checked before setting g_notempty. */
1155    
1156          else
1157            {
1158            if (g_notempty != 0)
1159              {
1160              offsets[0] = start_offset;
1161              offsets[1] = start_offset + 1;
1162              }
1163            else
1164              {
1165              if (gmatched == 0)   /* Error if no previous matches */
1166                {
1167                if (count == -1) fprintf(outfile, "No match\n");
1168                  else fprintf(outfile, "Error %d\n", count);
1169              }              }
1170              break;  /* Out of the /g loop */
1171            }            }
1172          }          }
1173    
1174          /* If not /g or /G we are done */
1175    
1176          if (!do_g && !do_G) break;
1177    
1178          /* If we have matched an empty string, first check to see if we are at
1179          the end of the subject. If so, the /g loop is over. Otherwise, mimic
1180          what Perl's /g option does. This turns out to be rather cunning. First
1181          we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1182          same point. If this fails (picked up above) we advance to the next
1183          character. */
1184    
1185          g_notempty = 0;
1186          if (offsets[0] == offsets[1])
1187            {
1188            if (offsets[0] == len) break;
1189            g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1190            }
1191    
1192          /* For /g, update the start offset, leaving the rest alone */
1193    
1194          if (do_g) start_offset = offsets[1];
1195    
1196          /* For /G, update the pointer and length */
1197    
1198        else        else
1199          {          {
1200          if (count == -1) fprintf(outfile, "No match\n");          bptr += offsets[1];
1201            else fprintf(outfile, "Error %d\n", count);          len -= offsets[1];
1202          }          }
1203        }        }  /* End of loop for /g and /G */
1204      }      }    /* End of loop for data lines */
1205    
1206      CONTINUE:
1207    
1208    #if !defined NOPOSIX
1209    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1210    #endif
1211    
1212    if (re != NULL) free(re);    if (re != NULL) free(re);
1213    if (extra != NULL) free(extra);    if (extra != NULL) free(extra);
1214      if (tables != NULL)
1215        {
1216        free((void *)tables);
1217        setlocale(LC_CTYPE, "C");
1218        }
1219    }    }
1220    
 END_OFF:  
1221  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1222  return 0;  return 0;
1223  }  }

Legend:
Removed from v.7  
changed lines
  Added in v.49

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12