/[pcre]/code/trunk/pcre_study.c
ViewVC logotype

Diff of /code/trunk/pcre_study.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC revision 611 by ph10, Wed Jun 29 08:49:21 2011 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  supporting functions. */  supporting functions. */
43    
44    
45    #ifdef HAVE_CONFIG_H
46    #include "config.h"
47    #endif
48    
49  #include "pcre_internal.h"  #include "pcre_internal.h"
50    
51    #define SET_BIT(c) start_bits[c/8] |= (1 << (c&7))
52    
53    /* Returns from set_start_bits() */
54    
55    enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN };
56    
57    
58    
59    /*************************************************
60    *   Find the minimum subject length for a group  *
61    *************************************************/
62    
63    /* Scan a parenthesized group and compute the minimum length of subject that
64    is needed to match it. This is a lower bound; it does not mean there is a
65    string of that length that matches. In UTF8 mode, the result is in characters
66    rather than bytes.
67    
68    Arguments:
69      code       pointer to start of group (the bracket)
70      startcode  pointer to start of the whole pattern
71      options    the compiling options
72    
73    Returns:   the minimum length
74               -1 if \C was encountered
75               -2 internal error (missing capturing bracket)
76               -3 internal error (opcode not listed)
77    */
78    
79    static int
80    find_minlength(const uschar *code, const uschar *startcode, int options)
81    {
82    int length = -1;
83    BOOL utf8 = (options & PCRE_UTF8) != 0;
84    BOOL had_recurse = FALSE;
85    register int branchlength = 0;
86    register uschar *cc = (uschar *)code + 1 + LINK_SIZE;
87    
88    if (*code == OP_CBRA || *code == OP_SCBRA ||
89        *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += 2;
90    
91    /* Scan along the opcodes for this branch. If we get to the end of the
92    branch, check the length against that of the other branches. */
93    
94    for (;;)
95      {
96      int d, min;
97      uschar *cs, *ce;
98      register int op = *cc;
99    
100      switch (op)
101        {
102        case OP_COND:
103        case OP_SCOND:
104    
105        /* If there is only one branch in a condition, the implied branch has zero
106        length, so we don't add anything. This covers the DEFINE "condition"
107        automatically. */
108    
109        cs = cc + GET(cc, 1);
110        if (*cs != OP_ALT)
111          {
112          cc = cs + 1 + LINK_SIZE;
113          break;
114          }
115    
116        /* Otherwise we can fall through and treat it the same as any other
117        subpattern. */
118    
119        case OP_CBRA:
120        case OP_SCBRA:
121        case OP_BRA:
122        case OP_SBRA:
123        case OP_CBRAPOS:
124        case OP_SCBRAPOS:
125        case OP_BRAPOS:
126        case OP_SBRAPOS:
127        case OP_ONCE:
128        d = find_minlength(cc, startcode, options);
129        if (d < 0) return d;
130        branchlength += d;
131        do cc += GET(cc, 1); while (*cc == OP_ALT);
132        cc += 1 + LINK_SIZE;
133        break;
134    
135        /* Reached end of a branch; if it's a ket it is the end of a nested
136        call. If it's ALT it is an alternation in a nested call. If it is
137        END it's the end of the outer call. All can be handled by the same code. */
138    
139        case OP_ALT:
140        case OP_KET:
141        case OP_KETRMAX:
142        case OP_KETRMIN:
143        case OP_KETRPOS:
144        case OP_END:
145        if (length < 0 || (!had_recurse && branchlength < length))
146          length = branchlength;
147        if (*cc != OP_ALT) return length;
148        cc += 1 + LINK_SIZE;
149        branchlength = 0;
150        had_recurse = FALSE;
151        break;
152    
153        /* Skip over assertive subpatterns */
154    
155        case OP_ASSERT:
156        case OP_ASSERT_NOT:
157        case OP_ASSERTBACK:
158        case OP_ASSERTBACK_NOT:
159        do cc += GET(cc, 1); while (*cc == OP_ALT);
160        /* Fall through */
161    
162        /* Skip over things that don't match chars */
163    
164        case OP_REVERSE:
165        case OP_CREF:
166        case OP_NCREF:
167        case OP_RREF:
168        case OP_NRREF:
169        case OP_DEF:
170        case OP_CALLOUT:
171        case OP_SOD:
172        case OP_SOM:
173        case OP_EOD:
174        case OP_EODN:
175        case OP_CIRC:
176        case OP_CIRCM:
177        case OP_DOLL:
178        case OP_DOLLM:
179        case OP_NOT_WORD_BOUNDARY:
180        case OP_WORD_BOUNDARY:
181        cc += _pcre_OP_lengths[*cc];
182        break;
183    
184        /* Skip over a subpattern that has a {0} or {0,x} quantifier */
185    
186        case OP_BRAZERO:
187        case OP_BRAMINZERO:
188        case OP_BRAPOSZERO:
189        case OP_SKIPZERO:
190        cc += _pcre_OP_lengths[*cc];
191        do cc += GET(cc, 1); while (*cc == OP_ALT);
192        cc += 1 + LINK_SIZE;
193        break;
194    
195        /* Handle literal characters and + repetitions */
196    
197        case OP_CHAR:
198        case OP_CHARI:
199        case OP_NOT:
200        case OP_NOTI:
201        case OP_PLUS:
202        case OP_PLUSI:
203        case OP_MINPLUS:
204        case OP_MINPLUSI:
205        case OP_POSPLUS:
206        case OP_POSPLUSI:
207        case OP_NOTPLUS:
208        case OP_NOTPLUSI:
209        case OP_NOTMINPLUS:
210        case OP_NOTMINPLUSI:
211        case OP_NOTPOSPLUS:
212        case OP_NOTPOSPLUSI:
213        branchlength++;
214        cc += 2;
215    #ifdef SUPPORT_UTF8
216        if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];
217    #endif
218        break;
219    
220        case OP_TYPEPLUS:
221        case OP_TYPEMINPLUS:
222        case OP_TYPEPOSPLUS:
223        branchlength++;
224        cc += (cc[1] == OP_PROP || cc[1] == OP_NOTPROP)? 4 : 2;
225        break;
226    
227        /* Handle exact repetitions. The count is already in characters, but we
228        need to skip over a multibyte character in UTF8 mode.  */
229    
230        case OP_EXACT:
231        case OP_EXACTI:
232        case OP_NOTEXACT:
233        case OP_NOTEXACTI:
234        branchlength += GET2(cc,1);
235        cc += 4;
236    #ifdef SUPPORT_UTF8
237        if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];
238    #endif
239        break;
240    
241        case OP_TYPEEXACT:
242        branchlength += GET2(cc,1);
243        cc += (cc[3] == OP_PROP || cc[3] == OP_NOTPROP)? 6 : 4;
244        break;
245    
246        /* Handle single-char non-literal matchers */
247    
248        case OP_PROP:
249        case OP_NOTPROP:
250        cc += 2;
251        /* Fall through */
252    
253        case OP_NOT_DIGIT:
254        case OP_DIGIT:
255        case OP_NOT_WHITESPACE:
256        case OP_WHITESPACE:
257        case OP_NOT_WORDCHAR:
258        case OP_WORDCHAR:
259        case OP_ANY:
260        case OP_ALLANY:
261        case OP_EXTUNI:
262        case OP_HSPACE:
263        case OP_NOT_HSPACE:
264        case OP_VSPACE:
265        case OP_NOT_VSPACE:
266        branchlength++;
267        cc++;
268        break;
269    
270        /* "Any newline" might match two characters, but it also might match just
271        one. */
272    
273        case OP_ANYNL:
274        branchlength += 1;
275        cc++;
276        break;
277    
278        /* The single-byte matcher means we can't proceed in UTF-8 mode */
279    
280        case OP_ANYBYTE:
281    #ifdef SUPPORT_UTF8
282        if (utf8) return -1;
283    #endif
284        branchlength++;
285        cc++;
286        break;
287    
288        /* For repeated character types, we have to test for \p and \P, which have
289        an extra two bytes of parameters. */
290    
291        case OP_TYPESTAR:
292        case OP_TYPEMINSTAR:
293        case OP_TYPEQUERY:
294        case OP_TYPEMINQUERY:
295        case OP_TYPEPOSSTAR:
296        case OP_TYPEPOSQUERY:
297        if (cc[1] == OP_PROP || cc[1] == OP_NOTPROP) cc += 2;
298        cc += _pcre_OP_lengths[op];
299        break;
300    
301        case OP_TYPEUPTO:
302        case OP_TYPEMINUPTO:
303        case OP_TYPEPOSUPTO:
304        if (cc[3] == OP_PROP || cc[3] == OP_NOTPROP) cc += 2;
305        cc += _pcre_OP_lengths[op];
306        break;
307    
308        /* Check a class for variable quantification */
309    
310    #ifdef SUPPORT_UTF8
311        case OP_XCLASS:
312        cc += GET(cc, 1) - 33;
313        /* Fall through */
314    #endif
315    
316        case OP_CLASS:
317        case OP_NCLASS:
318        cc += 33;
319    
320        switch (*cc)
321          {
322          case OP_CRPLUS:
323          case OP_CRMINPLUS:
324          branchlength++;
325          /* Fall through */
326    
327          case OP_CRSTAR:
328          case OP_CRMINSTAR:
329          case OP_CRQUERY:
330          case OP_CRMINQUERY:
331          cc++;
332          break;
333    
334          case OP_CRRANGE:
335          case OP_CRMINRANGE:
336          branchlength += GET2(cc,1);
337          cc += 5;
338          break;
339    
340          default:
341          branchlength++;
342          break;
343          }
344        break;
345    
346        /* Backreferences and subroutine calls are treated in the same way: we find
347        the minimum length for the subpattern. A recursion, however, causes an
348        a flag to be set that causes the length of this branch to be ignored. The
349        logic is that a recursion can only make sense if there is another
350        alternation that stops the recursing. That will provide the minimum length
351        (when no recursion happens). A backreference within the group that it is
352        referencing behaves in the same way.
353    
354        If PCRE_JAVASCRIPT_COMPAT is set, a backreference to an unset bracket
355        matches an empty string (by default it causes a matching failure), so in
356        that case we must set the minimum length to zero. */
357    
358        case OP_REF:
359        case OP_REFI:
360        if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
361          {
362          ce = cs = (uschar *)_pcre_find_bracket(startcode, utf8, GET2(cc, 1));
363          if (cs == NULL) return -2;
364          do ce += GET(ce, 1); while (*ce == OP_ALT);
365          if (cc > cs && cc < ce)
366            {
367            d = 0;
368            had_recurse = TRUE;
369            }
370          else d = find_minlength(cs, startcode, options);
371          }
372        else d = 0;
373        cc += 3;
374    
375        /* Handle repeated back references */
376    
377        switch (*cc)
378          {
379          case OP_CRSTAR:
380          case OP_CRMINSTAR:
381          case OP_CRQUERY:
382          case OP_CRMINQUERY:
383          min = 0;
384          cc++;
385          break;
386    
387          case OP_CRPLUS:
388          case OP_CRMINPLUS:
389          min = 1;
390          cc++;
391          break;
392    
393          case OP_CRRANGE:
394          case OP_CRMINRANGE:
395          min = GET2(cc, 1);
396          cc += 5;
397          break;
398    
399          default:
400          min = 1;
401          break;
402          }
403    
404        branchlength += min * d;
405        break;
406    
407        case OP_RECURSE:
408        cs = ce = (uschar *)startcode + GET(cc, 1);
409        if (cs == NULL) return -2;
410        do ce += GET(ce, 1); while (*ce == OP_ALT);
411        if (cc > cs && cc < ce)
412          had_recurse = TRUE;
413        else
414          branchlength += find_minlength(cs, startcode, options);
415        cc += 1 + LINK_SIZE;
416        break;
417    
418        /* Anything else does not or need not match a character. We can get the
419        item's length from the table, but for those that can match zero occurrences
420        of a character, we must take special action for UTF-8 characters. As it
421        happens, the "NOT" versions of these opcodes are used at present only for
422        ASCII characters, so they could be omitted from this list. However, in
423        future that may change, so we include them here so as not to leave a
424        gotcha for a future maintainer. */
425    
426        case OP_UPTO:
427        case OP_UPTOI:
428        case OP_NOTUPTO:
429        case OP_NOTUPTOI:
430        case OP_MINUPTO:
431        case OP_MINUPTOI:
432        case OP_NOTMINUPTO:
433        case OP_NOTMINUPTOI:
434        case OP_POSUPTO:
435        case OP_POSUPTOI:
436        case OP_NOTPOSUPTO:
437        case OP_NOTPOSUPTOI:
438    
439        case OP_STAR:
440        case OP_STARI:
441        case OP_NOTSTAR:
442        case OP_NOTSTARI:
443        case OP_MINSTAR:
444        case OP_MINSTARI:
445        case OP_NOTMINSTAR:
446        case OP_NOTMINSTARI:
447        case OP_POSSTAR:
448        case OP_POSSTARI:
449        case OP_NOTPOSSTAR:
450        case OP_NOTPOSSTARI:
451    
452        case OP_QUERY:
453        case OP_QUERYI:
454        case OP_NOTQUERY:
455        case OP_NOTQUERYI:
456        case OP_MINQUERY:
457        case OP_MINQUERYI:
458        case OP_NOTMINQUERY:
459        case OP_NOTMINQUERYI:
460        case OP_POSQUERY:
461        case OP_POSQUERYI:
462        case OP_NOTPOSQUERY:
463        case OP_NOTPOSQUERYI:
464    
465        cc += _pcre_OP_lengths[op];
466    #ifdef SUPPORT_UTF8
467        if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];
468    #endif
469        break;
470    
471        /* Skip these, but we need to add in the name length. */
472    
473        case OP_MARK:
474        case OP_PRUNE_ARG:
475        case OP_SKIP_ARG:
476        cc += _pcre_OP_lengths[op] + cc[1];
477        break;
478    
479        case OP_THEN_ARG:
480        cc += _pcre_OP_lengths[op] + cc[1+LINK_SIZE];
481        break;
482    
483        /* The remaining opcodes are just skipped over. */
484    
485        case OP_ACCEPT:
486        case OP_CLOSE:
487        case OP_COMMIT:
488        case OP_FAIL:
489        case OP_PRUNE:
490        case OP_SET_SOM:
491        case OP_SKIP:
492        case OP_THEN:
493        cc += _pcre_OP_lengths[op];
494        break;
495    
496        /* This should not occur: we list all opcodes explicitly so that when
497        new ones get added they are properly considered. */
498    
499        default:
500        return -3;
501        }
502      }
503    /* Control never gets here */
504    }
505    
506    
507    
508  /*************************************************  /*************************************************
509  *      Set a bit and maybe its alternate case    *  *      Set a bit and maybe its alternate case    *
510  *************************************************/  *************************************************/
511    
512  /* Given a character, set its bit in the table, and also the bit for the other  /* Given a character, set its first byte's bit in the table, and also the
513  version of a letter if we are caseless.  corresponding bit for the other version of a letter if we are caseless. In
514    UTF-8 mode, for characters greater than 127, we can only do the caseless thing
515    when Unicode property support is available.
516    
517  Arguments:  Arguments:
518    start_bits    points to the bit map    start_bits    points to the bit map
519    c             is the character    p             points to the character
520    caseless      the caseless flag    caseless      the caseless flag
521    cd            the block with char table pointers    cd            the block with char table pointers
522      utf8          TRUE for UTF-8 mode
523    
524  Returns:        nothing  Returns:        pointer after the character
525    */
526    
527    static const uschar *
528    set_table_bit(uschar *start_bits, const uschar *p, BOOL caseless,
529      compile_data *cd, BOOL utf8)
530    {
531    unsigned int c = *p;
532    
533    SET_BIT(c);
534    
535    #ifdef SUPPORT_UTF8
536    if (utf8 && c > 127)
537      {
538      GETCHARINC(c, p);
539    #ifdef SUPPORT_UCP
540      if (caseless)
541        {
542        uschar buff[8];
543        c = UCD_OTHERCASE(c);
544        (void)_pcre_ord2utf8(c, buff);
545        SET_BIT(buff[0]);
546        }
547    #endif
548      return p;
549      }
550    #endif
551    
552    /* Not UTF-8 mode, or character is less than 127. */
553    
554    if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);
555    return p + 1;
556    }
557    
558    
559    
560    /*************************************************
561    *     Set bits for a positive character type     *
562    *************************************************/
563    
564    /* This function sets starting bits for a character type. In UTF-8 mode, we can
565    only do a direct setting for bytes less than 128, as otherwise there can be
566    confusion with bytes in the middle of UTF-8 characters. In a "traditional"
567    environment, the tables will only recognize ASCII characters anyway, but in at
568    least one Windows environment, some higher bytes bits were set in the tables.
569    So we deal with that case by considering the UTF-8 encoding.
570    
571    Arguments:
572      start_bits     the starting bitmap
573      cbit type      the type of character wanted
574      table_limit    32 for non-UTF-8; 16 for UTF-8
575      cd             the block with char table pointers
576    
577    Returns:         nothing
578  */  */
579    
580  static void  static void
581  set_bit(uschar *start_bits, unsigned int c, BOOL caseless, compile_data *cd)  set_type_bits(uschar *start_bits, int cbit_type, int table_limit,
582      compile_data *cd)
583  {  {
584  start_bits[c/8] |= (1 << (c&7));  register int c;
585  if (caseless && (cd->ctypes[c] & ctype_letter) != 0)  for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type];
586    start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7));  if (table_limit == 32) return;
587    for (c = 128; c < 256; c++)
588      {
589      if ((cd->cbits[c/8] & (1 << (c&7))) != 0)
590        {
591        uschar buff[8];
592        (void)_pcre_ord2utf8(c, buff);
593        SET_BIT(buff[0]);
594        }
595      }
596    }
597    
598    
599    /*************************************************
600    *     Set bits for a negative character type     *
601    *************************************************/
602    
603    /* This function sets starting bits for a negative character type such as \D.
604    In UTF-8 mode, we can only do a direct setting for bytes less than 128, as
605    otherwise there can be confusion with bytes in the middle of UTF-8 characters.
606    Unlike in the positive case, where we can set appropriate starting bits for
607    specific high-valued UTF-8 characters, in this case we have to set the bits for
608    all high-valued characters. The lowest is 0xc2, but we overkill by starting at
609    0xc0 (192) for simplicity.
610    
611    Arguments:
612      start_bits     the starting bitmap
613      cbit type      the type of character wanted
614      table_limit    32 for non-UTF-8; 16 for UTF-8
615      cd             the block with char table pointers
616    
617    Returns:         nothing
618    */
619    
620    static void
621    set_nottype_bits(uschar *start_bits, int cbit_type, int table_limit,
622      compile_data *cd)
623    {
624    register int c;
625    for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type];
626    if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff;
627  }  }
628    
629    
630    
631  /*************************************************  /*************************************************
632  *          Create bitmap of starting chars       *  *          Create bitmap of starting bytes       *
633  *************************************************/  *************************************************/
634    
635  /* This function scans a compiled unanchored expression and attempts to build a  /* This function scans a compiled unanchored expression recursively and
636  bitmap of the set of initial characters. If it can't, it returns FALSE. As time  attempts to build a bitmap of the set of possible starting bytes. As time goes
637  goes by, we may be able to get more clever at doing this.  by, we may be able to get more clever at doing this. The SSB_CONTINUE return is
638    useful for parenthesized groups in patterns such as (a*)b where the group
639    provides some optional starting bytes but scanning must continue at the outer
640    level to find at least one mandatory byte. At the outermost level, this
641    function fails unless the result is SSB_DONE.
642    
643  Arguments:  Arguments:
644    code         points to an expression    code         points to an expression
645    start_bits   points to a 32-byte table, initialized to 0    start_bits   points to a 32-byte table, initialized to 0
   caseless     the current state of the caseless flag  
646    utf8         TRUE if in UTF-8 mode    utf8         TRUE if in UTF-8 mode
647    cd           the block with char table pointers    cd           the block with char table pointers
648    
649  Returns:       TRUE if table built, FALSE otherwise  Returns:       SSB_FAIL     => Failed to find any starting bytes
650                   SSB_DONE     => Found mandatory starting bytes
651                   SSB_CONTINUE => Found optional starting bytes
652                   SSB_UNKNOWN  => Hit an unrecognized opcode
653  */  */
654    
655  static BOOL  static int
656  set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,  set_start_bits(const uschar *code, uschar *start_bits, BOOL utf8,
657    BOOL utf8, compile_data *cd)    compile_data *cd)
658  {  {
659  register int c;  register int c;
660    int yield = SSB_DONE;
661    int table_limit = utf8? 16:32;
662    
663  #if 0  #if 0
664  /* ========================================================================= */  /* ========================================================================= */
# Line 114  volatile int dummy; Line 679  volatile int dummy;
679    
680  do  do
681    {    {
   const uschar *tcode = code + 1 + LINK_SIZE;  
682    BOOL try_next = TRUE;    BOOL try_next = TRUE;
683      const uschar *tcode = code + 1 + LINK_SIZE;
684    
685    while (try_next)    if (*code == OP_CBRA || *code == OP_SCBRA ||
686      {        *code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += 2;
     /* If a branch starts with a bracket or a positive lookahead assertion,  
     recurse to set bits from within them. That's all for this branch. */  
687    
688      if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT)    while (try_next)    /* Loop for items in this branch */
689        {
690        int rc;
691        switch(*tcode)
692        {        {
693        if (!set_start_bits(tcode, start_bits, caseless, utf8, cd))        /* If we reach something we don't understand, it means a new opcode has
694          return FALSE;        been created that hasn't been added to this code. Hopefully this problem
695        try_next = FALSE;        will be discovered during testing. */
       }  
696    
     else switch(*tcode)  
       {  
697        default:        default:
698        return FALSE;        return SSB_UNKNOWN;
699    
700        /* Skip over callout */        /* Fail for a valid opcode that implies no starting bits. */
701    
702        case OP_CALLOUT:        case OP_ACCEPT:
703        tcode += 2 + 2*LINK_SIZE;        case OP_ALLANY:
704          case OP_ANY:
705          case OP_ANYBYTE:
706          case OP_CIRC:
707          case OP_CIRCM:
708          case OP_CLOSE:
709          case OP_COMMIT:
710          case OP_COND:
711          case OP_CREF:
712          case OP_DEF:
713          case OP_DOLL:
714          case OP_DOLLM:
715          case OP_END:
716          case OP_EOD:
717          case OP_EODN:
718          case OP_EXTUNI:
719          case OP_FAIL:
720          case OP_MARK:
721          case OP_NCREF:
722          case OP_NOT:
723          case OP_NOTEXACT:
724          case OP_NOTEXACTI:
725          case OP_NOTI:
726          case OP_NOTMINPLUS:
727          case OP_NOTMINPLUSI:
728          case OP_NOTMINQUERY:
729          case OP_NOTMINQUERYI:
730          case OP_NOTMINSTAR:
731          case OP_NOTMINSTARI:
732          case OP_NOTMINUPTO:
733          case OP_NOTMINUPTOI:
734          case OP_NOTPLUS:
735          case OP_NOTPLUSI:
736          case OP_NOTPOSPLUS:
737          case OP_NOTPOSPLUSI:
738          case OP_NOTPOSQUERY:
739          case OP_NOTPOSQUERYI:
740          case OP_NOTPOSSTAR:
741          case OP_NOTPOSSTARI:
742          case OP_NOTPOSUPTO:
743          case OP_NOTPOSUPTOI:
744          case OP_NOTPROP:
745          case OP_NOTQUERY:
746          case OP_NOTQUERYI:
747          case OP_NOTSTAR:
748          case OP_NOTSTARI:
749          case OP_NOTUPTO:
750          case OP_NOTUPTOI:
751          case OP_NOT_HSPACE:
752          case OP_NOT_VSPACE:
753          case OP_NOT_WORD_BOUNDARY:
754          case OP_NRREF:
755          case OP_PROP:
756          case OP_PRUNE:
757          case OP_PRUNE_ARG:
758          case OP_RECURSE:
759          case OP_REF:
760          case OP_REFI:
761          case OP_REVERSE:
762          case OP_RREF:
763          case OP_SCOND:
764          case OP_SET_SOM:
765          case OP_SKIP:
766          case OP_SKIP_ARG:
767          case OP_SOD:
768          case OP_SOM:
769          case OP_THEN:
770          case OP_THEN_ARG:
771          case OP_WORD_BOUNDARY:
772          case OP_XCLASS:
773          return SSB_FAIL;
774    
775          /* If we hit a bracket or a positive lookahead assertion, recurse to set
776          bits from within the subpattern. If it can't find anything, we have to
777          give up. If it finds some mandatory character(s), we are done for this
778          branch. Otherwise, carry on scanning after the subpattern. */
779    
780          case OP_BRA:
781          case OP_SBRA:
782          case OP_CBRA:
783          case OP_SCBRA:
784          case OP_BRAPOS:
785          case OP_SBRAPOS:
786          case OP_CBRAPOS:
787          case OP_SCBRAPOS:
788          case OP_ONCE:
789          case OP_ASSERT:
790          rc = set_start_bits(tcode, start_bits, utf8, cd);
791          if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
792          if (rc == SSB_DONE) try_next = FALSE; else
793            {
794            do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
795            tcode += 1 + LINK_SIZE;
796            }
797        break;        break;
798    
799        /* Skip over extended extraction bracket number */        /* If we hit ALT or KET, it means we haven't found anything mandatory in
800          this branch, though we might have found something optional. For ALT, we
801          continue with the next alternative, but we have to arrange that the final
802          result from subpattern is SSB_CONTINUE rather than SSB_DONE. For KET,
803          return SSB_CONTINUE: if this is the top level, that indicates failure,
804          but after a nested subpattern, it causes scanning to continue. */
805    
806        case OP_BRANUMBER:        case OP_ALT:
807        tcode += 3;        yield = SSB_CONTINUE;
808          try_next = FALSE;
809          break;
810    
811          case OP_KET:
812          case OP_KETRMAX:
813          case OP_KETRMIN:
814          case OP_KETRPOS:
815          return SSB_CONTINUE;
816    
817          /* Skip over callout */
818    
819          case OP_CALLOUT:
820          tcode += 2 + 2*LINK_SIZE;
821        break;        break;
822    
823        /* Skip over lookbehind and negative lookahead assertions */        /* Skip over lookbehind and negative lookahead assertions */
# Line 152  do Line 826  do
826        case OP_ASSERTBACK:        case OP_ASSERTBACK:
827        case OP_ASSERTBACK_NOT:        case OP_ASSERTBACK_NOT:
828        do tcode += GET(tcode, 1); while (*tcode == OP_ALT);        do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
829        tcode += 1+LINK_SIZE;        tcode += 1 + LINK_SIZE;
       break;  
   
       /* Skip over an option setting, changing the caseless flag */  
   
       case OP_OPT:  
       caseless = (tcode[1] & PCRE_CASELESS) != 0;  
       tcode += 2;  
830        break;        break;
831    
832        /* BRAZERO does the bracket, but carries on. */        /* BRAZERO does the bracket, but carries on. */
833    
834        case OP_BRAZERO:        case OP_BRAZERO:
835        case OP_BRAMINZERO:        case OP_BRAMINZERO:
836        if (!set_start_bits(++tcode, start_bits, caseless, utf8, cd))        case OP_BRAPOSZERO:
837          return FALSE;        rc = set_start_bits(++tcode, start_bits, utf8, cd);
838          if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
839  /* =========================================================================  /* =========================================================================
840        See the comment at the head of this function concerning the next line,        See the comment at the head of this function concerning the next line,
841        which was an old fudge for the benefit of OS/2.        which was an old fudge for the benefit of OS/2.
842        dummy = 1;        dummy = 1;
843    ========================================================================= */    ========================================================================= */
844        do tcode += GET(tcode,1); while (*tcode == OP_ALT);        do tcode += GET(tcode,1); while (*tcode == OP_ALT);
845        tcode += 1+LINK_SIZE;        tcode += 1 + LINK_SIZE;
846          break;
847    
848          /* SKIPZERO skips the bracket. */
849    
850          case OP_SKIPZERO:
851          tcode++;
852          do tcode += GET(tcode,1); while (*tcode == OP_ALT);
853          tcode += 1 + LINK_SIZE;
854        break;        break;
855    
856        /* Single-char * or ? sets the bit and tries the next item */        /* Single-char * or ? sets the bit and tries the next item */
857    
858        case OP_STAR:        case OP_STAR:
859        case OP_MINSTAR:        case OP_MINSTAR:
860          case OP_POSSTAR:
861        case OP_QUERY:        case OP_QUERY:
862        case OP_MINQUERY:        case OP_MINQUERY:
863        set_bit(start_bits, tcode[1], caseless, cd);        case OP_POSQUERY:
864        tcode += 2;        tcode = set_table_bit(start_bits, tcode + 1, FALSE, cd, utf8);
865  #ifdef SUPPORT_UTF8        break;
866        if (utf8) while ((*tcode & 0xc0) == 0x80) tcode++;  
867  #endif        case OP_STARI:
868          case OP_MINSTARI:
869          case OP_POSSTARI:
870          case OP_QUERYI:
871          case OP_MINQUERYI:
872          case OP_POSQUERYI:
873          tcode = set_table_bit(start_bits, tcode + 1, TRUE, cd, utf8);
874        break;        break;
875    
876        /* Single-char upto sets the bit and tries the next */        /* Single-char upto sets the bit and tries the next */
877    
878        case OP_UPTO:        case OP_UPTO:
879        case OP_MINUPTO:        case OP_MINUPTO:
880        set_bit(start_bits, tcode[3], caseless, cd);        case OP_POSUPTO:
881        tcode += 4;        tcode = set_table_bit(start_bits, tcode + 3, FALSE, cd, utf8);
882  #ifdef SUPPORT_UTF8        break;
883        if (utf8) while ((*tcode & 0xc0) == 0x80) tcode++;  
884  #endif        case OP_UPTOI:
885          case OP_MINUPTOI:
886          case OP_POSUPTOI:
887          tcode = set_table_bit(start_bits, tcode + 3, TRUE, cd, utf8);
888        break;        break;
889    
890        /* At least one single char sets the bit and stops */        /* At least one single char sets the bit and stops */
891    
892        case OP_EXACT:       /* Fall through */        case OP_EXACT:
893        tcode += 2;        tcode += 2;
894          /* Fall through */
895        case OP_CHAR:        case OP_CHAR:
       case OP_CHARNC:  
896        case OP_PLUS:        case OP_PLUS:
897        case OP_MINPLUS:        case OP_MINPLUS:
898        set_bit(start_bits, tcode[1], caseless, cd);        case OP_POSPLUS:
899          (void)set_table_bit(start_bits, tcode + 1, FALSE, cd, utf8);
900          try_next = FALSE;
901          break;
902    
903          case OP_EXACTI:
904          tcode += 2;
905          /* Fall through */
906          case OP_CHARI:
907          case OP_PLUSI:
908          case OP_MINPLUSI:
909          case OP_POSPLUSI:
910          (void)set_table_bit(start_bits, tcode + 1, TRUE, cd, utf8);
911          try_next = FALSE;
912          break;
913    
914          /* Special spacing and line-terminating items. These recognize specific
915          lists of characters. The difference between VSPACE and ANYNL is that the
916          latter can match the two-character CRLF sequence, but that is not
917          relevant for finding the first character, so their code here is
918          identical. */
919    
920          case OP_HSPACE:
921          SET_BIT(0x09);
922          SET_BIT(0x20);
923          if (utf8)
924            {
925            SET_BIT(0xC2);  /* For U+00A0 */
926            SET_BIT(0xE1);  /* For U+1680, U+180E */
927            SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */
928            SET_BIT(0xE3);  /* For U+3000 */
929            }
930          else SET_BIT(0xA0);
931          try_next = FALSE;
932          break;
933    
934          case OP_ANYNL:
935          case OP_VSPACE:
936          SET_BIT(0x0A);
937          SET_BIT(0x0B);
938          SET_BIT(0x0C);
939          SET_BIT(0x0D);
940          if (utf8)
941            {
942            SET_BIT(0xC2);  /* For U+0085 */
943            SET_BIT(0xE2);  /* For U+2028, U+2029 */
944            }
945          else SET_BIT(0x85);
946        try_next = FALSE;        try_next = FALSE;
947        break;        break;
948    
949        /* Single character type sets the bits and stops */        /* Single character types set the bits and stop. Note that if PCRE_UCP
950          is set, we do not see these op codes because \d etc are converted to
951          properties. Therefore, these apply in the case when only characters less
952          than 256 are recognized to match the types. */
953    
954        case OP_NOT_DIGIT:        case OP_NOT_DIGIT:
955        for (c = 0; c < 32; c++)        set_nottype_bits(start_bits, cbit_digit, table_limit, cd);
         start_bits[c] |= ~cd->cbits[c+cbit_digit];  
956        try_next = FALSE;        try_next = FALSE;
957        break;        break;
958    
959        case OP_DIGIT:        case OP_DIGIT:
960        for (c = 0; c < 32; c++)        set_type_bits(start_bits, cbit_digit, table_limit, cd);
         start_bits[c] |= cd->cbits[c+cbit_digit];  
961        try_next = FALSE;        try_next = FALSE;
962        break;        break;
963    
964        /* The cbit_space table has vertical tab as whitespace; we have to        /* The cbit_space table has vertical tab as whitespace; we have to
965        discard it. */        ensure it is set as not whitespace. */
966    
967        case OP_NOT_WHITESPACE:        case OP_NOT_WHITESPACE:
968        for (c = 0; c < 32; c++)        set_nottype_bits(start_bits, cbit_space, table_limit, cd);
969          {        start_bits[1] |= 0x08;
         int d = cd->cbits[c+cbit_space];  
         if (c == 1) d &= ~0x08;  
         start_bits[c] |= ~d;  
         }  
970        try_next = FALSE;        try_next = FALSE;
971        break;        break;
972    
973        /* The cbit_space table has vertical tab as whitespace; we have to        /* The cbit_space table has vertical tab as whitespace; we have to
974        discard it. */        not set it from the table. */
975    
976        case OP_WHITESPACE:        case OP_WHITESPACE:
977        for (c = 0; c < 32; c++)        c = start_bits[1];    /* Save in case it was already set */
978          {        set_type_bits(start_bits, cbit_space, table_limit, cd);
979          int d = cd->cbits[c+cbit_space];        start_bits[1] = (start_bits[1] & ~0x08) | c;
         if (c == 1) d &= ~0x08;  
         start_bits[c] |= d;  
         }  
980        try_next = FALSE;        try_next = FALSE;
981        break;        break;
982    
983        case OP_NOT_WORDCHAR:        case OP_NOT_WORDCHAR:
984        for (c = 0; c < 32; c++)        set_nottype_bits(start_bits, cbit_word, table_limit, cd);
         start_bits[c] |= ~cd->cbits[c+cbit_word];  
985        try_next = FALSE;        try_next = FALSE;
986        break;        break;
987    
988        case OP_WORDCHAR:        case OP_WORDCHAR:
989        for (c = 0; c < 32; c++)        set_type_bits(start_bits, cbit_word, table_limit, cd);
         start_bits[c] |= cd->cbits[c+cbit_word];  
990        try_next = FALSE;        try_next = FALSE;
991        break;        break;
992    
# Line 271  do Line 995  do
995    
996        case OP_TYPEPLUS:        case OP_TYPEPLUS:
997        case OP_TYPEMINPLUS:        case OP_TYPEMINPLUS:
998          case OP_TYPEPOSPLUS:
999        tcode++;        tcode++;
1000        break;        break;
1001    
# Line 283  do Line 1008  do
1008    
1009        case OP_TYPEUPTO:        case OP_TYPEUPTO:
1010        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
1011          case OP_TYPEPOSUPTO:
1012        tcode += 2;               /* Fall through */        tcode += 2;               /* Fall through */
1013    
1014        case OP_TYPESTAR:        case OP_TYPESTAR:
1015        case OP_TYPEMINSTAR:        case OP_TYPEMINSTAR:
1016          case OP_TYPEPOSSTAR:
1017        case OP_TYPEQUERY:        case OP_TYPEQUERY:
1018        case OP_TYPEMINQUERY:        case OP_TYPEMINQUERY:
1019          case OP_TYPEPOSQUERY:
1020        switch(tcode[1])        switch(tcode[1])
1021          {          {
1022            default:
1023          case OP_ANY:          case OP_ANY:
1024          return FALSE;          case OP_ALLANY:
1025            return SSB_FAIL;
1026    
1027            case OP_HSPACE:
1028            SET_BIT(0x09);
1029            SET_BIT(0x20);
1030            if (utf8)
1031              {
1032              SET_BIT(0xC2);  /* For U+00A0 */
1033              SET_BIT(0xE1);  /* For U+1680, U+180E */
1034              SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */
1035              SET_BIT(0xE3);  /* For U+3000 */
1036              }
1037            else SET_BIT(0xA0);
1038            break;
1039    
1040            case OP_ANYNL:
1041            case OP_VSPACE:
1042            SET_BIT(0x0A);
1043            SET_BIT(0x0B);
1044            SET_BIT(0x0C);
1045            SET_BIT(0x0D);
1046            if (utf8)
1047              {
1048              SET_BIT(0xC2);  /* For U+0085 */
1049              SET_BIT(0xE2);  /* For U+2028, U+2029 */
1050              }
1051            else SET_BIT(0x85);
1052            break;
1053    
1054          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
1055          for (c = 0; c < 32; c++)          set_nottype_bits(start_bits, cbit_digit, table_limit, cd);
           start_bits[c] |= ~cd->cbits[c+cbit_digit];  
1056          break;          break;
1057    
1058          case OP_DIGIT:          case OP_DIGIT:
1059          for (c = 0; c < 32; c++)          set_type_bits(start_bits, cbit_digit, table_limit, cd);
           start_bits[c] |= cd->cbits[c+cbit_digit];  
1060          break;          break;
1061    
1062          /* The cbit_space table has vertical tab as whitespace; we have to          /* The cbit_space table has vertical tab as whitespace; we have to
1063          discard it. */          ensure it gets set as not whitespace. */
1064    
1065          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
1066          for (c = 0; c < 32; c++)          set_nottype_bits(start_bits, cbit_space, table_limit, cd);
1067            {          start_bits[1] |= 0x08;
           int d = cd->cbits[c+cbit_space];  
           if (c == 1) d &= ~0x08;  
           start_bits[c] |= ~d;  
           }  
1068          break;          break;
1069    
1070          /* The cbit_space table has vertical tab as whitespace; we have to          /* The cbit_space table has vertical tab as whitespace; we have to
1071          discard it. */          avoid setting it. */
1072    
1073          case OP_WHITESPACE:          case OP_WHITESPACE:
1074          for (c = 0; c < 32; c++)          c = start_bits[1];    /* Save in case it was already set */
1075            {          set_type_bits(start_bits, cbit_space, table_limit, cd);
1076            int d = cd->cbits[c+cbit_space];          start_bits[1] = (start_bits[1] & ~0x08) | c;
           if (c == 1) d &= ~0x08;  
           start_bits[c] |= d;  
           }  
1077          break;          break;
1078    
1079          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
1080          for (c = 0; c < 32; c++)          set_nottype_bits(start_bits, cbit_word, table_limit, cd);
           start_bits[c] |= ~cd->cbits[c+cbit_word];  
1081          break;          break;
1082    
1083          case OP_WORDCHAR:          case OP_WORDCHAR:
1084          for (c = 0; c < 32; c++)          set_type_bits(start_bits, cbit_word, table_limit, cd);
           start_bits[c] |= cd->cbits[c+cbit_word];  
1085          break;          break;
1086          }          }
1087    
# Line 349  do Line 1095  do
1095        character with a value > 255. */        character with a value > 255. */
1096    
1097        case OP_NCLASS:        case OP_NCLASS:
1098    #ifdef SUPPORT_UTF8
1099        if (utf8)        if (utf8)
1100          {          {
1101          start_bits[24] |= 0xf0;              /* Bits for 0xc4 - 0xc8 */          start_bits[24] |= 0xf0;              /* Bits for 0xc4 - 0xc8 */
1102          memset(start_bits+25, 0xff, 7);      /* Bits for 0xc9 - 0xff */          memset(start_bits+25, 0xff, 7);      /* Bits for 0xc9 - 0xff */
1103          }          }
1104    #endif
1105        /* Fall through */        /* Fall through */
1106    
1107        case OP_CLASS:        case OP_CLASS:
# Line 366  do Line 1114  do
1114          value is > 127. In fact, there are only two possible starting bytes for          value is > 127. In fact, there are only two possible starting bytes for
1115          characters in the range 128 - 255. */          characters in the range 128 - 255. */
1116    
1117    #ifdef SUPPORT_UTF8
1118          if (utf8)          if (utf8)
1119            {            {
1120            for (c = 0; c < 16; c++) start_bits[c] |= tcode[c];            for (c = 0; c < 16; c++) start_bits[c] |= tcode[c];
# Line 383  do Line 1132  do
1132          /* In non-UTF-8 mode, the two bit maps are completely compatible. */          /* In non-UTF-8 mode, the two bit maps are completely compatible. */
1133    
1134          else          else
1135    #endif
1136            {            {
1137            for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];            for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
1138            }            }
1139    
1140          /* Advance past the bit map, and act on what follows */          /* Advance past the bit map, and act on what follows. For a zero
1141            minimum repeat, continue; otherwise stop processing. */
1142    
1143          tcode += 32;          tcode += 32;
1144          switch (*tcode)          switch (*tcode)
# Line 404  do Line 1155  do
1155            if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5;            if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5;
1156              else try_next = FALSE;              else try_next = FALSE;
1157            break;            break;
1158    
1159            default:            default:
1160            try_next = FALSE;            try_next = FALSE;
1161            break;            break;
# Line 418  do Line 1169  do
1169    code += GET(code, 1);   /* Advance to next branch */    code += GET(code, 1);   /* Advance to next branch */
1170    }    }
1171  while (*code == OP_ALT);  while (*code == OP_ALT);
1172  return TRUE;  return yield;
1173  }  }
1174    
1175    
1176    
1177    
1178    
1179  /*************************************************  /*************************************************
1180  *          Study a compiled expression           *  *          Study a compiled expression           *
1181  *************************************************/  *************************************************/
# Line 438  Arguments: Line 1191  Arguments:
1191              set NULL unless error              set NULL unless error
1192    
1193  Returns:    pointer to a pcre_extra block, with study_data filled in and the  Returns:    pointer to a pcre_extra block, with study_data filled in and the
1194                appropriate flag set;                appropriate flags set;
1195              NULL on error or if no optimization possible              NULL on error or if no optimization possible
1196  */  */
1197    
1198  PCRE_DATA_SCOPE pcre_extra *  PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION
1199  pcre_study(const pcre *external_re, int options, const char **errorptr)  pcre_study(const pcre *external_re, int options, const char **errorptr)
1200  {  {
1201    int min;
1202    BOOL bits_set = FALSE;
1203  uschar start_bits[32];  uschar start_bits[32];
1204  pcre_extra *extra;  pcre_extra *extra;
1205  pcre_study_data *study;  pcre_study_data *study;
# Line 471  code = (uschar *)re + re->name_table_off Line 1226  code = (uschar *)re + re->name_table_off
1226    (re->name_count * re->name_entry_size);    (re->name_count * re->name_entry_size);
1227    
1228  /* For an anchored pattern, or an unanchored pattern that has a first char, or  /* For an anchored pattern, or an unanchored pattern that has a first char, or
1229  a multiline pattern that matches only at "line starts", no further processing  a multiline pattern that matches only at "line starts", there is no point in
1230  at present. */  seeking a list of starting bytes. */
1231    
1232  if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)  if ((re->options & PCRE_ANCHORED) == 0 &&
1233    return NULL;      (re->flags & (PCRE_FIRSTSET|PCRE_STARTLINE)) == 0)
1234      {
1235      int rc;
1236    
1237  /* Set the character tables in the block that is passed around */    /* Set the character tables in the block that is passed around */
1238    
1239  tables = re->tables;    tables = re->tables;
1240  if (tables == NULL)    if (tables == NULL)
1241    (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,      (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
1242    (void *)(&tables));      (void *)(&tables));
1243    
1244  compile_block.lcc = tables + lcc_offset;    compile_block.lcc = tables + lcc_offset;
1245  compile_block.fcc = tables + fcc_offset;    compile_block.fcc = tables + fcc_offset;
1246  compile_block.cbits = tables + cbits_offset;    compile_block.cbits = tables + cbits_offset;
1247  compile_block.ctypes = tables + ctypes_offset;    compile_block.ctypes = tables + ctypes_offset;
1248    
1249  /* See if we can find a fixed set of initial characters for the pattern. */    /* See if we can find a fixed set of initial characters for the pattern. */
1250    
1251  memset(start_bits, 0, 32 * sizeof(uschar));    memset(start_bits, 0, 32 * sizeof(uschar));
1252  if (!set_start_bits(code, start_bits, (re->options & PCRE_CASELESS) != 0,    rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0,
1253    (re->options & PCRE_UTF8) != 0, &compile_block)) return NULL;      &compile_block);
1254      bits_set = rc == SSB_DONE;
1255      if (rc == SSB_UNKNOWN) *errorptr = "internal error: opcode not recognized";
1256      }
1257    
1258    /* Find the minimum length of subject string. */
1259    
1260    switch(min = find_minlength(code, code, re->options))
1261      {
1262      case -2: *errorptr = "internal error: missing capturing bracket"; break;
1263      case -3: *errorptr = "internal error: opcode not recognized"; break;
1264      default: break;
1265      }
1266    
1267    /* Return NULL if there's been an error or if no optimization is possible. */
1268    
1269    if (*errorptr != NULL || (!bits_set && min < 0)) return NULL;
1270    
1271  /* Get a pcre_extra block and a pcre_study_data block. The study data is put in  /* Get a pcre_extra block and a pcre_study_data block. The study data is put in
1272  the latter, which is pointed to by the former, which may also get additional  the latter, which is pointed to by the former, which may also get additional
# Line 516  extra->flags = PCRE_EXTRA_STUDY_DATA; Line 1289  extra->flags = PCRE_EXTRA_STUDY_DATA;
1289  extra->study_data = study;  extra->study_data = study;
1290    
1291  study->size = sizeof(pcre_study_data);  study->size = sizeof(pcre_study_data);
1292  study->options = PCRE_STUDY_MAPPED;  study->flags = 0;
1293  memcpy(study->start_bits, start_bits, sizeof(start_bits));  
1294    if (bits_set)
1295      {
1296      study->flags |= PCRE_STUDY_MAPPED;
1297      memcpy(study->start_bits, start_bits, sizeof(start_bits));
1298      }
1299    
1300    if (min >= 0)
1301      {
1302      study->flags |= PCRE_STUDY_MINLEN;
1303      study->minlength = min;
1304      }
1305    
1306  return extra;  return extra;
1307  }  }

Legend:
Removed from v.91  
changed lines
  Added in v.611

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12