/[pcre]/code/trunk/pcre_internal.h
ViewVC logotype

Diff of /code/trunk/pcre_internal.h

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC revision 93 by nigel, Sat Feb 24 21:41:42 2007 UTC
# Line 54  functions whose names all begin with "_p Line 54  functions whose names all begin with "_p
54  /* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef  /* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
55  inline, and there are *still* stupid compilers about that don't like indented  inline, and there are *still* stupid compilers about that don't like indented
56  pre-processor statements, or at least there were when I first wrote this. After  pre-processor statements, or at least there were when I first wrote this. After
57  all, it had only been about 10 years then... */  all, it had only been about 10 years then...
58    
59    It turns out that the Mac Debugging.h header also defines the macro DPRINTF, so
60    be absolutely sure we get our version. */
61    
62    #undef DPRINTF
63  #ifdef DEBUG  #ifdef DEBUG
64  #define DPRINTF(p) printf p  #define DPRINTF(p) printf p
65  #else  #else
66  #define DPRINTF(p) /*nothing*/  #define DPRINTF(p) /* Nothing */
67  #endif  #endif
68    
69    
# Line 114  Unix, where it is defined in sys/types, Line 118  Unix, where it is defined in sys/types,
118    
119  typedef unsigned char uschar;  typedef unsigned char uschar;
120    
121  /* PCRE is able to support 3 different kinds of newline (CR, LF, CRLF). The  /* This is an unsigned int value that no character can ever have. UTF-8
122  following macro is used to package up testing for newlines. NLBLOCK is defined  characters only go up to 0x7fffffff (though Unicode doesn't go beyond
123  in the various modules to indicate in which datablock the parameters exist. */  0x0010ffff). */
124    
125    #define NOTACHAR 0xffffffff
126    
127    /* PCRE is able to support several different kinds of newline (CR, LF, CRLF,
128    and "all" at present). The following macros are used to package up testing for
129    newlines. NLBLOCK, PSSTART, and PSEND are defined in the various modules to
130    indicate in which datablock the parameters exist, and what the start/end of
131    string field names are. */
132    
133    #define NLTYPE_FIXED   0     /* Newline is a fixed length string */
134    #define NLTYPE_ANY     1     /* Newline is any Unicode line ending */
135    
136    /* This macro checks for a newline at the given position */
137    
138  #define IS_NEWLINE(p) \  #define IS_NEWLINE(p) \
139    ((p)[0] == NLBLOCK->nl[0] && \    ((NLBLOCK->nltype != NLTYPE_FIXED)? \
140    (NLBLOCK->nllen == 1 || (p)[1] == NLBLOCK->nl[1]))      ((p) < NLBLOCK->PSEND && \
141         _pcre_is_newline((p), NLBLOCK->PSEND, &(NLBLOCK->nllen), utf8) \
142        ) \
143        : \
144        ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
145         (p)[0] == NLBLOCK->nl[0] && \
146         (NLBLOCK->nllen == 1 || (p)[1] == NLBLOCK->nl[1]) \
147        ) \
148      )
149    
150    /* This macro checks for a newline immediately preceding the given position */
151    
152    #define WAS_NEWLINE(p) \
153      ((NLBLOCK->nltype != NLTYPE_FIXED)? \
154        ((p) > NLBLOCK->PSSTART && \
155         _pcre_was_newline((p), NLBLOCK->PSSTART, &(NLBLOCK->nllen), utf8) \
156        ) \
157        : \
158        ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
159         (p)[-NLBLOCK->nllen] == NLBLOCK->nl[0] && \
160         (NLBLOCK->nllen == 1 || (p)[-NLBLOCK->nllen+1] == NLBLOCK->nl[1]) \
161        ) \
162      )
163    
164  /* When PCRE is compiled as a C++ library, the subject pointer can be replaced  /* When PCRE is compiled as a C++ library, the subject pointer can be replaced
165  with a custom type. This makes it possible, for example, to allow pcre_exec()  with a custom type. This makes it possible, for example, to allow pcre_exec()
# Line 278  we know we are in UTF-8 mode. */ Line 317  we know we are in UTF-8 mode. */
317    
318  #define GETCHAR(c, eptr) \  #define GETCHAR(c, eptr) \
319    c = *eptr; \    c = *eptr; \
320    if ((c & 0xc0) == 0xc0) \    if (c >= 0xc0) \
321      { \      { \
322      int gcii; \      int gcii; \
323      int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \      int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
# Line 296  pointer. */ Line 335  pointer. */
335    
336  #define GETCHARTEST(c, eptr) \  #define GETCHARTEST(c, eptr) \
337    c = *eptr; \    c = *eptr; \
338    if (utf8 && (c & 0xc0) == 0xc0) \    if (utf8 && c >= 0xc0) \
339      { \      { \
340      int gcii; \      int gcii; \
341      int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \      int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
# Line 314  know we are in UTF-8 mode. */ Line 353  know we are in UTF-8 mode. */
353    
354  #define GETCHARINC(c, eptr) \  #define GETCHARINC(c, eptr) \
355    c = *eptr++; \    c = *eptr++; \
356    if ((c & 0xc0) == 0xc0) \    if (c >= 0xc0) \
357      { \      { \
358      int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \      int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
359      int gcss = 6*gcaa; \      int gcss = 6*gcaa; \
# Line 330  know we are in UTF-8 mode. */ Line 369  know we are in UTF-8 mode. */
369    
370  #define GETCHARINCTEST(c, eptr) \  #define GETCHARINCTEST(c, eptr) \
371    c = *eptr++; \    c = *eptr++; \
372    if (utf8 && (c & 0xc0) == 0xc0) \    if (utf8 && c >= 0xc0) \
373      { \      { \
374      int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \      int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
375      int gcss = 6*gcaa; \      int gcss = 6*gcaa; \
# Line 347  if there are extra bytes. This is called Line 386  if there are extra bytes. This is called
386    
387  #define GETCHARLEN(c, eptr, len) \  #define GETCHARLEN(c, eptr, len) \
388    c = *eptr; \    c = *eptr; \
389    if ((c & 0xc0) == 0xc0) \    if (c >= 0xc0) \
390      { \      { \
391      int gcii; \      int gcii; \
392      int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \      int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
# Line 400  bits. */ Line 439  bits. */
439  /* Masks for identifying the public options that are permitted at compile  /* Masks for identifying the public options that are permitted at compile
440  time, run time, or study time, respectively. */  time, run time, or study time, respectively. */
441    
442    #define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY)
443    
444  #define PUBLIC_OPTIONS \  #define PUBLIC_OPTIONS \
445    (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \    (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
446     PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \     PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
447     PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \     PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
448     PCRE_DUPNAMES|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF)     PCRE_DUPNAMES|PCRE_NEWLINE_BITS)
449    
450  #define PUBLIC_EXEC_OPTIONS \  #define PUBLIC_EXEC_OPTIONS \
451    (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \    (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
452     PCRE_PARTIAL|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF)     PCRE_PARTIAL|PCRE_NEWLINE_BITS)
453    
454  #define PUBLIC_DFA_EXEC_OPTIONS \  #define PUBLIC_DFA_EXEC_OPTIONS \
455    (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \    (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
456     PCRE_PARTIAL|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART|PCRE_NEWLINE_CR| \     PCRE_PARTIAL|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART|PCRE_NEWLINE_BITS)
    PCRE_NEWLINE_LF)  
457    
458  #define PUBLIC_STUDY_OPTIONS 0   /* None defined */  #define PUBLIC_STUDY_OPTIONS 0   /* None defined */
459    
# Line 445  typedef int BOOL; Line 485  typedef int BOOL;
485  #define FALSE   0  #define FALSE   0
486  #define TRUE    1  #define TRUE    1
487    
488  /* Escape items that are just an encoding of a particular data value. Note that  /* Escape items that are just an encoding of a particular data value. */
 ESC_n is defined as yet another macro, which is set in config.h to either \n  
 (the default) or \r (which some people want). */  
489    
490  #ifndef ESC_e  #ifndef ESC_e
491  #define ESC_e 27  #define ESC_e 27
# Line 458  ESC_n is defined as yet another macro, w Line 496  ESC_n is defined as yet another macro, w
496  #endif  #endif
497    
498  #ifndef ESC_n  #ifndef ESC_n
499  #define ESC_n NEWLINE  #define ESC_n '\n'
500  #endif  #endif
501    
502  #ifndef ESC_r  #ifndef ESC_r
# Line 497  value such as \n. They must have non-zer Line 535  value such as \n. They must have non-zer
535  their negation. Also, they must appear in the same order as in the opcode  their negation. Also, they must appear in the same order as in the opcode
536  definitions below, up to ESC_z. There's a dummy for OP_ANY because it  definitions below, up to ESC_z. There's a dummy for OP_ANY because it
537  corresponds to "." rather than an escape sequence. The final one must be  corresponds to "." rather than an escape sequence. The final one must be
538  ESC_REF as subsequent values are used for \1, \2, \3, etc. There is are two  ESC_REF as subsequent values are used for backreferences (\1, \2, \3, etc).
539  tests in the code for an escape greater than ESC_b and less than ESC_Z to  There are two tests in the code for an escape greater than ESC_b and less than
540  detect the types that may be repeated. These are the types that consume  ESC_Z to detect the types that may be repeated. These are the types that
541  characters. If any new escapes are put in between that don't consume a  consume characters. If any new escapes are put in between that don't consume a
542  character, that code will have to change. */  character, that code will have to change. */
543    
544  enum { ESC_A = 1, ESC_G, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W,  enum { ESC_A = 1, ESC_G, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W,
545         ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_X, ESC_Z, ESC_z, ESC_E,         ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_R, ESC_X, ESC_Z, ESC_z,
546         ESC_Q, ESC_REF };         ESC_E, ESC_Q, ESC_k, ESC_REF };
547    
548    
549  /* Opcode table: OP_BRA must be last, as all values >= it are used for brackets  /* Opcode table: OP_BRA must be last, as all values >= it are used for brackets
550  that extract substrings. Starting from 1 (i.e. after OP_END), the values up to  that extract substrings. Starting from 1 (i.e. after OP_END), the values up to
551  OP_EOD must correspond in order to the list of escapes immediately above.  OP_EOD must correspond in order to the list of escapes immediately above.
552  Note that whenever this list is updated, the two macro definitions that follow  
553  must also be updated to match. */  To keep stored, compiled patterns compatible, new opcodes should be added
554    immediately before OP_BRA, where (since release 7.0) a gap is left for this
555    purpose.
556    
557    *** NOTE NOTE NOTE *** Whenever this list is updated, the two macro definitions
558    that follow must also be updated to match. There is also a table called
559    "coptable" in pcre_dfa_exec.c that must be updated. */
560    
561  enum {  enum {
562    OP_END,            /* 0 End of pattern */    OP_END,            /* 0 End of pattern */
# Line 532  enum { Line 577  enum {
577    OP_ANYBYTE,        /* 12 Match any byte (\C); different to OP_ANY for UTF-8 */    OP_ANYBYTE,        /* 12 Match any byte (\C); different to OP_ANY for UTF-8 */
578    OP_NOTPROP,        /* 13 \P (not Unicode property) */    OP_NOTPROP,        /* 13 \P (not Unicode property) */
579    OP_PROP,           /* 14 \p (Unicode property) */    OP_PROP,           /* 14 \p (Unicode property) */
580    OP_EXTUNI,         /* 15 \X (extended Unicode sequence) */    OP_ANYNL,          /* 15 \R (any newline sequence) */
581    OP_EODN,           /* 16 End of data or \n at end of data: \Z. */    OP_EXTUNI,         /* 16 \X (extended Unicode sequence) */
582    OP_EOD,            /* 17 End of data: \z */    OP_EODN,           /* 17 End of data or \n at end of data: \Z. */
583      OP_EOD,            /* 18 End of data: \z */
584    OP_OPT,            /* 18 Set runtime options */  
585    OP_CIRC,           /* 19 Start of line - varies with multiline switch */    OP_OPT,            /* 19 Set runtime options */
586    OP_DOLL,           /* 20 End of line - varies with multiline switch */    OP_CIRC,           /* 20 Start of line - varies with multiline switch */
587    OP_CHAR,           /* 21 Match one character, casefully */    OP_DOLL,           /* 21 End of line - varies with multiline switch */
588    OP_CHARNC,         /* 22 Match one character, caselessly */    OP_CHAR,           /* 22 Match one character, casefully */
589    OP_NOT,            /* 23 Match one character, not the following one */    OP_CHARNC,         /* 23 Match one character, caselessly */
590      OP_NOT,            /* 24 Match one character, not the following one */
591    OP_STAR,           /* 24 The maximizing and minimizing versions of */  
592    OP_MINSTAR,        /* 25 all these opcodes must come in pairs, with */    OP_STAR,           /* 25 The maximizing and minimizing versions of */
593    OP_PLUS,           /* 26 the minimizing one second. */    OP_MINSTAR,        /* 26 these six opcodes must come in pairs, with */
594    OP_MINPLUS,        /* 27 This first set applies to single characters */    OP_PLUS,           /* 27 the minimizing one second. */
595    OP_QUERY,          /* 28 */    OP_MINPLUS,        /* 28 This first set applies to single characters.*/
596    OP_MINQUERY,       /* 29 */    OP_QUERY,          /* 29 */
597    OP_UPTO,           /* 30 From 0 to n matches */    OP_MINQUERY,       /* 30 */
598    OP_MINUPTO,        /* 31 */  
599    OP_EXACT,          /* 32 Exactly n matches */    OP_UPTO,           /* 31 From 0 to n matches */
600      OP_MINUPTO,        /* 32 */
601    OP_NOTSTAR,        /* 33 The maximizing and minimizing versions of */    OP_EXACT,          /* 33 Exactly n matches */
602    OP_NOTMINSTAR,     /* 34 all these opcodes must come in pairs, with */  
603    OP_NOTPLUS,        /* 35 the minimizing one second. */    OP_POSSTAR,        /* 34 Possessified star */
604    OP_NOTMINPLUS,     /* 36 This set applies to "not" single characters */    OP_POSPLUS,        /* 35 Possessified plus */
605    OP_NOTQUERY,       /* 37 */    OP_POSQUERY,       /* 36 Possessified query */
606    OP_NOTMINQUERY,    /* 38 */    OP_POSUPTO,        /* 37 Possessified upto */
607    OP_NOTUPTO,        /* 39 From 0 to n matches */  
608    OP_NOTMINUPTO,     /* 40 */    OP_NOTSTAR,        /* 38 The maximizing and minimizing versions of */
609    OP_NOTEXACT,       /* 41 Exactly n matches */    OP_NOTMINSTAR,     /* 39 these six opcodes must come in pairs, with */
610      OP_NOTPLUS,        /* 40 the minimizing one second. They must be in */
611    OP_TYPESTAR,       /* 42 The maximizing and minimizing versions of */    OP_NOTMINPLUS,     /* 41 exactly the same order as those above. */
612    OP_TYPEMINSTAR,    /* 43 all these opcodes must come in pairs, with */    OP_NOTQUERY,       /* 42 This set applies to "not" single characters. */
613    OP_TYPEPLUS,       /* 44 the minimizing one second. These codes must */    OP_NOTMINQUERY,    /* 43 */
614    OP_TYPEMINPLUS,    /* 45 be in exactly the same order as those above. */  
615    OP_TYPEQUERY,      /* 46 This set applies to character types such as \d */    OP_NOTUPTO,        /* 44 From 0 to n matches */
616    OP_TYPEMINQUERY,   /* 47 */    OP_NOTMINUPTO,     /* 45 */
617    OP_TYPEUPTO,       /* 48 From 0 to n matches */    OP_NOTEXACT,       /* 46 Exactly n matches */
618    OP_TYPEMINUPTO,    /* 49 */  
619    OP_TYPEEXACT,      /* 50 Exactly n matches */    OP_NOTPOSSTAR,     /* 47 Possessified versions */
620      OP_NOTPOSPLUS,     /* 48 */
621    OP_CRSTAR,         /* 51 The maximizing and minimizing versions of */    OP_NOTPOSQUERY,    /* 49 */
622    OP_CRMINSTAR,      /* 52 all these opcodes must come in pairs, with */    OP_NOTPOSUPTO,     /* 50 */
623    OP_CRPLUS,         /* 53 the minimizing one second. These codes must */  
624    OP_CRMINPLUS,      /* 54 be in exactly the same order as those above. */    OP_TYPESTAR,       /* 51 The maximizing and minimizing versions of */
625    OP_CRQUERY,        /* 55 These are for character classes and back refs */    OP_TYPEMINSTAR,    /* 52 these six opcodes must come in pairs, with */
626    OP_CRMINQUERY,     /* 56 */    OP_TYPEPLUS,       /* 53 the minimizing one second. These codes must */
627    OP_CRRANGE,        /* 57 These are different to the three sets above. */    OP_TYPEMINPLUS,    /* 54 be in exactly the same order as those above. */
628    OP_CRMINRANGE,     /* 58 */    OP_TYPEQUERY,      /* 55 This set applies to character types such as \d */
629      OP_TYPEMINQUERY,   /* 56 */
630    
631      OP_TYPEUPTO,       /* 57 From 0 to n matches */
632      OP_TYPEMINUPTO,    /* 58 */
633      OP_TYPEEXACT,      /* 59 Exactly n matches */
634    
635      OP_TYPEPOSSTAR,    /* 60 Possessified versions */
636      OP_TYPEPOSPLUS,    /* 61 */
637      OP_TYPEPOSQUERY,   /* 62 */
638      OP_TYPEPOSUPTO,    /* 63 */
639    
640      OP_CRSTAR,         /* 64 The maximizing and minimizing versions of */
641      OP_CRMINSTAR,      /* 65 all these opcodes must come in pairs, with */
642      OP_CRPLUS,         /* 66 the minimizing one second. These codes must */
643      OP_CRMINPLUS,      /* 67 be in exactly the same order as those above. */
644      OP_CRQUERY,        /* 68 These are for character classes and back refs */
645      OP_CRMINQUERY,     /* 69 */
646      OP_CRRANGE,        /* 70 These are different to the three sets above. */
647      OP_CRMINRANGE,     /* 71 */
648    
649    OP_CLASS,          /* 59 Match a character class, chars < 256 only */    OP_CLASS,          /* 72 Match a character class, chars < 256 only */
650    OP_NCLASS,         /* 60 Same, but the bitmap was created from a negative    OP_NCLASS,         /* 73 Same, but the bitmap was created from a negative
651                             class - the difference is relevant only when a UTF-8                             class - the difference is relevant only when a UTF-8
652                             character > 255 is encountered. */                             character > 255 is encountered. */
653    
654    OP_XCLASS,         /* 61 Extended class for handling UTF-8 chars within the    OP_XCLASS,         /* 74 Extended class for handling UTF-8 chars within the
655                             class. This does both positive and negative. */                             class. This does both positive and negative. */
656    
657    OP_REF,            /* 62 Match a back reference */    OP_REF,            /* 75 Match a back reference */
658    OP_RECURSE,        /* 63 Match a numbered subpattern (possibly recursive) */    OP_RECURSE,        /* 76 Match a numbered subpattern (possibly recursive) */
659    OP_CALLOUT,        /* 64 Call out to external function if provided */    OP_CALLOUT,        /* 77 Call out to external function if provided */
660    
661    OP_ALT,            /* 65 Start of alternation */    OP_ALT,            /* 78 Start of alternation */
662    OP_KET,            /* 66 End of group that doesn't have an unbounded repeat */    OP_KET,            /* 79 End of group that doesn't have an unbounded repeat */
663    OP_KETRMAX,        /* 67 These two must remain together and in this */    OP_KETRMAX,        /* 80 These two must remain together and in this */
664    OP_KETRMIN,        /* 68 order. They are for groups that repeat for ever. */    OP_KETRMIN,        /* 81 order. They are for groups that repeat for ever. */
665    
666    /* The assertions must come before ONCE and COND */    /* The assertions must come before BRA, CBRA, ONCE, and COND.*/
667    
668    OP_ASSERT,         /* 69 Positive lookahead */    OP_ASSERT,         /* 82 Positive lookahead */
669    OP_ASSERT_NOT,     /* 70 Negative lookahead */    OP_ASSERT_NOT,     /* 83 Negative lookahead */
670    OP_ASSERTBACK,     /* 71 Positive lookbehind */    OP_ASSERTBACK,     /* 84 Positive lookbehind */
671    OP_ASSERTBACK_NOT, /* 72 Negative lookbehind */    OP_ASSERTBACK_NOT, /* 85 Negative lookbehind */
672    OP_REVERSE,        /* 73 Move pointer back - used in lookbehind assertions */    OP_REVERSE,        /* 86 Move pointer back - used in lookbehind assertions */
673    
674    /* ONCE and COND must come after the assertions, with ONCE first, as there's    /* ONCE, BRA, CBRA, and COND must come after the assertions, with ONCE first,
675    a test for >= ONCE for a subpattern that isn't an assertion. */    as there's a test for >= ONCE for a subpattern that isn't an assertion. */
676    
677    OP_ONCE,           /* 74 Once matched, don't back up into the subpattern */    OP_ONCE,           /* 87 Atomic group */
678    OP_COND,           /* 75 Conditional group */    OP_BRA,            /* 88 Start of non-capturing bracket */
679    OP_CREF,           /* 76 Used to hold an extraction string number (cond ref) */    OP_CBRA,           /* 89 Start of capturing bracket */
680      OP_COND,           /* 90 Conditional group */
681    OP_BRAZERO,        /* 77 These two must remain together and in this */  
682    OP_BRAMINZERO,     /* 78 order. */    /* These three must follow the previous three, in the same order. There's a
683      check for >= SBRA to distinguish the two sets. */
684    OP_BRANUMBER,      /* 79 Used for extracting brackets whose number is greater  
685                             than can fit into an opcode. */    OP_SBRA,           /* 91 Start of non-capturing bracket, check empty  */
686      OP_SCBRA,          /* 92 Start of capturing bracket, check empty */
687    OP_BRA             /* 80 This and greater values are used for brackets that    OP_SCOND,          /* 93 Conditional group, check empty */
688                             extract substrings up to EXTRACT_BASIC_MAX. After  
689                             that, use is made of OP_BRANUMBER. */    OP_CREF,           /* 94 Used to hold a capture number as condition */
690  };    OP_RREF,           /* 95 Used to hold a recursion number as condition */
691      OP_DEF,            /* 96 The DEFINE condition */
 /* WARNING WARNING WARNING: There is an implicit assumption in pcre.c and  
 study.c that all opcodes are less than 128 in value. This makes handling UTF-8  
 character sequences easier. */  
   
 /* The highest extraction number before we have to start using additional  
 bytes. (Originally PCRE didn't have support for extraction counts highter than  
 this number.) The value is limited by the number of opcodes left after OP_BRA,  
 i.e. 255 - OP_BRA. We actually set it a bit lower to leave room for additional  
 opcodes. */  
692    
693  #define EXTRACT_BASIC_MAX  100    OP_BRAZERO,        /* 97 These two must remain together and in this */
694      OP_BRAMINZERO      /* 98 order. */
695    };
696    
697    
698  /* This macro defines textual names for all the opcodes. These are used only  /* This macro defines textual names for all the opcodes. These are used only
# Line 644  for debugging. The macro is referenced o Line 701  for debugging. The macro is referenced o
701  #define OP_NAME_LIST \  #define OP_NAME_LIST \
702    "End", "\\A", "\\G", "\\B", "\\b", "\\D", "\\d",                \    "End", "\\A", "\\G", "\\B", "\\b", "\\D", "\\d",                \
703    "\\S", "\\s", "\\W", "\\w", "Any", "Anybyte",                   \    "\\S", "\\s", "\\W", "\\w", "Any", "Anybyte",                   \
704    "notprop", "prop", "extuni",                                    \    "notprop", "prop", "anynl", "extuni",                           \
705    "\\Z", "\\z",                                                   \    "\\Z", "\\z",                                                   \
706    "Opt", "^", "$", "char", "charnc", "not",                       \    "Opt", "^", "$", "char", "charnc", "not",                       \
707    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",                 \    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",                 \
708      "*+","++", "?+", "{",                                           \
709    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",                 \    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",                 \
710      "*+","++", "?+", "{",                                           \
711    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",                 \    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",                 \
712      "*+","++", "?+", "{",                                           \
713    "*", "*?", "+", "+?", "?", "??", "{", "{",                      \    "*", "*?", "+", "+?", "?", "??", "{", "{",                      \
714    "class", "nclass", "xclass", "Ref", "Recurse", "Callout",       \    "class", "nclass", "xclass", "Ref", "Recurse", "Callout",       \
715    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",     \    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",     \
716    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cond ref",\    "AssertB", "AssertB not", "Reverse",                            \
717    "Brazero", "Braminzero", "Branumber", "Bra"    "Once", "Bra 0", "Bra", "Cond", "SBra 0", "SBra", "SCond",      \
718      "Cond ref", "Cond rec", "Cond def", "Brazero", "Braminzero"
719    
720    
721  /* This macro defines the length of fixed length operations in the compiled  /* This macro defines the length of fixed length operations in the compiled
# Line 670  in UTF-8 mode. The code that uses this t Line 731  in UTF-8 mode. The code that uses this t
731    1,                             /* End                                    */ \    1,                             /* End                                    */ \
732    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */ \    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */ \
733    1, 1,                          /* Any, Anybyte                           */ \    1, 1,                          /* Any, Anybyte                           */ \
734    3, 3, 1,                       /* NOTPROP, PROP, EXTUNI                  */ \    3, 3, 1, 1,                    /* NOTPROP, PROP, ANYNL, EXTUNI           */ \
735    1, 1, 2, 1, 1,                 /* \Z, \z, Opt, ^, $                      */ \    1, 1, 2, 1, 1,                 /* \Z, \z, Opt, ^, $                      */ \
736    2,                             /* Char  - the minimum length             */ \    2,                             /* Char  - the minimum length             */ \
737    2,                             /* Charnc  - the minimum length           */ \    2,                             /* Charnc  - the minimum length           */ \
# Line 678  in UTF-8 mode. The code that uses this t Line 739  in UTF-8 mode. The code that uses this t
739    /* Positive single-char repeats                            ** These are  */ \    /* Positive single-char repeats                            ** These are  */ \
740    2, 2, 2, 2, 2, 2,              /* *, *?, +, +?, ?, ??      ** minima in  */ \    2, 2, 2, 2, 2, 2,              /* *, *?, +, +?, ?, ??      ** minima in  */ \
741    4, 4, 4,                       /* upto, minupto, exact     ** UTF-8 mode */ \    4, 4, 4,                       /* upto, minupto, exact     ** UTF-8 mode */ \
742      2, 2, 2, 4,                    /* *+, ++, ?+, upto+                      */ \
743    /* Negative single-char repeats - only for chars < 256                   */ \    /* Negative single-char repeats - only for chars < 256                   */ \
744    2, 2, 2, 2, 2, 2,              /* NOT *, *?, +, +?, ?, ??                */ \    2, 2, 2, 2, 2, 2,              /* NOT *, *?, +, +?, ?, ??                */ \
745    4, 4, 4,                       /* NOT upto, minupto, exact               */ \    4, 4, 4,                       /* NOT upto, minupto, exact               */ \
746      2, 2, 2, 4,                    /* Possessive *, +, ?, upto               */ \
747    /* Positive type repeats                                                 */ \    /* Positive type repeats                                                 */ \
748    2, 2, 2, 2, 2, 2,              /* Type *, *?, +, +?, ?, ??               */ \    2, 2, 2, 2, 2, 2,              /* Type *, *?, +, +?, ?, ??               */ \
749    4, 4, 4,                       /* Type upto, minupto, exact              */ \    4, 4, 4,                       /* Type upto, minupto, exact              */ \
750      2, 2, 2, 4,                    /* Possessive *+, ++, ?+, upto+           */ \
751    /* Character class & ref repeats                                         */ \    /* Character class & ref repeats                                         */ \
752    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */ \    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */ \
753    5, 5,                          /* CRRANGE, CRMINRANGE                    */ \    5, 5,                          /* CRRANGE, CRMINRANGE                    */ \
# Line 702  in UTF-8 mode. The code that uses this t Line 766  in UTF-8 mode. The code that uses this t
766    1+LINK_SIZE,                   /* Assert behind                          */ \    1+LINK_SIZE,                   /* Assert behind                          */ \
767    1+LINK_SIZE,                   /* Assert behind not                      */ \    1+LINK_SIZE,                   /* Assert behind not                      */ \
768    1+LINK_SIZE,                   /* Reverse                                */ \    1+LINK_SIZE,                   /* Reverse                                */ \
769    1+LINK_SIZE,                   /* Once                                   */ \    1+LINK_SIZE,                   /* ONCE                                   */ \
770      1+LINK_SIZE,                   /* BRA                                    */ \
771      3+LINK_SIZE,                   /* CBRA                                   */ \
772    1+LINK_SIZE,                   /* COND                                   */ \    1+LINK_SIZE,                   /* COND                                   */ \
773      1+LINK_SIZE,                   /* SBRA                                   */ \
774      3+LINK_SIZE,                   /* SCBRA                                  */ \
775      1+LINK_SIZE,                   /* SCOND                                  */ \
776    3,                             /* CREF                                   */ \    3,                             /* CREF                                   */ \
777      3,                             /* RREF                                   */ \
778      1,                             /* DEF                                    */ \
779    1, 1,                          /* BRAZERO, BRAMINZERO                    */ \    1, 1,                          /* BRAZERO, BRAMINZERO                    */ \
   3,                             /* BRANUMBER                              */ \  
   1+LINK_SIZE                    /* BRA                                    */ \  
780    
781    
782  /* A magic value for OP_CREF to indicate the "in recursion" condition. */  /* A magic value for OP_RREF to indicate the "any recursion" condition. */
783    
784  #define CREF_RECURSE  0xffff  #define RREF_ANY  0xffff
785    
786  /* Error code numbers. They are given names so that they can more easily be  /* Error code numbers. They are given names so that they can more easily be
787  tracked. */  tracked. */
# Line 722  enum { ERR0, ERR1, ERR2, ERR3, ERR4, Line 791  enum { ERR0, ERR1, ERR2, ERR3, ERR4,
791         ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29,         ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29,
792         ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,         ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
793         ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,         ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
794         ERR50, ERR51 };         ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57 };
795    
796  /* The real format of the start of the pcre block; the index of names and the  /* The real format of the start of the pcre block; the index of names and the
797  code vector run on as long as necessary after the end. We store an explicit  code vector run on as long as necessary after the end. We store an explicit
# Line 777  typedef struct compile_data { Line 846  typedef struct compile_data {
846    const uschar *fcc;            /* Points to case-flipping table */    const uschar *fcc;            /* Points to case-flipping table */
847    const uschar *cbits;          /* Points to character type table */    const uschar *cbits;          /* Points to character type table */
848    const uschar *ctypes;         /* Points to table of type maps */    const uschar *ctypes;         /* Points to table of type maps */
849      const uschar *start_workspace;/* The start of working space */
850    const uschar *start_code;     /* The start of the compiled code */    const uschar *start_code;     /* The start of the compiled code */
851    const uschar *start_pattern;  /* The start of the pattern */    const uschar *start_pattern;  /* The start of the pattern */
852      const uschar *end_pattern;    /* The end of the pattern */
853      uschar *hwm;                  /* High watermark of workspace */
854    uschar *name_table;           /* The name/number table */    uschar *name_table;           /* The name/number table */
855    int  names_found;             /* Number of entries so far */    int  names_found;             /* Number of entries so far */
856    int  name_entry_size;         /* Size of each entry */    int  name_entry_size;         /* Size of each entry */
857      int  bracount;                /* Count of capturing parens */
858    int  top_backref;             /* Maximum back reference */    int  top_backref;             /* Maximum back reference */
859    unsigned int backref_map;     /* Bitmap of low back refs */    unsigned int backref_map;     /* Bitmap of low back refs */
860      int  external_options;        /* External (initial) options */
861    int  req_varyopt;             /* "After variable item" flag for reqbyte */    int  req_varyopt;             /* "After variable item" flag for reqbyte */
862    BOOL nopartial;               /* Set TRUE if partial won't work */    BOOL nopartial;               /* Set TRUE if partial won't work */
863    int  nllen;                   /* 1 or 2 for newline string length */    int  nltype;                  /* Newline type */
864    uschar nl[4];                 /* Newline string */    int  nllen;                   /* Newline string length */
865      uschar nl[4];                 /* Newline string when fixed length */
866  } compile_data;  } compile_data;
867    
868  /* Structure for maintaining a chain of pointers to the currently incomplete  /* Structure for maintaining a chain of pointers to the currently incomplete
# Line 820  This isn't used for a "normal" compilati Line 895  This isn't used for a "normal" compilati
895    
896  struct heapframe;  struct heapframe;
897    
898    /* Structure for building a chain of data for holding the values of the subject
899    pointer at the start of each subpattern, so as to detect when an empty string
900    has been matched by a subpattern - to break infinite loops. */
901    
902    typedef struct eptrblock {
903      struct eptrblock *epb_prev;
904      USPTR epb_saved_eptr;
905    } eptrblock;
906    
907    
908  /* Structure for passing "static" information around between the functions  /* Structure for passing "static" information around between the functions
909  doing traditional NFA matching, so that they are thread-safe. */  doing traditional NFA matching, so that they are thread-safe. */
910    
# Line 830  typedef struct match_data { Line 915  typedef struct match_data {
915    int   *offset_vector;         /* Offset vector */    int   *offset_vector;         /* Offset vector */
916    int    offset_end;            /* One past the end */    int    offset_end;            /* One past the end */
917    int    offset_max;            /* The maximum usable for return data */    int    offset_max;            /* The maximum usable for return data */
918    int    nllen;                 /* 1 or 2 for newline string length */    int    nltype;                /* Newline type */
919    uschar nl[4];                 /* Newline string */    int    nllen;                 /* Newline string length */
920      uschar nl[4];                 /* Newline string when fixed */
921    const uschar *lcc;            /* Points to lower casing table */    const uschar *lcc;            /* Points to lower casing table */
922    const uschar *ctypes;         /* Points to table of type maps */    const uschar *ctypes;         /* Points to table of type maps */
923    BOOL   offset_overflow;       /* Set if too many extractions */    BOOL   offset_overflow;       /* Set if too many extractions */
# Line 850  typedef struct match_data { Line 936  typedef struct match_data {
936    int    end_offset_top;        /* Highwater mark at end of match */    int    end_offset_top;        /* Highwater mark at end of match */
937    int    capture_last;          /* Most recent capture number */    int    capture_last;          /* Most recent capture number */
938    int    start_offset;          /* The start offset value */    int    start_offset;          /* The start offset value */
939      eptrblock *eptrchain;         /* Chain of eptrblocks for tail recursions */
940      int    eptrn;                 /* Next free eptrblock */
941    recursion_info *recursive;    /* Linked list of recursion data */    recursion_info *recursive;    /* Linked list of recursion data */
942    void  *callout_data;          /* To pass back to callouts */    void  *callout_data;          /* To pass back to callouts */
943    struct heapframe *thisframe;  /* Used only when compiling for no recursion */    struct heapframe *thisframe;  /* Used only when compiling for no recursion */
# Line 865  typedef struct dfa_match_data { Line 953  typedef struct dfa_match_data {
953    const uschar *tables;         /* Character tables */    const uschar *tables;         /* Character tables */
954    int   moptions;               /* Match options */    int   moptions;               /* Match options */
955    int   poptions;               /* Pattern options */    int   poptions;               /* Pattern options */
956    int    nllen;                 /* 1 or 2 for newline string length */    int    nltype;                /* Newline type */
957    uschar nl[4];                 /* Newline string */    int    nllen;                 /* Newline string length */
958      uschar nl[4];                 /* Newline string when fixed */
959    void  *callout_data;          /* To pass back to callouts */    void  *callout_data;          /* To pass back to callouts */
960  } dfa_match_data;  } dfa_match_data;
961    
# Line 937  extern const uschar _pcre_OP_lengths[]; Line 1026  extern const uschar _pcre_OP_lengths[];
1026  one of the exported public functions. They have to be "external" in the C  one of the exported public functions. They have to be "external" in the C
1027  sense, but are not part of the PCRE public API. */  sense, but are not part of the PCRE public API. */
1028    
1029  extern int         _pcre_ord2utf8(int, uschar *);  extern BOOL         _pcre_is_newline(const uschar *, const uschar *, int *,
1030  extern real_pcre * _pcre_try_flipped(const real_pcre *, real_pcre *,                        BOOL);
1031                       const pcre_study_data *, pcre_study_data *);  extern int          _pcre_ord2utf8(int, uschar *);
1032  extern int         _pcre_ucp_findprop(const unsigned int, int *, int *);  extern real_pcre   *_pcre_try_flipped(const real_pcre *, real_pcre *,
1033  extern int         _pcre_ucp_othercase(const int);                        const pcre_study_data *, pcre_study_data *);
1034  extern int         _pcre_valid_utf8(const uschar *, int);  extern int          _pcre_ucp_findprop(const unsigned int, int *, int *);
1035  extern BOOL        _pcre_xclass(int, const uschar *);  extern unsigned int _pcre_ucp_othercase(const unsigned int);
1036    extern int          _pcre_valid_utf8(const uschar *, int);
1037    extern BOOL         _pcre_was_newline(const uschar *, const uschar *, int *,
1038                          BOOL);
1039    extern BOOL         _pcre_xclass(int, const uschar *);
1040    
1041  #endif  #endif
1042    

Legend:
Removed from v.91  
changed lines
  Added in v.93

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12