/[pcre]/code/trunk/pcre.c
ViewVC logotype

Diff of /code/trunk/pcre.c

Parent Directory | Revision Log | View Patch

revision 6 by nigel, Sat Feb 24 21:38:05 2007 UTC revision 7 by nigel, Sat Feb 24 21:38:09 2007 UTC
# Line 48  static char rep_max[] = { 0, 0, 0, 0, 1, Line 48  static char rep_max[] = { 0, 0, 0, 0, 1,
48  /* Text forms of OP_ values and things, for debugging */  /* Text forms of OP_ values and things, for debugging */
49    
50  #ifdef DEBUG  #ifdef DEBUG
51  static char *OP_names[] = { "End", "\\A", "\\B", "\\b", "\\D", "\\d",  static const char *OP_names[] = {
52      "End", "\\A", "\\B", "\\b", "\\D", "\\d",
53    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",
54    "not",    "not",
55    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
# Line 81  static short int escapes[] = { Line 82  static short int escapes[] = {
82    
83  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
84    
85  static BOOL compile_regex(int, int *,uschar **,uschar **,char **);  static BOOL compile_regex(int, int *, uschar **, const uschar **, const char **);
86    
87  /* Structure for passing "static" information around between the functions  /* Structure for passing "static" information around between the functions
88  doing the matching, so that they are thread-safe. */  doing the matching, so that they are thread-safe. */
# Line 98  typedef struct match_data { Line 99  typedef struct match_data {
99    BOOL   noteol;                /* NOTEOL flag */    BOOL   noteol;                /* NOTEOL flag */
100    BOOL   dotall;                /* Dot matches any char */    BOOL   dotall;                /* Dot matches any char */
101    BOOL   endonly;               /* Dollar not before final \n */    BOOL   endonly;               /* Dollar not before final \n */
102    uschar *start_subject;        /* Start of the subject string */    const uschar *start_subject;  /* Start of the subject string */
103    uschar *end_subject;          /* End of the subject string */    const uschar *end_subject;    /* End of the subject string */
104    jmp_buf fail_env;             /* Environment for longjmp() break out */    jmp_buf fail_env;             /* Environment for longjmp() break out */
105    uschar *end_match_ptr;        /* Subject position at end match */    const uschar *end_match_ptr;  /* Subject position at end match */
106    int     end_offset_top;       /* Highwater mark at end of match */    int     end_offset_top;       /* Highwater mark at end of match */
107  } match_data;  } match_data;
108    
# Line 126  void (*pcre_free)(void *) = free; Line 127  void (*pcre_free)(void *) = free;
127  *          Return version string                 *  *          Return version string                 *
128  *************************************************/  *************************************************/
129    
130  char *  const char *
131  pcre_version(void)  pcre_version(void)
132  {  {
133  return PCRE_VERSION;  return PCRE_VERSION;
# Line 156  Returns: number of identifying ex Line 157  Returns: number of identifying ex
157  int  int
158  pcre_info(const pcre *external_re, int *optptr, int *first_char)  pcre_info(const pcre *external_re, int *optptr, int *first_char)
159  {  {
160  real_pcre *re = (real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
161  if (re == NULL) return PCRE_ERROR_NULL;  if (re == NULL) return PCRE_ERROR_NULL;
162  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
163  if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);  if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);
# Line 360  Returns: zero or positive => a data Line 361  Returns: zero or positive => a data
361  */  */
362    
363  static int  static int
364  check_escape(uschar **ptrptr, char **errorptr, int bracount, int options,  check_escape(const uschar **ptrptr, const char **errorptr, int bracount,
365    BOOL isclass)    int options, BOOL isclass)
366  {  {
367  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
368  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */
369  int i;  int i;
370    
# Line 382  else if ((i = escapes[c - '0']) != 0) c Line 383  else if ((i = escapes[c - '0']) != 0) c
383    
384  else  else
385    {    {
386    uschar *oldptr;    const uschar *oldptr;
387    switch (c)    switch (c)
388      {      {
389      /* The handling of escape sequences consisting of a string of digits      /* The handling of escape sequences consisting of a string of digits
# Line 502  Returns: TRUE or FALSE Line 503  Returns: TRUE or FALSE
503  */  */
504    
505  static BOOL  static BOOL
506  is_counted_repeat(uschar *p)  is_counted_repeat(const uschar *p)
507  {  {
508  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;
509  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;
# Line 537  Returns: pointer to '}' on success; Line 538  Returns: pointer to '}' on success;
538               current ptr on error, with errorptr set               current ptr on error, with errorptr set
539  */  */
540    
541  static uschar *  static const uschar *
542  read_repeat_counts(uschar *p, int *minp, int *maxp, char **errorptr)  read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr)
543  {  {
544  int min = 0;  int min = 0;
545  int max = -1;  int max = -1;
# Line 592  Returns: TRUE on success Line 593  Returns: TRUE on success
593  */  */
594    
595  static BOOL  static BOOL
596  compile_branch(int options, int *brackets, uschar **codeptr, uschar **ptrptr,  compile_branch(int options, int *brackets, uschar **codeptr,
597    char **errorptr)    const uschar **ptrptr, const char **errorptr)
598  {  {
599  int repeat_type, op_type;  int repeat_type, op_type;
600  int repeat_min, repeat_max;  int repeat_min, repeat_max;
601  int bravalue, length;  int bravalue, length;
602  register int c;  register int c;
603  register uschar *code = *codeptr;  register uschar *code = *codeptr;
604  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
605    const uschar *oldptr;
606  uschar *previous = NULL;  uschar *previous = NULL;
 uschar *oldptr;  
607  uschar class[32];  uschar class[32];
608    
609  /* Switch on next character until the end of the branch */  /* Switch on next character until the end of the branch */
# Line 697  for (;; ptr++) Line 698  for (;; ptr++)
698        /* Backslash may introduce a single character, or it may introduce one        /* Backslash may introduce a single character, or it may introduce one
699        of the specials, which just set a flag. Escaped items are checked for        of the specials, which just set a flag. Escaped items are checked for
700        validity in the pre-compiling pass. The sequence \b is a special case.        validity in the pre-compiling pass. The sequence \b is a special case.
701        Inside a class (and only there) it is treated as backslash. Elsewhere        Inside a class (and only there) it is treated as backspace. Elsewhere
702        it marks a word boundary. Other escapes have preset maps ready to        it marks a word boundary. Other escapes have preset maps ready to
703        or into the one we are building. We assume they have more than one        or into the one we are building. We assume they have more than one
704        character in them, so set class_count bigger than one. */        character in them, so set class_count bigger than one. */
# Line 1214  for (;; ptr++) Line 1215  for (;; ptr++)
1215        continue;        continue;
1216        }        }
1217    
1218      /* Reset and fall through */      /* Data character: reset and fall through */
1219    
1220      ptr = oldptr;      ptr = oldptr;
1221      c = '\\';      c = '\\';
# Line 1305  Returns: TRUE on success Line 1306  Returns: TRUE on success
1306  */  */
1307    
1308  static BOOL  static BOOL
1309  compile_regex(int options, int *brackets, uschar **codeptr, uschar **ptrptr,  compile_regex(int options, int *brackets, uschar **codeptr,
1310    char **errorptr)    const uschar **ptrptr, const char **errorptr)
1311  {  {
1312  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
1313  uschar *code = *codeptr;  uschar *code = *codeptr;
1314  uschar *start_bracket = code;  uschar *start_bracket = code;
1315    
# Line 1374  Returns: TRUE or FALSE Line 1375  Returns: TRUE or FALSE
1375  */  */
1376    
1377  static BOOL  static BOOL
1378  is_anchored(register uschar *code, BOOL multiline)  is_anchored(register const uschar *code, BOOL multiline)
1379  {  {
1380  do {  do {
1381     int op = (int)code[3];     int op = (int)code[3];
# Line 1403  Returns: TRUE or FALSE Line 1404  Returns: TRUE or FALSE
1404  */  */
1405    
1406  static BOOL  static BOOL
1407  is_startline(uschar *code)  is_startline(const uschar *code)
1408  {  {
1409  do {  do {
1410     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)
# Line 1488  Returns: pointer to compiled data Line 1489  Returns: pointer to compiled data
1489  */  */
1490    
1491  pcre *  pcre *
1492  pcre_compile(const char *pattern, int options, char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
1493    int *erroroffset)    int *erroroffset)
1494  {  {
1495  real_pcre *re;  real_pcre *re;
# Line 1498  int runlength; Line 1499  int runlength;
1499  int c, size;  int c, size;
1500  int bracount = 0;  int bracount = 0;
1501  int brastack[200];  int brastack[200];
 int brastackptr = 0;  
1502  int top_backref = 0;  int top_backref = 0;
1503  uschar *code, *ptr;  unsigned int brastackptr = 0;
1504    uschar *code;
1505    const uschar *ptr;
1506    
1507  #ifdef DEBUG  #ifdef DEBUG
1508  uschar *code_base, *code_end;  uschar *code_base, *code_end;
# Line 1539  internal flag settings. Make an attempt Line 1541  internal flag settings. Make an attempt
1541  if an "extended" flag setting appears late in the pattern. We can't be so  if an "extended" flag setting appears late in the pattern. We can't be so
1542  clever for #-comments. */  clever for #-comments. */
1543    
1544  ptr = (uschar *)(pattern - 1);  ptr = (const uschar *)(pattern - 1);
1545  while ((c = *(++ptr)) != 0)  while ((c = *(++ptr)) != 0)
1546    {    {
1547    int min, max;    int min, max;
# Line 1566  while ((c = *(++ptr)) != 0) Line 1568  while ((c = *(++ptr)) != 0)
1568    
1569      case '\\':      case '\\':
1570        {        {
1571        uschar *save_ptr = ptr;        const uschar *save_ptr = ptr;
1572        c = check_escape(&ptr, errorptr, bracount, options, FALSE);        c = check_escape(&ptr, errorptr, bracount, options, FALSE);
1573        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1574        if (c >= 0)        if (c >= 0)
# Line 1831  while ((c = *(++ptr)) != 0) Line 1833  while ((c = *(++ptr)) != 0)
1833    
1834        if (c == '\\')        if (c == '\\')
1835          {          {
1836          uschar *saveptr = ptr;          const uschar *saveptr = ptr;
1837          c = check_escape(&ptr, errorptr, bracount, options, FALSE);          c = check_escape(&ptr, errorptr, bracount, options, FALSE);
1838          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1839          if (c < 0) { ptr = saveptr; break; }          if (c < 0) { ptr = saveptr; break; }
# Line 1879  re->options = options; Line 1881  re->options = options;
1881  error, *errorptr will be set non-NULL, so we don't need to look at the result  error, *errorptr will be set non-NULL, so we don't need to look at the result
1882  of the function here. */  of the function here. */
1883    
1884  ptr = (uschar *)pattern;  ptr = (const uschar *)pattern;
1885  code = re->code;  code = re->code;
1886  *code = OP_BRA;  *code = OP_BRA;
1887  bracount = 0;  bracount = 0;
# Line 1906  if (*errorptr != NULL) Line 1908  if (*errorptr != NULL)
1908    {    {
1909    (pcre_free)(re);    (pcre_free)(re);
1910    PCRE_ERROR_RETURN:    PCRE_ERROR_RETURN:
1911    *erroroffset = ptr - (uschar *)pattern;    *erroroffset = ptr - (const uschar *)pattern;
1912    return NULL;    return NULL;
1913    }    }
1914    
# Line 2204  Returns: TRUE if matched Line 2206  Returns: TRUE if matched
2206  */  */
2207    
2208  static BOOL  static BOOL
2209  match_ref(int number, register uschar *eptr, int length, match_data *md)  match_ref(int number, register const uschar *eptr, int length, match_data *md)
2210  {  {
2211  uschar *p = md->start_subject + md->offset_vector[number];  const uschar *p = md->start_subject + md->offset_vector[number];
2212    
2213  #ifdef DEBUG  #ifdef DEBUG
2214  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 2253  Returns: TRUE if matched Line 2255  Returns: TRUE if matched
2255  */  */
2256    
2257  static BOOL  static BOOL
2258  match(register uschar *eptr, register uschar *ecode, int offset_top,  match(register const uschar *eptr, register const uschar *ecode, int offset_top,
2259    match_data *md)    match_data *md)
2260  {  {
2261  for (;;)  for (;;)
# Line 2261  for (;;) Line 2263  for (;;)
2263    int min, max, ctype;    int min, max, ctype;
2264    register int i;    register int i;
2265    register int c;    register int c;
2266    BOOL minimize;    BOOL minimize = FALSE;
2267    
2268    /* Opening bracket. Check the alternative branches in turn, failing if none    /* Opening bracket. Check the alternative branches in turn, failing if none
2269    match. We have to set the start offset if required and there is space    match. We have to set the start offset if required and there is space
# Line 2274  for (;;) Line 2276  for (;;)
2276    if ((int)*ecode >= OP_BRA)    if ((int)*ecode >= OP_BRA)
2277      {      {
2278      int number = (*ecode - OP_BRA) << 1;      int number = (*ecode - OP_BRA) << 1;
2279      int save_offset1, save_offset2;      int save_offset1 = 0, save_offset2 = 0;
2280    
2281      #ifdef DEBUG      #ifdef DEBUG
2282      printf("start bracket %d\n", number/2);      printf("start bracket %d\n", number/2);
# Line 2401  for (;;) Line 2403  for (;;)
2403    
2404      case OP_BRAZERO:      case OP_BRAZERO:
2405        {        {
2406        uschar *next = ecode+1;        const uschar *next = ecode+1;
2407        if (match(eptr, next, offset_top, md)) return TRUE;        if (match(eptr, next, offset_top, md)) return TRUE;
2408        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
2409        ecode = next + 3;        ecode = next + 3;
# Line 2410  for (;;) Line 2412  for (;;)
2412    
2413      case OP_BRAMINZERO:      case OP_BRAMINZERO:
2414        {        {
2415        uschar *next = ecode+1;        const uschar *next = ecode+1;
2416        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
2417        if (match(eptr, next+3, offset_top, md)) return TRUE;        if (match(eptr, next+3, offset_top, md)) return TRUE;
2418        ecode++;        ecode++;
# Line 2426  for (;;) Line 2428  for (;;)
2428      case OP_KETRMAX:      case OP_KETRMAX:
2429        {        {
2430        int number;        int number;
2431        uschar *prev = ecode - (ecode[1] << 8) - ecode[2];        const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
2432    
2433        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE)        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE)
2434          {          {
# Line 2675  for (;;) Line 2677  for (;;)
2677    
2678        else        else
2679          {          {
2680          uschar *pp = eptr;          const uschar *pp = eptr;
2681          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2682            {            {
2683            if (!match_ref(number, eptr, length, md)) break;            if (!match_ref(number, eptr, length, md)) break;
# Line 2699  for (;;) Line 2701  for (;;)
2701    
2702      case OP_CLASS:      case OP_CLASS:
2703        {        {
2704        uschar *data = ecode + 1;  /* Save for matching */        const uschar *data = ecode + 1;  /* Save for matching */
2705        ecode += 33;               /* Advance past the item */        ecode += 33;                     /* Advance past the item */
2706    
2707        switch (*ecode)        switch (*ecode)
2708          {          {
# Line 2783  for (;;) Line 2785  for (;;)
2785    
2786        else        else
2787          {          {
2788          uschar *pp = eptr;          const uschar *pp = eptr;
2789          for (i = min; i < max; eptr++, i++)          for (i = min; i < max; eptr++, i++)
2790            {            {
2791            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
# Line 2901  for (;;) Line 2903  for (;;)
2903          }          }
2904        else        else
2905          {          {
2906          uschar *pp = eptr;          const uschar *pp = eptr;
2907          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2908            {            {
2909            if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;
# Line 2931  for (;;) Line 2933  for (;;)
2933          }          }
2934        else        else
2935          {          {
2936          uschar *pp = eptr;          const uschar *pp = eptr;
2937          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2938            {            {
2939            if (eptr >= md->end_subject || c != *eptr) break;            if (eptr >= md->end_subject || c != *eptr) break;
# Line 3028  for (;;) Line 3030  for (;;)
3030          }          }
3031        else        else
3032          {          {
3033          uschar *pp = eptr;          const uschar *pp = eptr;
3034          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3035            {            {
3036            if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;
# Line 3058  for (;;) Line 3060  for (;;)
3060          }          }
3061        else        else
3062          {          {
3063          uschar *pp = eptr;          const uschar *pp = eptr;
3064          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3065            {            {
3066            if (eptr >= md->end_subject || c == *eptr) break;            if (eptr >= md->end_subject || c == *eptr) break;
# Line 3175  for (;;) Line 3177  for (;;)
3177    
3178      else      else
3179        {        {
3180        uschar *pp = eptr;        const uschar *pp = eptr;
3181        switch(ctype)        switch(ctype)
3182          {          {
3183          case OP_ANY:          case OP_ANY:
# Line 3307  int resetcount; Line 3309  int resetcount;
3309  int ocount = offsetcount;  int ocount = offsetcount;
3310  int first_char = -1;  int first_char = -1;
3311  match_data match_block;  match_data match_block;
3312  uschar *start_bits = NULL;  const uschar *start_bits = NULL;
3313  uschar *start_match = (uschar *)subject;  const uschar *start_match = (uschar *)subject;
3314  uschar *end_subject;  const uschar *end_subject;
3315  real_pcre *re = (real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
3316  real_pcre_extra *extra = (real_pcre_extra *)external_extra;  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
3317  BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;  BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
3318  BOOL startline = (re->options & PCRE_STARTLINE) != 0;  BOOL startline = (re->options & PCRE_STARTLINE) != 0;
3319    
# Line 3321  if (re == NULL || subject == NULL || Line 3323  if (re == NULL || subject == NULL ||
3323     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3324  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
3325    
3326  match_block.start_subject = (uschar *)subject;  match_block.start_subject = (const uschar *)subject;
3327  match_block.end_subject = match_block.start_subject + length;  match_block.end_subject = match_block.start_subject + length;
3328  end_subject = match_block.end_subject;  end_subject = match_block.end_subject;
3329    

Legend:
Removed from v.6  
changed lines
  Added in v.7

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12