/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 85 by nigel, Sat Feb 24 21:41:13 2007 UTC revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2006 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 54  are on the heap, not on the stack. */ Line 54  are on the heap, not on the stack. */
54    
55  typedef struct eptrblock {  typedef struct eptrblock {
56    struct eptrblock *epb_prev;    struct eptrblock *epb_prev;
57    const uschar *epb_saved_eptr;    USPTR epb_saved_eptr;
58  } eptrblock;  } eptrblock;
59    
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
# Line 128  Returns: TRUE if matched Line 128  Returns: TRUE if matched
128  */  */
129    
130  static BOOL  static BOOL
131  match_ref(int offset, register const uschar *eptr, int length, match_data *md,  match_ref(int offset, register USPTR eptr, int length, match_data *md,
132    unsigned long int ims)    unsigned long int ims)
133  {  {
134  const uschar *p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
135    
136  #ifdef DEBUG  #ifdef DEBUG
137  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 169  return TRUE; Line 169  return TRUE;
169  ****************************************************************************  ****************************************************************************
170                     RECURSION IN THE match() FUNCTION                     RECURSION IN THE match() FUNCTION
171    
172  The match() function is highly recursive. Some regular expressions can cause  The match() function is highly recursive, though not every recursive call
173  it to recurse thousands of times. I was writing for Unix, so I just let it  increases the recursive depth. Nevertheless, some regular expressions can cause
174  call itself recursively. This uses the stack for saving everything that has  it to recurse to a great depth. I was writing for Unix, so I just let it call
175  to be saved for a recursive call. On Unix, the stack can be large, and this  itself recursively. This uses the stack for saving everything that has to be
176  works fine.  saved for a recursive call. On Unix, the stack can be large, and this works
177    fine.
178  It turns out that on non-Unix systems there are problems with programs that  
179  use a lot of stack. (This despite the fact that every last chip has oodles  It turns out that on some non-Unix-like systems there are problems with
180  of memory these days, and techniques for extending the stack have been known  programs that use a lot of stack. (This despite the fact that every last chip
181  for decades.) So....  has oodles of memory these days, and techniques for extending the stack have
182    been known for decades.) So....
183    
184  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
185  calls by keeping local variables that need to be preserved in blocks of memory  calls by keeping local variables that need to be preserved in blocks of memory
186  obtained from malloc instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
187  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
188  always used to.  always used to.
189  ****************************************************************************  ****************************************************************************
190  ***************************************************************************/  ***************************************************************************/
191    
192    
193  /* These versions of the macros use the stack, as normal */  /* These versions of the macros use the stack, as normal. There are debugging
194    versions and production versions. */
195    
196  #ifndef NO_RECURSE  #ifndef NO_RECURSE
197  #define REGISTER register  #define REGISTER register
198  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) rx = match(ra,rb,rc,rd,re,rf,rg)  #ifdef DEBUG
199    #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \
200      { \
201      printf("match() called in line %d\n", __LINE__); \
202      rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \
203      printf("to line %d\n", __LINE__); \
204      }
205    #define RRETURN(ra) \
206      { \
207      printf("match() returned %d from line %d ", ra, __LINE__); \
208      return ra; \
209      }
210    #else
211    #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \
212      rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)
213  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
214    #endif
215    
216  #else  #else
217    
218    
# Line 215  match(), which never changes. */ Line 233  match(), which never changes. */
233      newframe->Xims = re;\      newframe->Xims = re;\
234      newframe->Xeptrb = rf;\      newframe->Xeptrb = rf;\
235      newframe->Xflags = rg;\      newframe->Xflags = rg;\
236        newframe->Xrdepth = frame->Xrdepth + 1;\
237      newframe->Xprevframe = frame;\      newframe->Xprevframe = frame;\
238      frame = newframe;\      frame = newframe;\
239      DPRINTF(("restarting from line %d\n", __LINE__));\      DPRINTF(("restarting from line %d\n", __LINE__));\
# Line 256  typedef struct heapframe { Line 275  typedef struct heapframe {
275    long int Xims;    long int Xims;
276    eptrblock *Xeptrb;    eptrblock *Xeptrb;
277    int Xflags;    int Xflags;
278      int Xrdepth;
279    
280    /* Function local variables */    /* Function local variables */
281    
# Line 278  typedef struct heapframe { Line 298  typedef struct heapframe {
298    
299  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
300    int Xprop_type;    int Xprop_type;
301      int Xprop_value;
302    int Xprop_fail_result;    int Xprop_fail_result;
303    int Xprop_category;    int Xprop_category;
304    int Xprop_chartype;    int Xprop_chartype;
305    int Xprop_othercase;    int Xprop_script;
   int Xprop_test_against;  
306    int *Xprop_test_variable;    int *Xprop_test_variable;
307  #endif  #endif
308    
# Line 343  Arguments: Line 363  Arguments:
363     flags       can contain     flags       can contain
364                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
365                   match_isgroup - this is the start of a bracketed group                   match_isgroup - this is the start of a bracketed group
366       rdepth      the recursion depth
367    
368  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
369                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
370                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
371                   (e.g. stopped by recursion limit)                   (e.g. stopped by repeated call or recursion limit)
372  */  */
373    
374  static int  static int
375  match(REGISTER const uschar *eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,
376    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
377    int flags)    int flags, int rdepth)
378  {  {
379  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
380  so they can be ordinary variables in all cases. Mark them with "register"  so they can be ordinary variables in all cases. Mark them with "register"
# Line 381  frame->Xoffset_top = offset_top; Line 402  frame->Xoffset_top = offset_top;
402  frame->Xims = ims;  frame->Xims = ims;
403  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
404  frame->Xflags = flags;  frame->Xflags = flags;
405    frame->Xrdepth = rdepth;
406    
407  /* This is where control jumps back to in order to effect "recursion" */  /* This is where control jumps back to in order to effect "recursion" */
408    
# Line 394  HEAP_RECURSE: Line 416  HEAP_RECURSE:
416  #define ims                frame->Xims  #define ims                frame->Xims
417  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
418  #define flags              frame->Xflags  #define flags              frame->Xflags
419    #define rdepth             frame->Xrdepth
420    
421  /* Ditto for the local variables */  /* Ditto for the local variables */
422    
# Line 418  HEAP_RECURSE: Line 441  HEAP_RECURSE:
441    
442  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
443  #define prop_type          frame->Xprop_type  #define prop_type          frame->Xprop_type
444    #define prop_value         frame->Xprop_value
445  #define prop_fail_result   frame->Xprop_fail_result  #define prop_fail_result   frame->Xprop_fail_result
446  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
447  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
448  #define prop_othercase     frame->Xprop_othercase  #define prop_script        frame->Xprop_script
 #define prop_test_against  frame->Xprop_test_against  
449  #define prop_test_variable frame->Xprop_test_variable  #define prop_test_variable frame->Xprop_test_variable
450  #endif  #endif
451    
# Line 452  i, and fc and c, can be the same variabl Line 475  i, and fc and c, can be the same variabl
475  #define fc c  #define fc c
476    
477    
478  #ifdef SUPPORT_UTF8                /* Many of these variables are used ony */  #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
479  const uschar *charptr;             /* small blocks of the code. My normal  */  const uschar *charptr;             /* in small blocks of the code. My normal */
480  #endif                             /* style of coding would have declared  */  #endif                             /* style of coding would have declared    */
481  const uschar *callpat;             /* them within each of those blocks.    */  const uschar *callpat;             /* them within each of those blocks.      */
482  const uschar *data;                /* However, in order to accommodate the */  const uschar *data;                /* However, in order to accommodate the   */
483  const uschar *next;                /* version of this code that uses an    */  const uschar *next;                /* version of this code that uses an      */
484  const uschar *pp;                  /* external "stack" implemented on the  */  USPTR         pp;                  /* external "stack" implemented on the    */
485  const uschar *prev;                /* heap, it is easier to declare them   */  const uschar *prev;                /* heap, it is easier to declare them all */
486  const uschar *saved_eptr;          /* all here, so the declarations can    */  USPTR         saved_eptr;          /* here, so the declarations can be cut   */
487                                     /* be cut out in a block. The only      */                                     /* out in a block. The only declarations  */
488  recursion_info new_recursive;      /* declarations within blocks below are */  recursion_info new_recursive;      /* within blocks below are for variables  */
489                                     /* for variables that do not have to    */                                     /* that do not have to be preserved over  */
490  BOOL cur_is_word;                  /* be preserved over a recursive call   */  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
491  BOOL condition;                    /* to RMATCH().                         */  BOOL condition;
492  BOOL minimize;  BOOL minimize;
493  BOOL prev_is_word;  BOOL prev_is_word;
494    
# Line 473  unsigned long int original_ims; Line 496  unsigned long int original_ims;
496    
497  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
498  int prop_type;  int prop_type;
499    int prop_value;
500  int prop_fail_result;  int prop_fail_result;
501  int prop_category;  int prop_category;
502  int prop_chartype;  int prop_chartype;
503  int prop_othercase;  int prop_script;
 int prop_test_against;  
504  int *prop_test_variable;  int *prop_test_variable;
505  #endif  #endif
506    
# Line 499  eptrblock newptrb; Line 522  eptrblock newptrb;
522  variables. */  variables. */
523    
524  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
525    prop_value = 0;
526  prop_fail_result = 0;  prop_fail_result = 0;
 prop_test_against = 0;  
527  prop_test_variable = NULL;  prop_test_variable = NULL;
528  #endif  #endif
529    
530  /* OK, now we can get on with the real code of the function. Recursion is  /* OK, now we can get on with the real code of the function. Recursive calls
531  specified by the macros RMATCH and RRETURN. When NO_RECURSE is *not* defined,  are specified by the macro RMATCH and RRETURN is used to return. When
532  these just turn into a recursive call to match() and a "return", respectively.  NO_RECURSE is *not* defined, these just turn into a recursive call to match()
533  However, RMATCH isn't like a function call because it's quite a complicated  and a "return", respectively (possibly with some debugging if DEBUG is
534  macro. It has to be used in one particular way. This shouldn't, however, impact  defined). However, RMATCH isn't like a function call because it's quite a
535  performance when true recursion is being used. */  complicated macro. It has to be used in one particular way. This shouldn't,
536    however, impact performance when true recursion is being used. */
537    
538    /* First check that we haven't called match() too many times, and that we
539    haven't exceeded the recursion depth limit. */
540    
541  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
542    if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
543    
544  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
545  utf8 = md->utf8;       /* Local copy of the flag */  utf8 = md->utf8;       /* Local copy of the flag */
# Line 681  for (;;) Line 709  for (;;)
709      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
710        {        {
711        recursion_info *rec = md->recursive;        recursion_info *rec = md->recursive;
712        DPRINTF(("Hit the end in a (?0) recursion\n"));        DPRINTF(("End of pattern in a (?0) recursion\n"));
713        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
714        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
715          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
# Line 800  for (;;) Line 828  for (;;)
828        cb.version          = 1;   /* Version 1 of the callout block */        cb.version          = 1;   /* Version 1 of the callout block */
829        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
830        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
831        cb.subject          = (const char *)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
832        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
833        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = md->start_match - md->start_subject;
834        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
# Line 882  for (;;) Line 910  for (;;)
910              eptrb, match_isgroup);              eptrb, match_isgroup);
911          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH)
912            {            {
913              DPRINTF(("Recursion matched\n"));
914            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
915            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
916              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
917            RRETURN(MATCH_MATCH);            RRETURN(MATCH_MATCH);
918            }            }
919          else if (rrc != MATCH_NOMATCH) RRETURN(rrc);          else if (rrc != MATCH_NOMATCH)
920              {
921              DPRINTF(("Recursion gave error %d\n", rrc));
922              RRETURN(rrc);
923              }
924    
925          md->recursive = &new_recursive;          md->recursive = &new_recursive;
926          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
# Line 1352  for (;;) Line 1385  for (;;)
1385      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1386      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1387        {        {
1388        int chartype, rqdtype;        int chartype, script;
1389        int othercase;        int category = _pcre_ucp_findprop(c, &chartype, &script);
       int category = _pcre_ucp_findchar(c, &chartype, &othercase);  
1390    
1391        rqdtype = *(++ecode);        switch(ecode[1])
       ecode++;  
   
       if (rqdtype >= 128)  
1392          {          {
1393          if ((rqdtype - 128 != category) == (op == OP_PROP))          case PT_ANY:
1394            if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1395            break;
1396    
1397            case PT_LAMP:
1398            if ((chartype == ucp_Lu ||
1399                 chartype == ucp_Ll ||
1400                 chartype == ucp_Lt) == (op == OP_NOTPROP))
1401            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1402          }           break;
1403        else  
1404          {          case PT_GC:
1405          if ((rqdtype != chartype) == (op == OP_PROP))          if ((ecode[2] != category) == (op == OP_PROP))
1406              RRETURN(MATCH_NOMATCH);
1407            break;
1408    
1409            case PT_PC:
1410            if ((ecode[2] != chartype) == (op == OP_PROP))
1411              RRETURN(MATCH_NOMATCH);
1412            break;
1413    
1414            case PT_SC:
1415            if ((ecode[2] != script) == (op == OP_PROP))
1416            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1417            break;
1418    
1419            default:
1420            RRETURN(PCRE_ERROR_INTERNAL);
1421            break;
1422          }          }
1423    
1424          ecode += 3;
1425        }        }
1426      break;      break;
1427    
# Line 1379  for (;;) Line 1432  for (;;)
1432      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1433      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1434        {        {
1435        int chartype;        int chartype, script;
1436        int othercase;        int category = _pcre_ucp_findprop(c, &chartype, &script);
       int category = _pcre_ucp_findchar(c, &chartype, &othercase);  
1437        if (category == ucp_M) RRETURN(MATCH_NOMATCH);        if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1438        while (eptr < md->end_subject)        while (eptr < md->end_subject)
1439          {          {
# Line 1390  for (;;) Line 1442  for (;;)
1442            {            {
1443            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
1444            }            }
1445          category = _pcre_ucp_findchar(c, &chartype, &othercase);          category = _pcre_ucp_findprop(c, &chartype, &script);
1446          if (category != ucp_M) break;          if (category != ucp_M) break;
1447          eptr += len;          eptr += len;
1448          }          }
# Line 1683  for (;;) Line 1735  for (;;)
1735            while (eptr >= pp)            while (eptr >= pp)
1736              {              {
1737              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
             eptr--;  
1738              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1739                eptr--;
1740              }              }
1741            }            }
1742    
# Line 1841  for (;;) Line 1893  for (;;)
1893          ecode += length;          ecode += length;
1894    
1895          /* If we have Unicode property support, we can use it to test the other          /* If we have Unicode property support, we can use it to test the other
1896          case of the character, if there is one. The result of _pcre_ucp_findchar() is          case of the character, if there is one. */
         < 0 if the char isn't found, and othercase is returned as zero if there  
         isn't one. */  
1897    
1898          if (fc != dc)          if (fc != dc)
1899            {            {
1900  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1901            int chartype;            if (dc != _pcre_ucp_othercase(fc))
           int othercase;  
           if (_pcre_ucp_findchar(fc, &chartype, &othercase) < 0 || dc != othercase)  
1902  #endif  #endif
1903              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
1904            }            }
# Line 1918  for (;;) Line 1966  for (;;)
1966    
1967  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1968          int othercase;          int othercase;
         int chartype;  
1969          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
1970               _pcre_ucp_findchar(fc, &chartype, &othercase) >= 0 &&              (othercase = _pcre_ucp_othercase(fc)) >= 0 &&
1971               othercase > 0)               othercase >= 0)
1972            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
1973  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1974    
# Line 2408  for (;;) Line 2455  for (;;)
2455        {        {
2456        prop_fail_result = ctype == OP_NOTPROP;        prop_fail_result = ctype == OP_NOTPROP;
2457        prop_type = *ecode++;        prop_type = *ecode++;
2458        if (prop_type >= 128)        prop_value = *ecode++;
         {  
         prop_test_against = prop_type - 128;  
         prop_test_variable = &prop_category;  
         }  
       else  
         {  
         prop_test_against = prop_type;  
         prop_test_variable = &prop_chartype;  
         }  
2459        }        }
2460      else prop_type = -1;      else prop_type = -1;
2461  #endif  #endif
# Line 2434  for (;;) Line 2472  for (;;)
2472      if (min > 0)      if (min > 0)
2473        {        {
2474  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2475        if (prop_type > 0)        if (prop_type >= 0)
2476          {          {
2477          for (i = 1; i <= min; i++)          switch(prop_type)
2478            {            {
2479            GETCHARINC(c, eptr);            case PT_ANY:
2480            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2481            if ((*prop_test_variable == prop_test_against) == prop_fail_result)            for (i = 1; i <= min; i++)
2482              RRETURN(MATCH_NOMATCH);              {
2483                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2484                GETCHARINC(c, eptr);
2485                }
2486              break;
2487    
2488              case PT_LAMP:
2489              for (i = 1; i <= min; i++)
2490                {
2491                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2492                GETCHARINC(c, eptr);
2493                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2494                if ((prop_chartype == ucp_Lu ||
2495                     prop_chartype == ucp_Ll ||
2496                     prop_chartype == ucp_Lt) == prop_fail_result)
2497                  RRETURN(MATCH_NOMATCH);
2498                }
2499              break;
2500    
2501              case PT_GC:
2502              for (i = 1; i <= min; i++)
2503                {
2504                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2505                GETCHARINC(c, eptr);
2506                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2507                if ((prop_category == prop_value) == prop_fail_result)
2508                  RRETURN(MATCH_NOMATCH);
2509                }
2510              break;
2511    
2512              case PT_PC:
2513              for (i = 1; i <= min; i++)
2514                {
2515                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2516                GETCHARINC(c, eptr);
2517                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2518                if ((prop_chartype == prop_value) == prop_fail_result)
2519                  RRETURN(MATCH_NOMATCH);
2520                }
2521              break;
2522    
2523              case PT_SC:
2524              for (i = 1; i <= min; i++)
2525                {
2526                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2527                GETCHARINC(c, eptr);
2528                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2529                if ((prop_script == prop_value) == prop_fail_result)
2530                  RRETURN(MATCH_NOMATCH);
2531                }
2532              break;
2533    
2534              default:
2535              RRETURN(PCRE_ERROR_INTERNAL);
2536              break;
2537            }            }
2538          }          }
2539    
# Line 2453  for (;;) Line 2545  for (;;)
2545          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2546            {            {
2547            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
2548            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2549            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2550            while (eptr < md->end_subject)            while (eptr < md->end_subject)
2551              {              {
# Line 2462  for (;;) Line 2554  for (;;)
2554                {                {
2555                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
2556                }                }
2557              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2558              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
2559              eptr += len;              eptr += len;
2560              }              }
# Line 2624  for (;;) Line 2716  for (;;)
2716      if (minimize)      if (minimize)
2717        {        {
2718  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2719        if (prop_type > 0)        if (prop_type >= 0)
2720          {          {
2721          for (fi = min;; fi++)          switch(prop_type)
2722            {            {
2723            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            case PT_ANY:
2724            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            for (fi = min;; fi++)
2725            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              {
2726            GETCHARINC(c, eptr);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2727            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2728            if ((*prop_test_variable == prop_test_against) == prop_fail_result)              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2729              RRETURN(MATCH_NOMATCH);              GETCHARINC(c, eptr);
2730                if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2731                }
2732              break;
2733    
2734              case PT_LAMP:
2735              for (fi = min;; fi++)
2736                {
2737                RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2738                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2739                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2740                GETCHARINC(c, eptr);
2741                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2742                if ((prop_chartype == ucp_Lu ||
2743                     prop_chartype == ucp_Ll ||
2744                     prop_chartype == ucp_Lt) == prop_fail_result)
2745                  RRETURN(MATCH_NOMATCH);
2746                }
2747              break;
2748    
2749              case PT_GC:
2750              for (fi = min;; fi++)
2751                {
2752                RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2753                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2754                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2755                GETCHARINC(c, eptr);
2756                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2757                if ((prop_category == prop_value) == prop_fail_result)
2758                  RRETURN(MATCH_NOMATCH);
2759                }
2760              break;
2761    
2762              case PT_PC:
2763              for (fi = min;; fi++)
2764                {
2765                RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2766                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2767                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2768                GETCHARINC(c, eptr);
2769                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2770                if ((prop_chartype == prop_value) == prop_fail_result)
2771                  RRETURN(MATCH_NOMATCH);
2772                }
2773              break;
2774    
2775              case PT_SC:
2776              for (fi = min;; fi++)
2777                {
2778                RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2779                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2780                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2781                GETCHARINC(c, eptr);
2782                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2783                if ((prop_script == prop_value) == prop_fail_result)
2784                  RRETURN(MATCH_NOMATCH);
2785                }
2786              break;
2787    
2788              default:
2789              RRETURN(PCRE_ERROR_INTERNAL);
2790              break;
2791            }            }
2792          }          }
2793    
# Line 2649  for (;;) Line 2802  for (;;)
2802            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2803            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2804            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
2805            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2806            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2807            while (eptr < md->end_subject)            while (eptr < md->end_subject)
2808              {              {
# Line 2658  for (;;) Line 2811  for (;;)
2811                {                {
2812                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
2813                }                }
2814              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2815              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
2816              eptr += len;              eptr += len;
2817              }              }
# Line 2783  for (;;) Line 2936  for (;;)
2936        pp = eptr;  /* Remember where we started */        pp = eptr;  /* Remember where we started */
2937    
2938  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2939        if (prop_type > 0)        if (prop_type >= 0)
2940          {          {
2941          for (i = min; i < max; i++)          switch(prop_type)
2942            {            {
2943            int len = 1;            case PT_ANY:
2944            if (eptr >= md->end_subject) break;            for (i = min; i < max; i++)
2945            GETCHARLEN(c, eptr, len);              {
2946            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              int len = 1;
2947            if ((*prop_test_variable == prop_test_against) == prop_fail_result)              if (eptr >= md->end_subject) break;
2948              break;              GETCHARLEN(c, eptr, len);
2949            eptr+= len;              if (prop_fail_result) break;
2950                eptr+= len;
2951                }
2952              break;
2953    
2954              case PT_LAMP:
2955              for (i = min; i < max; i++)
2956                {
2957                int len = 1;
2958                if (eptr >= md->end_subject) break;
2959                GETCHARLEN(c, eptr, len);
2960                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2961                if ((prop_chartype == ucp_Lu ||
2962                     prop_chartype == ucp_Ll ||
2963                     prop_chartype == ucp_Lt) == prop_fail_result)
2964                  break;
2965                eptr+= len;
2966                }
2967              break;
2968    
2969              case PT_GC:
2970              for (i = min; i < max; i++)
2971                {
2972                int len = 1;
2973                if (eptr >= md->end_subject) break;
2974                GETCHARLEN(c, eptr, len);
2975                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2976                if ((prop_category == prop_value) == prop_fail_result)
2977                  break;
2978                eptr+= len;
2979                }
2980              break;
2981    
2982              case PT_PC:
2983              for (i = min; i < max; i++)
2984                {
2985                int len = 1;
2986                if (eptr >= md->end_subject) break;
2987                GETCHARLEN(c, eptr, len);
2988                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2989                if ((prop_chartype == prop_value) == prop_fail_result)
2990                  break;
2991                eptr+= len;
2992                }
2993              break;
2994    
2995              case PT_SC:
2996              for (i = min; i < max; i++)
2997                {
2998                int len = 1;
2999                if (eptr >= md->end_subject) break;
3000                GETCHARLEN(c, eptr, len);
3001                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3002                if ((prop_script == prop_value) == prop_fail_result)
3003                  break;
3004                eptr+= len;
3005                }
3006              break;
3007            }            }
3008    
3009          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
# Line 2816  for (;;) Line 3026  for (;;)
3026            {            {
3027            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
3028            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3029            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3030            if (prop_category == ucp_M) break;            if (prop_category == ucp_M) break;
3031            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3032              {              {
# Line 2825  for (;;) Line 3035  for (;;)
3035                {                {
3036                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3037                }                }
3038              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3039              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3040              eptr += len;              eptr += len;
3041              }              }
# Line 2846  for (;;) Line 3056  for (;;)
3056                {                {
3057                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3058                }                }
3059              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3060              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3061              eptr--;              eptr--;
3062              }              }
# Line 3200  Returns: > 0 => success; value Line 3410  Returns: > 0 => success; value
3410                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
3411  */  */
3412    
3413  PCRE_EXPORT int  PCRE_DATA_SCOPE int
3414  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
3415    const char *subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
3416    int offsetcount)    int offsetcount)
3417  {  {
3418  int rc, resetcount, ocount;  int rc, resetcount, ocount;
# Line 3219  BOOL req_byte_caseless = FALSE; Line 3429  BOOL req_byte_caseless = FALSE;
3429  match_data match_block;  match_data match_block;
3430  const uschar *tables;  const uschar *tables;
3431  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
3432  const uschar *start_match = (const uschar *)subject + start_offset;  USPTR start_match = (USPTR)subject + start_offset;
3433  const uschar *end_subject;  USPTR end_subject;
3434  const uschar *req_byte_ptr = start_match - 1;  USPTR req_byte_ptr = start_match - 1;
3435    
3436  pcre_study_data internal_study;  pcre_study_data internal_study;
3437  const pcre_study_data *study;  const pcre_study_data *study;
# Line 3242  the default values. */ Line 3452  the default values. */
3452    
3453  study = NULL;  study = NULL;
3454  match_block.match_limit = MATCH_LIMIT;  match_block.match_limit = MATCH_LIMIT;
3455    match_block.match_limit_recursion = MATCH_LIMIT_RECURSION;
3456  match_block.callout_data = NULL;  match_block.callout_data = NULL;
3457    
3458  /* The table pointer is always in native byte order. */  /* The table pointer is always in native byte order. */
# Line 3255  if (extra_data != NULL) Line 3466  if (extra_data != NULL)
3466      study = (const pcre_study_data *)extra_data->study_data;      study = (const pcre_study_data *)extra_data->study_data;
3467    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
3468      match_block.match_limit = extra_data->match_limit;      match_block.match_limit = extra_data->match_limit;
3469      if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
3470        match_block.match_limit_recursion = extra_data->match_limit_recursion;
3471    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
3472      match_block.callout_data = extra_data->callout_data;      match_block.callout_data = extra_data->callout_data;
3473    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
# Line 3289  firstline = (re->options & PCRE_FIRSTLIN Line 3502  firstline = (re->options & PCRE_FIRSTLIN
3502  match_block.start_code = (const uschar *)external_re + re->name_table_offset +  match_block.start_code = (const uschar *)external_re + re->name_table_offset +
3503    re->name_count * re->name_entry_size;    re->name_count * re->name_entry_size;
3504    
3505  match_block.start_subject = (const uschar *)subject;  match_block.start_subject = (USPTR)subject;
3506  match_block.start_offset = start_offset;  match_block.start_offset = start_offset;
3507  match_block.end_subject = match_block.start_subject + length;  match_block.end_subject = match_block.start_subject + length;
3508  end_subject = match_block.end_subject;  end_subject = match_block.end_subject;
# Line 3415  the loop runs just once. */ Line 3628  the loop runs just once. */
3628    
3629  do  do
3630    {    {
3631    const uschar *save_end_subject = end_subject;    USPTR save_end_subject = end_subject;
3632    
3633    /* Reset the maximum number of extractions we might see. */    /* Reset the maximum number of extractions we might see. */
3634    
# Line 3434  do Line 3647  do
3647    
3648    if (firstline)    if (firstline)
3649      {      {
3650      const uschar *t = start_match;      USPTR t = start_match;
3651      while (t < save_end_subject && *t != '\n') t++;      while (t < save_end_subject && *t != '\n') t++;
3652      end_subject = t;      end_subject = t;
3653      }      }
# Line 3504  do Line 3717  do
3717        end_subject - start_match < REQ_BYTE_MAX &&        end_subject - start_match < REQ_BYTE_MAX &&
3718        !match_block.partial)        !match_block.partial)
3719      {      {
3720      register const uschar *p = start_match + ((first_byte >= 0)? 1 : 0);      register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
3721    
3722      /* We don't need to repeat the search if we haven't yet reached the      /* We don't need to repeat the search if we haven't yet reached the
3723      place we found it at last time. */      place we found it at last time. */
# Line 3550  do Line 3763  do
3763    match_block.match_call_count = 0;    match_block.match_call_count = 0;
3764    
3765    rc = match(start_match, match_block.start_code, 2, &match_block, ims, NULL,    rc = match(start_match, match_block.start_code, 2, &match_block, ims, NULL,
3766      match_isgroup);      match_isgroup, 0);
3767    
3768    /* When the result is no match, if the subject's first character was a    /* When the result is no match, if the subject's first character was a
3769    newline and the PCRE_FIRSTLINE option is set, break (which will return    newline and the PCRE_FIRSTLINE option is set, break (which will return

Legend:
Removed from v.85  
changed lines
  Added in v.87

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12