/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 172 by ph10, Tue Jun 5 10:40:13 2007 UTC | revision 219 by ph10, Thu Aug 16 11:46:40 2007 UTC
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #ifdef HAVE_CONFIG_H
46    #include <config.h>
47    #endif
48    
49  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
50  #define PSSTART start_subject  /* Field containing processed string start */  #define PSSTART start_subject  /* Field containing processed string start */
51  #define PSEND   end_subject    /* Field containing processed string end */  #define PSEND   end_subject    /* Field containing processed string end */
# Line 53  possible. There are also some static sup Line 57  possible. There are also some static sup
57  #undef min  #undef min
58  #undef max  #undef max
59    
 /* The chain of eptrblocks for tail recursions uses memory in stack workspace,  
 obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */  
   
 #define EPTR_WORK_SIZE (1000)  
   
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert     0x01  /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
 #define match_tail_recursed  0x04  /* Tail recursive call */  
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 70  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_COMMIT       (-999)
75    #define MATCH_PRUNE        (-998)
76    #define MATCH_SKIP         (-997)
77    #define MATCH_THEN         (-996)
78    
79  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
80  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
81  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 205  variable instead of being passed in the Line 211  variable instead of being passed in the
211  ****************************************************************************  ****************************************************************************
212  ***************************************************************************/  ***************************************************************************/
213    
214    /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
215  /* Numbers for RMATCH calls */  below must be updated in sync.  */
216    
217  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
218         RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,         RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
219         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
220         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
221         RM41,  RM42, RM43, RM44, RM45, RM46, RM47 };         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
222           RM51,  RM52, RM53, RM54 };
223    
224  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
225  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 384  Arguments: Line 390  Arguments:
390                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
391                   match_cbegroup - this is the start of an unlimited repeat                   match_cbegroup - this is the start of an unlimited repeat
392                     group that can match an empty string                     group that can match an empty string
                  match_tail_recursed - this is a tail_recursed group  
393     rdepth      the recursion depth     rdepth      the recursion depth
394    
395  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
# Line 586  original_ims = ims; /* Save for reset Line 591  original_ims = ims; /* Save for reset
591  string, the match_cbegroup flag is set. When this is the case, add the current  string, the match_cbegroup flag is set. When this is the case, add the current
592  subject pointer to the chain of such remembered pointers, to be checked when we  subject pointer to the chain of such remembered pointers, to be checked when we
593  hit the closing ket, in order to break infinite loops that match no characters.  hit the closing ket, in order to break infinite loops that match no characters.
594  When match() is called in other circumstances, don't add to the chain. If this  When match() is called in other circumstances, don't add to the chain. The
595  is a tail recursion, use a block from the workspace, as the one on the stack is  match_cbegroup flag must NOT be used with tail recursion, because the memory
596  already used. */  block that is used is on the stack, so a new one may be required for each
597    match(). */
598    
599  if ((flags & match_cbegroup) != 0)  if ((flags & match_cbegroup) != 0)
600    {    {
601    eptrblock *p;    newptrb.epb_saved_eptr = eptr;
602    if ((flags & match_tail_recursed) != 0)    newptrb.epb_prev = eptrb;
603      {    eptrb = &newptrb;
     if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);  
     p = md->eptrchain + md->eptrn++;  
     }  
   else p = &newptrb;  
   p->epb_saved_eptr = eptr;  
   p->epb_prev = eptrb;  
   eptrb = p;  
604    }    }
605    
606  /* Now start processing the opcodes. */  /* Now start processing the opcodes. */
# Line 621  for (;;) Line 620  for (;;)
620    
621    switch(op)    switch(op)
622      {      {
623        case OP_FAIL:
624        RRETURN(MATCH_NOMATCH);
625    
626        case OP_PRUNE:
627        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
628          ims, eptrb, flags, RM51);
629        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
630        RRETURN(MATCH_PRUNE);
631    
632        case OP_COMMIT:
633        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
634          ims, eptrb, flags, RM52);
635        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
636        RRETURN(MATCH_COMMIT);
637    
638        case OP_SKIP:
639        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
640          ims, eptrb, flags, RM53);
641        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
642        md->start_match_ptr = eptr;   /* Pass back current position */
643        RRETURN(MATCH_SKIP);
644    
645        case OP_THEN:
646        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
647          ims, eptrb, flags, RM54);
648        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
649        RRETURN(MATCH_THEN);
650    
651      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket. If there is space in the offset vector, save
652      the current subject position in the working slot at the top of the vector.      the current subject position in the working slot at the top of the vector.
653      We mustn't change the current values of the data slot, because they may be      We mustn't change the current values of the data slot, because they may be
# Line 662  for (;;) Line 689  for (;;)
689          {          {
690          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
691            ims, eptrb, flags, RM1);            ims, eptrb, flags, RM1);
692          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
693          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
694          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
695          }          }
# Line 677  for (;;) Line 704  for (;;)
704        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
705        }        }
706    
707      /* Insufficient room for saving captured contents. Treat as a non-capturing      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
708      bracket. */      as a non-capturing bracket. */
709    
710        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
711        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
712    
713      DPRINTF(("insufficient capture room: treat as non-capturing\n"));      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
714    
715        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
716        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
717    
718      /* Non-capturing bracket. Loop for all the alternatives. When we get to the      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
719      final alternative within the brackets, we would return the result of a      final alternative within the brackets, we would return the result of a
720      recursive call to match() whatever happened. We can reduce stack usage by      recursive call to match() whatever happened. We can reduce stack usage by
721      turning this into a tail recursion. */      turning this into a tail recursion, except in the case when match_cbegroup
722        is set.*/
723    
724      case OP_BRA:      case OP_BRA:
725      case OP_SBRA:      case OP_SBRA:
# Line 693  for (;;) Line 727  for (;;)
727      flags = (op >= OP_SBRA)? match_cbegroup : 0;      flags = (op >= OP_SBRA)? match_cbegroup : 0;
728      for (;;)      for (;;)
729        {        {
730        if (ecode[GET(ecode, 1)] != OP_ALT)        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
731          {          {
732          ecode += _pcre_OP_lengths[*ecode];          if (flags == 0)    /* Not a possibly empty group */
733          flags |= match_tail_recursed;            {
734          DPRINTF(("bracket 0 tail recursion\n"));            ecode += _pcre_OP_lengths[*ecode];
735          goto TAIL_RECURSE;            DPRINTF(("bracket 0 tail recursion\n"));
736              goto TAIL_RECURSE;
737              }
738    
739            /* Possibly empty group; can't use tail recursion. */
740    
741            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
742              eptrb, flags, RM48);
743            RRETURN(rrc);
744          }          }
745    
746        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
# Line 706  for (;;) Line 748  for (;;)
748    
749        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
750          eptrb, flags, RM2);          eptrb, flags, RM2);
751        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
752        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
753        }        }
754      /* Control never reaches here. */      /* Control never reaches here. */
# Line 754  for (;;) Line 796  for (;;)
796          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
797          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
798          }          }
799        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
800          {          {
801          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
802          }          }
# Line 766  for (;;) Line 808  for (;;)
808        }        }
809    
810      /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one,
811      we can use tail recursion to avoid using another stack frame. If the second      we can use tail recursion to avoid using another stack frame, except when
812      alternative doesn't exist, we can just plough on. */      match_cbegroup is required for an unlimited repeat of a possibly empty
813        group. If the second alternative doesn't exist, we can just plough on. */
814    
815      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
816        {        {
817        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
818        flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);        if (op == OP_SCOND)        /* Possibly empty group */
819        goto TAIL_RECURSE;          {
820            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
821            RRETURN(rrc);
822            }
823          else                       /* Group must match something */
824            {
825            flags = 0;
826            goto TAIL_RECURSE;
827            }
828        }        }
829      else      else                         /* Condition false & no 2nd alternative */
830        {        {
831        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
832        }        }
833      break;      break;
834    
835    
836      /* End of the pattern. If we are in a top-level recursion, we should      /* End of the pattern, either real or forced. If we are in a top-level
837      restore the offsets appropriately and continue from after the call. */      recursion, we should restore the offsets appropriately and continue from
838        after the call. */
839    
840        case OP_ACCEPT:
841      case OP_END:      case OP_END:
842      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
843        {        {
# Line 805  for (;;) Line 858  for (;;)
858      if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);      if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
859      md->end_match_ptr = eptr;           /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
860      md->end_offset_top = offset_top;    /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
861      md->start_match_ptr = mstart;  /* and the start (\K can modify) */      md->start_match_ptr = mstart;       /* and the start (\K can modify) */
862      RRETURN(MATCH_MATCH);      RRETURN(MATCH_MATCH);
863    
864      /* Change option settings */      /* Change option settings */
# Line 829  for (;;) Line 882  for (;;)
882        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
883          RM4);          RM4);
884        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
885        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
886        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
887        }        }
888      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 856  for (;;) Line 909  for (;;)
909        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
910          RM5);          RM5);
911        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
912        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
913        ecode += GET(ecode,1);        ecode += GET(ecode,1);
914        }        }
915      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 880  for (;;) Line 933  for (;;)
933          {          {
934          eptr--;          eptr--;
935          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
936          BACKCHAR(eptr)          BACKCHAR(eptr);
937          }          }
938        }        }
939      else      else
# Line 993  for (;;) Line 1046  for (;;)
1046              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1047            RRETURN(MATCH_MATCH);            RRETURN(MATCH_MATCH);
1048            }            }
1049          else if (rrc != MATCH_NOMATCH)          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1050            {            {
1051            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1052            RRETURN(rrc);            RRETURN(rrc);
# Line 1027  for (;;) Line 1080  for (;;)
1080    
1081      do      do
1082        {        {
1083        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
         eptrb, 0, RM7);  
1084        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
1085        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1086        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1087        }        }
1088      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1073  for (;;) Line 1125  for (;;)
1125    
1126      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1127        {        {
1128        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
         RM8);  
1129        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1130        ecode = prev;        ecode = prev;
1131        flags = match_tail_recursed;        flags = 0;
1132        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1133        }        }
1134      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
# Line 1085  for (;;) Line 1136  for (;;)
1136        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1137        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1138        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1139        flags = match_tail_recursed;        flags = 0;
1140        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1141        }        }
1142      /* Control never gets here */      /* Control never gets here */
# Line 1216  for (;;) Line 1267  for (;;)
1267    
1268      /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1269      preceding bracket, in the appropriate order. In the second case, we can use      preceding bracket, in the appropriate order. In the second case, we can use
1270      tail recursion to avoid using another stack frame. */      tail recursion to avoid using another stack frame, unless we have an
1271        unlimited repeat of a group that can match an empty string. */
1272    
1273      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1274    
1275      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1276        {        {
1277        RMATCH(eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
         RM12);  
1278        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1279          if (flags != 0)    /* Could match an empty string */
1280            {
1281            RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1282            RRETURN(rrc);
1283            }
1284        ecode = prev;        ecode = prev;
       flags |= match_tail_recursed;  
1285        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1286        }        }
1287      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
# Line 1234  for (;;) Line 1289  for (;;)
1289        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1290        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1291        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1292        flags = match_tail_recursed;        flags = 0;
1293        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1294        }        }
1295      /* Control never gets here */      /* Control never gets here */
# Line 1482  for (;;) Line 1537  for (;;)
1537      ecode++;      ecode++;
1538      break;      break;
1539    
1540        case OP_NOT_HSPACE:
1541        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1542        GETCHARINCTEST(c, eptr);
1543        switch(c)
1544          {
1545          default: break;
1546          case 0x09:      /* HT */
1547          case 0x20:      /* SPACE */
1548          case 0xa0:      /* NBSP */
1549          case 0x1680:    /* OGHAM SPACE MARK */
1550          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1551          case 0x2000:    /* EN QUAD */
1552          case 0x2001:    /* EM QUAD */
1553          case 0x2002:    /* EN SPACE */
1554          case 0x2003:    /* EM SPACE */
1555          case 0x2004:    /* THREE-PER-EM SPACE */
1556          case 0x2005:    /* FOUR-PER-EM SPACE */
1557          case 0x2006:    /* SIX-PER-EM SPACE */
1558          case 0x2007:    /* FIGURE SPACE */
1559          case 0x2008:    /* PUNCTUATION SPACE */
1560          case 0x2009:    /* THIN SPACE */
1561          case 0x200A:    /* HAIR SPACE */
1562          case 0x202f:    /* NARROW NO-BREAK SPACE */
1563          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1564          case 0x3000:    /* IDEOGRAPHIC SPACE */
1565          RRETURN(MATCH_NOMATCH);
1566          }
1567        ecode++;
1568        break;
1569    
1570        case OP_HSPACE:
1571        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1572        GETCHARINCTEST(c, eptr);
1573        switch(c)
1574          {
1575          default: RRETURN(MATCH_NOMATCH);
1576          case 0x09:      /* HT */
1577          case 0x20:      /* SPACE */
1578          case 0xa0:      /* NBSP */
1579          case 0x1680:    /* OGHAM SPACE MARK */
1580          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1581          case 0x2000:    /* EN QUAD */
1582          case 0x2001:    /* EM QUAD */
1583          case 0x2002:    /* EN SPACE */
1584          case 0x2003:    /* EM SPACE */
1585          case 0x2004:    /* THREE-PER-EM SPACE */
1586          case 0x2005:    /* FOUR-PER-EM SPACE */
1587          case 0x2006:    /* SIX-PER-EM SPACE */
1588          case 0x2007:    /* FIGURE SPACE */
1589          case 0x2008:    /* PUNCTUATION SPACE */
1590          case 0x2009:    /* THIN SPACE */
1591          case 0x200A:    /* HAIR SPACE */
1592          case 0x202f:    /* NARROW NO-BREAK SPACE */
1593          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1594          case 0x3000:    /* IDEOGRAPHIC SPACE */
1595          break;
1596          }
1597        ecode++;
1598        break;
1599    
1600        case OP_NOT_VSPACE:
1601        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1602        GETCHARINCTEST(c, eptr);
1603        switch(c)
1604          {
1605          default: break;
1606          case 0x0a:      /* LF */
1607          case 0x0b:      /* VT */
1608          case 0x0c:      /* FF */
1609          case 0x0d:      /* CR */
1610          case 0x85:      /* NEL */
1611          case 0x2028:    /* LINE SEPARATOR */
1612          case 0x2029:    /* PARAGRAPH SEPARATOR */
1613          RRETURN(MATCH_NOMATCH);
1614          }
1615        ecode++;
1616        break;
1617    
1618        case OP_VSPACE:
1619        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1620        GETCHARINCTEST(c, eptr);
1621        switch(c)
1622          {
1623          default: RRETURN(MATCH_NOMATCH);
1624          case 0x0a:      /* LF */
1625          case 0x0b:      /* VT */
1626          case 0x0c:      /* FF */
1627          case 0x0d:      /* CR */
1628          case 0x85:      /* NEL */
1629          case 0x2028:    /* LINE SEPARATOR */
1630          case 0x2029:    /* PARAGRAPH SEPARATOR */
1631          break;
1632          }
1633        ecode++;
1634        break;
1635    
1636  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1637      /* Check the next character by Unicode property. We will get here only      /* Check the next character by Unicode property. We will get here only
1638      if the support is in the binary; otherwise a compile-time error occurs. */      if the support is in the binary; otherwise a compile-time error occurs. */
# Line 1937  for (;;) Line 2088  for (;;)
2088            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2089            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2090            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2091            BACKCHAR(eptr)            if (utf8) BACKCHAR(eptr);
2092            }            }
2093          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2094          }          }
# Line 2690  for (;;) Line 2841  for (;;)
2841            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2842              {              {
2843              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2844              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2845              }              }
2846            break;            break;
2847    
# Line 2698  for (;;) Line 2849  for (;;)
2849            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2850              {              {
2851              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2852              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2853              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2854              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
2855                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
# Line 2711  for (;;) Line 2862  for (;;)
2862            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2863              {              {
2864              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2865              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2866              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2867              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
2868                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2722  for (;;) Line 2873  for (;;)
2873            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2874              {              {
2875              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2876              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2877              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2878              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
2879                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2733  for (;;) Line 2884  for (;;)
2884            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2885              {              {
2886              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2887              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2888              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2889              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
2890                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2814  for (;;) Line 2965  for (;;)
2965            }            }
2966          break;          break;
2967    
2968            case OP_NOT_HSPACE:
2969            for (i = 1; i <= min; i++)
2970              {
2971              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2972              GETCHARINC(c, eptr);
2973              switch(c)
2974                {
2975                default: break;
2976                case 0x09:      /* HT */
2977                case 0x20:      /* SPACE */
2978                case 0xa0:      /* NBSP */
2979                case 0x1680:    /* OGHAM SPACE MARK */
2980                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2981                case 0x2000:    /* EN QUAD */
2982                case 0x2001:    /* EM QUAD */
2983                case 0x2002:    /* EN SPACE */
2984                case 0x2003:    /* EM SPACE */
2985                case 0x2004:    /* THREE-PER-EM SPACE */
2986                case 0x2005:    /* FOUR-PER-EM SPACE */
2987                case 0x2006:    /* SIX-PER-EM SPACE */
2988                case 0x2007:    /* FIGURE SPACE */
2989                case 0x2008:    /* PUNCTUATION SPACE */
2990                case 0x2009:    /* THIN SPACE */
2991                case 0x200A:    /* HAIR SPACE */
2992                case 0x202f:    /* NARROW NO-BREAK SPACE */
2993                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2994                case 0x3000:    /* IDEOGRAPHIC SPACE */
2995                RRETURN(MATCH_NOMATCH);
2996                }
2997              }
2998            break;
2999    
3000            case OP_HSPACE:
3001            for (i = 1; i <= min; i++)
3002              {
3003              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3004              GETCHARINC(c, eptr);
3005              switch(c)
3006                {
3007                default: RRETURN(MATCH_NOMATCH);
3008                case 0x09:      /* HT */
3009                case 0x20:      /* SPACE */
3010                case 0xa0:      /* NBSP */
3011                case 0x1680:    /* OGHAM SPACE MARK */
3012                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3013                case 0x2000:    /* EN QUAD */
3014                case 0x2001:    /* EM QUAD */
3015                case 0x2002:    /* EN SPACE */
3016                case 0x2003:    /* EM SPACE */
3017                case 0x2004:    /* THREE-PER-EM SPACE */
3018                case 0x2005:    /* FOUR-PER-EM SPACE */
3019                case 0x2006:    /* SIX-PER-EM SPACE */
3020                case 0x2007:    /* FIGURE SPACE */
3021                case 0x2008:    /* PUNCTUATION SPACE */
3022                case 0x2009:    /* THIN SPACE */
3023                case 0x200A:    /* HAIR SPACE */
3024                case 0x202f:    /* NARROW NO-BREAK SPACE */
3025                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3026                case 0x3000:    /* IDEOGRAPHIC SPACE */
3027                break;
3028                }
3029              }
3030            break;
3031    
3032            case OP_NOT_VSPACE:
3033            for (i = 1; i <= min; i++)
3034              {
3035              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3036              GETCHARINC(c, eptr);
3037              switch(c)
3038                {
3039                default: break;
3040                case 0x0a:      /* LF */
3041                case 0x0b:      /* VT */
3042                case 0x0c:      /* FF */
3043                case 0x0d:      /* CR */
3044                case 0x85:      /* NEL */
3045                case 0x2028:    /* LINE SEPARATOR */
3046                case 0x2029:    /* PARAGRAPH SEPARATOR */
3047                RRETURN(MATCH_NOMATCH);
3048                }
3049              }
3050            break;
3051    
3052            case OP_VSPACE:
3053            for (i = 1; i <= min; i++)
3054              {
3055              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3056              GETCHARINC(c, eptr);
3057              switch(c)
3058                {
3059                default: RRETURN(MATCH_NOMATCH);
3060                case 0x0a:      /* LF */
3061                case 0x0b:      /* VT */
3062                case 0x0c:      /* FF */
3063                case 0x0d:      /* CR */
3064                case 0x85:      /* NEL */
3065                case 0x2028:    /* LINE SEPARATOR */
3066                case 0x2029:    /* PARAGRAPH SEPARATOR */
3067                break;
3068                }
3069              }
3070            break;
3071    
3072          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3073          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3074            {            {
# Line 2838  for (;;) Line 3093  for (;;)
3093          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3094            {            {
3095            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject ||
3096               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))               (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0))
3097              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3098            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3099            }            }
3100          break;          break;
3101    
# Line 2858  for (;;) Line 3113  for (;;)
3113          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3114            {            {
3115            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject ||
3116               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))               (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
3117              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3118            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3119            }            }
3120          break;          break;
3121    
# Line 2925  for (;;) Line 3180  for (;;)
3180            }            }
3181          break;          break;
3182    
3183            case OP_NOT_HSPACE:
3184            for (i = 1; i <= min; i++)
3185              {
3186              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3187              switch(*eptr++)
3188                {
3189                default: break;
3190                case 0x09:      /* HT */
3191                case 0x20:      /* SPACE */
3192                case 0xa0:      /* NBSP */
3193                RRETURN(MATCH_NOMATCH);
3194                }
3195              }
3196            break;
3197    
3198            case OP_HSPACE:
3199            for (i = 1; i <= min; i++)
3200              {
3201              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3202              switch(*eptr++)
3203                {
3204                default: RRETURN(MATCH_NOMATCH);
3205                case 0x09:      /* HT */
3206                case 0x20:      /* SPACE */
3207                case 0xa0:      /* NBSP */
3208                break;
3209                }
3210              }
3211            break;
3212    
3213            case OP_NOT_VSPACE:
3214            for (i = 1; i <= min; i++)
3215              {
3216              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3217              switch(*eptr++)
3218                {
3219                default: break;
3220                case 0x0a:      /* LF */
3221                case 0x0b:      /* VT */
3222                case 0x0c:      /* FF */
3223                case 0x0d:      /* CR */
3224                case 0x85:      /* NEL */
3225                RRETURN(MATCH_NOMATCH);
3226                }
3227              }
3228            break;
3229    
3230            case OP_VSPACE:
3231            for (i = 1; i <= min; i++)
3232              {
3233              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3234              switch(*eptr++)
3235                {
3236                default: RRETURN(MATCH_NOMATCH);
3237                case 0x0a:      /* LF */
3238                case 0x0b:      /* VT */
3239                case 0x0c:      /* FF */
3240                case 0x0d:      /* CR */
3241                case 0x85:      /* NEL */
3242                break;
3243                }
3244              }
3245            break;
3246    
3247          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3248          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3249            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
# Line 3116  for (;;) Line 3435  for (;;)
3435                }                }
3436              break;              break;
3437    
3438                case OP_NOT_HSPACE:
3439                switch(c)
3440                  {
3441                  default: break;
3442                  case 0x09:      /* HT */
3443                  case 0x20:      /* SPACE */
3444                  case 0xa0:      /* NBSP */
3445                  case 0x1680:    /* OGHAM SPACE MARK */
3446                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3447                  case 0x2000:    /* EN QUAD */
3448                  case 0x2001:    /* EM QUAD */
3449                  case 0x2002:    /* EN SPACE */
3450                  case 0x2003:    /* EM SPACE */
3451                  case 0x2004:    /* THREE-PER-EM SPACE */
3452                  case 0x2005:    /* FOUR-PER-EM SPACE */
3453                  case 0x2006:    /* SIX-PER-EM SPACE */
3454                  case 0x2007:    /* FIGURE SPACE */
3455                  case 0x2008:    /* PUNCTUATION SPACE */
3456                  case 0x2009:    /* THIN SPACE */
3457                  case 0x200A:    /* HAIR SPACE */
3458                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3459                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3460                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3461                  RRETURN(MATCH_NOMATCH);
3462                  }
3463                break;
3464    
3465                case OP_HSPACE:
3466                switch(c)
3467                  {
3468                  default: RRETURN(MATCH_NOMATCH);
3469                  case 0x09:      /* HT */
3470                  case 0x20:      /* SPACE */
3471                  case 0xa0:      /* NBSP */
3472                  case 0x1680:    /* OGHAM SPACE MARK */
3473                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3474                  case 0x2000:    /* EN QUAD */
3475                  case 0x2001:    /* EM QUAD */
3476                  case 0x2002:    /* EN SPACE */
3477                  case 0x2003:    /* EM SPACE */
3478                  case 0x2004:    /* THREE-PER-EM SPACE */
3479                  case 0x2005:    /* FOUR-PER-EM SPACE */
3480                  case 0x2006:    /* SIX-PER-EM SPACE */
3481                  case 0x2007:    /* FIGURE SPACE */
3482                  case 0x2008:    /* PUNCTUATION SPACE */
3483                  case 0x2009:    /* THIN SPACE */
3484                  case 0x200A:    /* HAIR SPACE */
3485                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3486                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3487                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3488                  break;
3489                  }
3490                break;
3491    
3492                case OP_NOT_VSPACE:
3493                switch(c)
3494                  {
3495                  default: break;
3496                  case 0x0a:      /* LF */
3497                  case 0x0b:      /* VT */
3498                  case 0x0c:      /* FF */
3499                  case 0x0d:      /* CR */
3500                  case 0x85:      /* NEL */
3501                  case 0x2028:    /* LINE SEPARATOR */
3502                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3503                  RRETURN(MATCH_NOMATCH);
3504                  }
3505                break;
3506    
3507                case OP_VSPACE:
3508                switch(c)
3509                  {
3510                  default: RRETURN(MATCH_NOMATCH);
3511                  case 0x0a:      /* LF */
3512                  case 0x0b:      /* VT */
3513                  case 0x0c:      /* FF */
3514                  case 0x0d:      /* CR */
3515                  case 0x85:      /* NEL */
3516                  case 0x2028:    /* LINE SEPARATOR */
3517                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3518                  break;
3519                  }
3520                break;
3521    
3522              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3523              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3524                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 3187  for (;;) Line 3590  for (;;)
3590                }                }
3591              break;              break;
3592    
3593                case OP_NOT_HSPACE:
3594                switch(c)
3595                  {
3596                  default: break;
3597                  case 0x09:      /* HT */
3598                  case 0x20:      /* SPACE */
3599                  case 0xa0:      /* NBSP */
3600                  RRETURN(MATCH_NOMATCH);
3601                  }
3602                break;
3603    
3604                case OP_HSPACE:
3605                switch(c)
3606                  {
3607                  default: RRETURN(MATCH_NOMATCH);
3608                  case 0x09:      /* HT */
3609                  case 0x20:      /* SPACE */
3610                  case 0xa0:      /* NBSP */
3611                  break;
3612                  }
3613                break;
3614    
3615                case OP_NOT_VSPACE:
3616                switch(c)
3617                  {
3618                  default: break;
3619                  case 0x0a:      /* LF */
3620                  case 0x0b:      /* VT */
3621                  case 0x0c:      /* FF */
3622                  case 0x0d:      /* CR */
3623                  case 0x85:      /* NEL */
3624                  RRETURN(MATCH_NOMATCH);
3625                  }
3626                break;
3627    
3628                case OP_VSPACE:
3629                switch(c)
3630                  {
3631                  default: RRETURN(MATCH_NOMATCH);
3632                  case 0x0a:      /* LF */
3633                  case 0x0b:      /* VT */
3634                  case 0x0c:      /* FF */
3635                  case 0x0d:      /* CR */
3636                  case 0x85:      /* NEL */
3637                  break;
3638                  }
3639                break;
3640    
3641              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3642              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3643              break;              break;
# Line 3306  for (;;) Line 3757  for (;;)
3757            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
3758            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3759            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3760            BACKCHAR(eptr);            if (utf8) BACKCHAR(eptr);
3761            }            }
3762          }          }
3763    
# Line 3345  for (;;) Line 3796  for (;;)
3796            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
3797              {              {
3798              int len = 1;              int len = 1;
             BACKCHAR(eptr);  
3799              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr; else
3800                {                {
3801                  BACKCHAR(eptr);
3802                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3803                }                }
3804              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
# Line 3368  for (;;) Line 3819  for (;;)
3819          switch(ctype)          switch(ctype)
3820            {            {
3821            case OP_ANY:            case OP_ANY:
   
           /* Special code is required for UTF8, but when the maximum is  
           unlimited we don't need it, so we repeat the non-UTF8 code. This is  
           probably worth it, because .* is quite a common idiom. */  
   
3822            if (max < INT_MAX)            if (max < INT_MAX)
3823              {              {
3824              if ((ims & PCRE_DOTALL) == 0)              if ((ims & PCRE_DOTALL) == 0)
# Line 3405  for (;;) Line 3851  for (;;)
3851                  {                  {
3852                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3853                  eptr++;                  eptr++;
3854                    while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3855                  }                  }
               break;  
3856                }                }
3857              else              else
3858                {                {
3859                c = max - min;                eptr = md->end_subject;
               if (c > (unsigned int)(md->end_subject - eptr))  
                 c = md->end_subject - eptr;  
               eptr += c;  
3860                }                }
3861              }              }
3862            break;            break;
# Line 3448  for (;;) Line 3891  for (;;)
3891              }              }
3892            break;            break;
3893    
3894              case OP_NOT_HSPACE:
3895              case OP_HSPACE:
3896              for (i = min; i < max; i++)
3897                {
3898                BOOL gotspace;
3899                int len = 1;
3900                if (eptr >= md->end_subject) break;
3901                GETCHARLEN(c, eptr, len);
3902                switch(c)
3903                  {
3904                  default: gotspace = FALSE; break;
3905                  case 0x09:      /* HT */
3906                  case 0x20:      /* SPACE */
3907                  case 0xa0:      /* NBSP */
3908                  case 0x1680:    /* OGHAM SPACE MARK */
3909                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3910                  case 0x2000:    /* EN QUAD */
3911                  case 0x2001:    /* EM QUAD */
3912                  case 0x2002:    /* EN SPACE */
3913                  case 0x2003:    /* EM SPACE */
3914                  case 0x2004:    /* THREE-PER-EM SPACE */
3915                  case 0x2005:    /* FOUR-PER-EM SPACE */
3916                  case 0x2006:    /* SIX-PER-EM SPACE */
3917                  case 0x2007:    /* FIGURE SPACE */
3918                  case 0x2008:    /* PUNCTUATION SPACE */
3919                  case 0x2009:    /* THIN SPACE */
3920                  case 0x200A:    /* HAIR SPACE */
3921                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3922                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3923                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3924                  gotspace = TRUE;
3925                  break;
3926                  }
3927                if (gotspace == (ctype == OP_NOT_HSPACE)) break;
3928                eptr += len;
3929                }
3930              break;
3931    
3932              case OP_NOT_VSPACE:
3933              case OP_VSPACE:
3934              for (i = min; i < max; i++)
3935                {
3936                BOOL gotspace;
3937                int len = 1;
3938                if (eptr >= md->end_subject) break;
3939                GETCHARLEN(c, eptr, len);
3940                switch(c)
3941                  {
3942                  default: gotspace = FALSE; break;
3943                  case 0x0a:      /* LF */
3944                  case 0x0b:      /* VT */
3945                  case 0x0c:      /* FF */
3946                  case 0x0d:      /* CR */
3947                  case 0x85:      /* NEL */
3948                  case 0x2028:    /* LINE SEPARATOR */
3949                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3950                  gotspace = TRUE;
3951                  break;
3952                  }
3953                if (gotspace == (ctype == OP_NOT_VSPACE)) break;
3954                eptr += len;
3955                }
3956              break;
3957    
3958            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
3959            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3960              {              {
# Line 3530  for (;;) Line 4037  for (;;)
4037            }            }
4038          }          }
4039        else        else
4040  #endif  #endif  /* SUPPORT_UTF8 */
4041    
4042        /* Not UTF-8 mode */        /* Not UTF-8 mode */
4043          {          {
# Line 3574  for (;;) Line 4081  for (;;)
4081              }              }
4082            break;            break;
4083    
4084              case OP_NOT_HSPACE:
4085              for (i = min; i < max; i++)
4086                {
4087                if (eptr >= md->end_subject) break;
4088                c = *eptr;
4089                if (c == 0x09 || c == 0x20 || c == 0xa0) break;
4090                eptr++;
4091                }
4092              break;
4093    
4094              case OP_HSPACE:
4095              for (i = min; i < max; i++)
4096                {
4097                if (eptr >= md->end_subject) break;
4098                c = *eptr;
4099                if (c != 0x09 && c != 0x20 && c != 0xa0) break;
4100                eptr++;
4101                }
4102              break;
4103    
4104              case OP_NOT_VSPACE:
4105              for (i = min; i < max; i++)
4106                {
4107                if (eptr >= md->end_subject) break;
4108                c = *eptr;
4109                if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
4110                  break;
4111                eptr++;
4112                }
4113              break;
4114    
4115              case OP_VSPACE:
4116              for (i = min; i < max; i++)
4117                {
4118                if (eptr >= md->end_subject) break;
4119                c = *eptr;
4120                if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
4121                  break;
4122                eptr++;
4123                }
4124              break;
4125    
4126            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
4127            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4128              {              {
# Line 3679  switch (frame->Xwhere) Line 4228  switch (frame->Xwhere)
4228    LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)    LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
4229    LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)    LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
4230    LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)    LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)
4231    LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47)    LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47) LBL(48)
4232      LBL(49) LBL(50) LBL(51) LBL(52) LBL(53) LBL(54)
4233    default:    default:
4234    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
4235    return PCRE_ERROR_INTERNAL;    return PCRE_ERROR_INTERNAL;
# Line 3796  const uschar *start_bits = NULL; Line 4346  const uschar *start_bits = NULL;
4346  USPTR start_match = (USPTR)subject + start_offset;  USPTR start_match = (USPTR)subject + start_offset;
4347  USPTR end_subject;  USPTR end_subject;
4348  USPTR req_byte_ptr = start_match - 1;  USPTR req_byte_ptr = start_match - 1;
 eptrblock eptrchain[EPTR_WORK_SIZE];  
4349    
4350  pcre_study_data internal_study;  pcre_study_data internal_study;
4351  const pcre_study_data *study;  const pcre_study_data *study;
# Line 3882  md->partial = (options & PCRE_PARTIAL) ! Line 4431  md->partial = (options & PCRE_PARTIAL) !
4431  md->hitend = FALSE;  md->hitend = FALSE;
4432    
4433  md->recursive = NULL;                   /* No recursion at top level */  md->recursive = NULL;                   /* No recursion at top level */
 md->eptrchain = eptrchain;              /* Make workspace generally available */  
4434    
4435  md->lcc = tables + lcc_offset;  md->lcc = tables + lcc_offset;
4436  md->ctypes = tables + ctypes_offset;  md->ctypes = tables + ctypes_offset;
# Line 4038  the loop runs just once. */ Line 4586  the loop runs just once. */
4586  for(;;)  for(;;)
4587    {    {
4588    USPTR save_end_subject = end_subject;    USPTR save_end_subject = end_subject;
4589      USPTR new_start_match;
4590    
4591    /* Reset the maximum number of extractions we might see. */    /* Reset the maximum number of extractions we might see. */
4592    
# Line 4178  for(;;) Line 4727  for(;;)
4727    
4728    /* OK, we can now run the match. */    /* OK, we can now run the match. */
4729    
4730    md->start_match_ptr = start_match;      /* Insurance */    md->start_match_ptr = start_match;
4731    md->match_call_count = 0;    md->match_call_count = 0;
4732    md->eptrn = 0;                          /* Next free eptrchain slot */    rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
   rc = match(start_match, md->start_code, start_match, 2, md,  
     ims, NULL, 0, 0);  
4733    
4734    /* Any return other than MATCH_NOMATCH breaks the loop. */    switch(rc)
4735        {
4736        /* NOMATCH and PRUNE advance by one character. THEN at this level acts
4737        exactly like PRUNE. */
4738    
4739        case MATCH_NOMATCH:
4740        case MATCH_PRUNE:
4741        case MATCH_THEN:
4742        new_start_match = start_match + 1;
4743    #ifdef SUPPORT_UTF8
4744        if (utf8)
4745          while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
4746            new_start_match++;
4747    #endif
4748        break;
4749    
4750        /* SKIP passes back the next starting point explicitly. */
4751    
4752        case MATCH_SKIP:
4753        new_start_match = md->start_match_ptr;
4754        break;
4755    
4756        /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
4757    
4758    if (rc != MATCH_NOMATCH) break;      case MATCH_COMMIT:
4759        rc = MATCH_NOMATCH;
4760        goto ENDLOOP;
4761    
4762        /* Any other return is some kind of error. */
4763    
4764        default:
4765        goto ENDLOOP;
4766        }
4767    
4768      /* Control reaches here for the various types of "no match at this point"
4769      result. Reset the code to MATCH_NOMATCH for subsequent checking. */
4770    
4771      rc = MATCH_NOMATCH;
4772    
4773    /* If PCRE_FIRSTLINE is set, the match must happen before or at the first    /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4774    newline in the subject (though it may continue over the newline). Therefore,    newline in the subject (though it may continue over the newline). Therefore,
# Line 4194  for(;;) Line 4776  for(;;)
4776    
4777    if (firstline && IS_NEWLINE(start_match)) break;    if (firstline && IS_NEWLINE(start_match)) break;
4778    
4779    /* Advance the match position by one character. */    /* Advance to new matching position */
4780    
4781    start_match++;    start_match = new_start_match;
 #ifdef SUPPORT_UTF8  
   if (utf8)  
     while(start_match < end_subject && (*start_match & 0xc0) == 0x80)  
       start_match++;  
 #endif  
4782    
4783    /* Break the loop if the pattern is anchored or if we have passed the end of    /* Break the loop if the pattern is anchored or if we have passed the end of
4784    the subject. */    the subject. */
# Line 4227  for(;;) Line 4804  for(;;)
4804  /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping  /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4805  conditions is true:  conditions is true:
4806    
4807  (1) The pattern is anchored;  (1) The pattern is anchored or the match was failed by (*COMMIT);
4808    
4809  (2) We are past the end of the subject;  (2) We are past the end of the subject;
4810    
# Line 4242  processing, copy those that we can. In t Line 4819  processing, copy those that we can. In t
4819  certain parts of the pattern were not used, even though there are more  certain parts of the pattern were not used, even though there are more
4820  capturing parentheses than vector slots. */  capturing parentheses than vector slots. */
4821    
4822    ENDLOOP:
4823    
4824  if (rc == MATCH_MATCH)  if (rc == MATCH_MATCH)
4825    {    {
4826    if (using_temporary_offsets)    if (using_temporary_offsets)

Legend:
Removed from v.172  
changed lines
  Added in v.219

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12