/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 200 by ph10, Wed Aug 1 09:10:40 2007 UTC revision 371 by ph10, Mon Aug 25 18:28:05 2008 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2008 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 45  applications. */ Line 45  applications. */
45    
46    
47  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
48  #include <config.h>  #include "config.h"
49  #endif  #endif
50    
51  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
# Line 84  centralize the loading of these characte Line 84  centralize the loading of these characte
84  small value. ***NOTE*** If the start of this table is modified, the two tables  small value. ***NOTE*** If the start of this table is modified, the two tables
85  that follow must also be modified. */  that follow must also be modified. */
86    
87  static uschar coptable[] = {  static const uschar coptable[] = {
88    0,                             /* End                                    */    0,                             /* End                                    */
89    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
90    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
91    0, 0,                          /* Any, Anybyte                           */    0, 0, 0,                       /* Any, AllAny, Anybyte                   */
92    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */
93    0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */    0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
94    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
# Line 130  static uschar coptable[] = { Line 130  static uschar coptable[] = {
130    0,                             /* CREF                                   */    0,                             /* CREF                                   */
131    0,                             /* RREF                                   */    0,                             /* RREF                                   */
132    0,                             /* DEF                                    */    0,                             /* DEF                                    */
133    0, 0                           /* BRAZERO, BRAMINZERO                    */    0, 0,                          /* BRAZERO, BRAMINZERO                    */
134      0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
135      0, 0, 0                        /* FAIL, ACCEPT, SKIPZERO                 */
136  };  };
137    
138  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
139  and \w */  and \w */
140    
141  static uschar toptable1[] = {  static const uschar toptable1[] = {
142    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
143    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
144    ctype_space, ctype_space,    ctype_space, ctype_space,
145    ctype_word,  ctype_word,    ctype_word,  ctype_word,
146    0                               /* OP_ANY */    0, 0                            /* OP_ANY, OP_ALLANY */
147  };  };
148    
149  static uschar toptable2[] = {  static const uschar toptable2[] = {
150    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
151    ctype_digit, 0,    ctype_digit, 0,
152    ctype_space, 0,    ctype_space, 0,
153    ctype_word,  0,    ctype_word,  0,
154    1                               /* OP_ANY */    1, 1                            /* OP_ANY, OP_ALLANY */
155  };  };
156    
157    
# Line 221  Arguments: Line 223  Arguments:
223    rlevel            function call recursion level    rlevel            function call recursion level
224    recursing         regex recursive call level    recursing         regex recursive call level
225    
226  Returns:            > 0 =>  Returns:            > 0 => number of match offset pairs placed in offsets
227                      = 0 =>                      = 0 => offsets overflowed; longest matches are present
228                       -1 => failed to match                       -1 => failed to match
229                     < -1 => some kind of unexpected problem                     < -1 => some kind of unexpected problem
230    
# Line 510  for (;;) Line 512  for (;;)
512      const uschar *code;      const uschar *code;
513      int state_offset = current_state->offset;      int state_offset = current_state->offset;
514      int count, codevalue;      int count, codevalue;
 #ifdef SUPPORT_UCP  
     int chartype, script;  
 #endif  
515    
516  #ifdef DEBUG  #ifdef DEBUG
517      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
# Line 692  for (;;) Line 691  for (;;)
691        break;        break;
692    
693        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
694          case OP_SKIPZERO:
695          code += 1 + GET(code, 2);
696          while (*code == OP_ALT) code += GET(code, 1);
697          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
698          break;
699    
700          /*-----------------------------------------------------------------*/
701        case OP_CIRC:        case OP_CIRC:
702        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
703            ((ims & PCRE_MULTILINE) != 0 &&            ((ims & PCRE_MULTILINE) != 0 &&
# Line 730  for (;;) Line 736  for (;;)
736    
737        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
738        case OP_ANY:        case OP_ANY:
739        if (clen > 0 && ((ims & PCRE_DOTALL) != 0 || !IS_NEWLINE(ptr)))        if (clen > 0 && !IS_NEWLINE(ptr))
740            { ADD_NEW(state_offset + 1, 0); }
741          break;
742    
743          /*-----------------------------------------------------------------*/
744          case OP_ALLANY:
745          if (clen > 0)
746          { ADD_NEW(state_offset + 1, 0); }          { ADD_NEW(state_offset + 1, 0); }
747        break;        break;
748    
# Line 810  for (;;) Line 822  for (;;)
822        if (clen > 0)        if (clen > 0)
823          {          {
824          BOOL OK;          BOOL OK;
825          int category = _pcre_ucp_findprop(c, &chartype, &script);          const ucd_record * prop = GET_UCD(c);
826          switch(code[1])          switch(code[1])
827            {            {
828            case PT_ANY:            case PT_ANY:
# Line 818  for (;;) Line 830  for (;;)
830            break;            break;
831    
832            case PT_LAMP:            case PT_LAMP:
833            OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
834            break;            break;
835    
836            case PT_GC:            case PT_GC:
837            OK = category == code[2];            OK = _pcre_ucp_gentype[prop->chartype] == code[2];
838            break;            break;
839    
840            case PT_PC:            case PT_PC:
841            OK = chartype == code[2];            OK = prop->chartype == code[2];
842            break;            break;
843    
844            case PT_SC:            case PT_SC:
845            OK = script == code[2];            OK = prop->script == code[2];
846            break;            break;
847    
848            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
# Line 850  for (;;) Line 862  for (;;)
862  /* ========================================================================== */  /* ========================================================================== */
863        /* These opcodes likewise inspect the subject character, but have an        /* These opcodes likewise inspect the subject character, but have an
864        argument that is not a data character. It is one of these opcodes:        argument that is not a data character. It is one of these opcodes:
865        OP_ANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE, OP_WORDCHAR,        OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE,
866        OP_NOT_WORDCHAR. The value is loaded into d. */        OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
867    
868        case OP_TYPEPLUS:        case OP_TYPEPLUS:
869        case OP_TYPEMINPLUS:        case OP_TYPEMINPLUS:
# Line 862  for (;;) Line 874  for (;;)
874          {          {
875          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
876              (c < 256 &&              (c < 256 &&
877                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
878                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
879            {            {
880            if (count > 0 && codevalue == OP_TYPEPOSPLUS)            if (count > 0 && codevalue == OP_TYPEPOSPLUS)
# Line 888  for (;;) Line 897  for (;;)
897          {          {
898          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
899              (c < 256 &&              (c < 256 &&
900                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
901                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
902            {            {
903            if (codevalue == OP_TYPEPOSQUERY)            if (codevalue == OP_TYPEPOSQUERY)
# Line 913  for (;;) Line 919  for (;;)
919          {          {
920          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
921              (c < 256 &&              (c < 256 &&
922                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
923                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
924            {            {
925            if (codevalue == OP_TYPEPOSSTAR)            if (codevalue == OP_TYPEPOSSTAR)
# Line 936  for (;;) Line 939  for (;;)
939          {          {
940          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
941              (c < 256 &&              (c < 256 &&
942                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
943                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
944            {            {
945            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
# Line 960  for (;;) Line 960  for (;;)
960          {          {
961          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
962              (c < 256 &&              (c < 256 &&
963                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
964                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
965            {            {
966            if (codevalue == OP_TYPEPOSUPTO)            if (codevalue == OP_TYPEPOSUPTO)
# Line 994  for (;;) Line 991  for (;;)
991        if (clen > 0)        if (clen > 0)
992          {          {
993          BOOL OK;          BOOL OK;
994          int category = _pcre_ucp_findprop(c, &chartype, &script);          const ucd_record * prop = GET_UCD(c);
995          switch(code[2])          switch(code[2])
996            {            {
997            case PT_ANY:            case PT_ANY:
# Line 1002  for (;;) Line 999  for (;;)
999            break;            break;
1000    
1001            case PT_LAMP:            case PT_LAMP:
1002            OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
1003            break;            break;
1004    
1005            case PT_GC:            case PT_GC:
1006            OK = category == code[3];            OK = _pcre_ucp_gentype[prop->chartype] == code[3];
1007            break;            break;
1008    
1009            case PT_PC:            case PT_PC:
1010            OK = chartype == code[3];            OK = prop->chartype == code[3];
1011            break;            break;
1012    
1013            case PT_SC:            case PT_SC:
1014            OK = script == code[3];            OK = prop->script == code[3];
1015            break;            break;
1016    
1017            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
# Line 1043  for (;;) Line 1040  for (;;)
1040        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1041        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1042        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1043        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1044          {          {
1045          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1046          int ncount = 0;          int ncount = 0;
# Line 1057  for (;;) Line 1054  for (;;)
1054            int nd;            int nd;
1055            int ndlen = 1;            int ndlen = 1;
1056            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1057            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;            if (UCD_CATEGORY(nd) != ucp_M) break;
1058            ncount++;            ncount++;
1059            nptr += ndlen;            nptr += ndlen;
1060            }            }
# Line 1078  for (;;) Line 1075  for (;;)
1075          int ncount = 0;          int ncount = 0;
1076          switch (c)          switch (c)
1077            {            {
           case 0x000d:  
           if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;  
           /* Fall through */  
           case 0x000a:  
1078            case 0x000b:            case 0x000b:
1079            case 0x000c:            case 0x000c:
1080            case 0x0085:            case 0x0085:
1081            case 0x2028:            case 0x2028:
1082            case 0x2029:            case 0x2029:
1083              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1084              goto ANYNL01;
1085    
1086              case 0x000d:
1087              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1088              /* Fall through */
1089    
1090              ANYNL01:
1091              case 0x000a:
1092            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1093              {              {
1094              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1095  for (;;) Line 1097  for (;;)
1097            count++;            count++;
1098            ADD_NEW_DATA(-state_offset, count, ncount);            ADD_NEW_DATA(-state_offset, count, ncount);
1099            break;            break;
1100    
1101            default:            default:
1102            break;            break;
1103            }            }
# Line 1210  for (;;) Line 1213  for (;;)
1213        if (clen > 0)        if (clen > 0)
1214          {          {
1215          BOOL OK;          BOOL OK;
1216          int category = _pcre_ucp_findprop(c, &chartype, &script);          const ucd_record * prop = GET_UCD(c);
1217          switch(code[2])          switch(code[2])
1218            {            {
1219            case PT_ANY:            case PT_ANY:
# Line 1218  for (;;) Line 1221  for (;;)
1221            break;            break;
1222    
1223            case PT_LAMP:            case PT_LAMP:
1224            OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
1225            break;            break;
1226    
1227            case PT_GC:            case PT_GC:
1228            OK = category == code[3];            OK = _pcre_ucp_gentype[prop->chartype] == code[3];
1229            break;            break;
1230    
1231            case PT_PC:            case PT_PC:
1232            OK = chartype == code[3];            OK = prop->chartype == code[3];
1233            break;            break;
1234    
1235            case PT_SC:            case PT_SC:
1236            OK = script == code[3];            OK = prop->script == code[3];
1237            break;            break;
1238    
1239            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
# Line 1268  for (;;) Line 1271  for (;;)
1271        QS2:        QS2:
1272    
1273        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1274        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1275          {          {
1276          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1277          int ncount = 0;          int ncount = 0;
# Line 1283  for (;;) Line 1286  for (;;)
1286            int nd;            int nd;
1287            int ndlen = 1;            int ndlen = 1;
1288            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1289            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;            if (UCD_CATEGORY(nd) != ucp_M) break;
1290            ncount++;            ncount++;
1291            nptr += ndlen;            nptr += ndlen;
1292            }            }
# Line 1311  for (;;) Line 1314  for (;;)
1314          int ncount = 0;          int ncount = 0;
1315          switch (c)          switch (c)
1316            {            {
           case 0x000d:  
           if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;  
           /* Fall through */  
           case 0x000a:  
1317            case 0x000b:            case 0x000b:
1318            case 0x000c:            case 0x000c:
1319            case 0x0085:            case 0x0085:
1320            case 0x2028:            case 0x2028:
1321            case 0x2029:            case 0x2029:
1322              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1323              goto ANYNL02;
1324    
1325              case 0x000d:
1326              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1327              /* Fall through */
1328    
1329              ANYNL02:
1330              case 0x000a:
1331            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1332                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1333              {              {
# Line 1328  for (;;) Line 1336  for (;;)
1336              }              }
1337            ADD_NEW_DATA(-(state_offset + count), 0, ncount);            ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1338            break;            break;
1339    
1340            default:            default:
1341            break;            break;
1342            }            }
# Line 1451  for (;;) Line 1460  for (;;)
1460        if (clen > 0)        if (clen > 0)
1461          {          {
1462          BOOL OK;          BOOL OK;
1463          int category = _pcre_ucp_findprop(c, &chartype, &script);          const ucd_record * prop = GET_UCD(c);
1464          switch(code[4])          switch(code[4])
1465            {            {
1466            case PT_ANY:            case PT_ANY:
# Line 1459  for (;;) Line 1468  for (;;)
1468            break;            break;
1469    
1470            case PT_LAMP:            case PT_LAMP:
1471            OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
1472            break;            break;
1473    
1474            case PT_GC:            case PT_GC:
1475            OK = category == code[5];            OK = _pcre_ucp_gentype[prop->chartype] == code[5];
1476            break;            break;
1477    
1478            case PT_PC:            case PT_PC:
1479            OK = chartype == code[5];            OK = prop->chartype == code[5];
1480            break;            break;
1481    
1482            case PT_SC:            case PT_SC:
1483            OK = script == code[5];            OK = prop->script == code[5];
1484            break;            break;
1485    
1486            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
# Line 1504  for (;;) Line 1513  for (;;)
1513        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1514          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 4, 0); }
1515        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1516        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1517          {          {
1518          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1519          int ncount = 0;          int ncount = 0;
# Line 1518  for (;;) Line 1527  for (;;)
1527            int nd;            int nd;
1528            int ndlen = 1;            int ndlen = 1;
1529            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1530            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;            if (UCD_CATEGORY(nd) != ucp_M) break;
1531            ncount++;            ncount++;
1532            nptr += ndlen;            nptr += ndlen;
1533            }            }
# Line 1543  for (;;) Line 1552  for (;;)
1552          int ncount = 0;          int ncount = 0;
1553          switch (c)          switch (c)
1554            {            {
           case 0x000d:  
           if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;  
           /* Fall through */  
           case 0x000a:  
1555            case 0x000b:            case 0x000b:
1556            case 0x000c:            case 0x000c:
1557            case 0x0085:            case 0x0085:
1558            case 0x2028:            case 0x2028:
1559            case 0x2029:            case 0x2029:
1560              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1561              goto ANYNL03;
1562    
1563              case 0x000d:
1564              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1565              /* Fall through */
1566    
1567              ANYNL03:
1568              case 0x000a:
1569            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1570              {              {
1571              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1562  for (;;) Line 1576  for (;;)
1576            else            else
1577              { ADD_NEW_DATA(-state_offset, count, ncount); }              { ADD_NEW_DATA(-state_offset, count, ncount); }
1578            break;            break;
1579    
1580            default:            default:
1581            break;            break;
1582            }            }
# Line 1692  for (;;) Line 1707  for (;;)
1707            other case of the character. */            other case of the character. */
1708    
1709  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1710            othercase = _pcre_ucp_othercase(c);            othercase = UCD_OTHERCASE(c);
1711  #else  #else
1712            othercase = NOTACHAR;            othercase = NOTACHAR;
1713  #endif  #endif
# Line 1717  for (;;) Line 1732  for (;;)
1732        to wait for them to pass before continuing. */        to wait for them to pass before continuing. */
1733    
1734        case OP_EXTUNI:        case OP_EXTUNI:
1735        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1736          {          {
1737          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1738          int ncount = 0;          int ncount = 0;
# Line 1725  for (;;) Line 1740  for (;;)
1740            {            {
1741            int nclen = 1;            int nclen = 1;
1742            GETCHARLEN(c, nptr, nclen);            GETCHARLEN(c, nptr, nclen);
1743            if (_pcre_ucp_findprop(c, &chartype, &script) != ucp_M) break;            if (UCD_CATEGORY(c) != ucp_M) break;
1744            ncount++;            ncount++;
1745            nptr += nclen;            nptr += nclen;
1746            }            }
# Line 1742  for (;;) Line 1757  for (;;)
1757        case OP_ANYNL:        case OP_ANYNL:
1758        if (clen > 0) switch(c)        if (clen > 0) switch(c)
1759          {          {
         case 0x000a:  
1760          case 0x000b:          case 0x000b:
1761          case 0x000c:          case 0x000c:
1762          case 0x0085:          case 0x0085:
1763          case 0x2028:          case 0x2028:
1764          case 0x2029:          case 0x2029:
1765            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1766    
1767            case 0x000a:
1768          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
1769          break;          break;
1770    
1771          case 0x000d:          case 0x000d:
1772          if (ptr + 1 < end_subject && ptr[1] == 0x0a)          if (ptr + 1 < end_subject && ptr[1] == 0x0a)
1773            {            {
# Line 1890  for (;;) Line 1908  for (;;)
1908            if (utf8 && d >= 128)            if (utf8 && d >= 128)
1909              {              {
1910  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1911              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
1912  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1913              }              }
1914            else            else
# Line 1928  for (;;) Line 1946  for (;;)
1946            if (utf8 && d >= 128)            if (utf8 && d >= 128)
1947              {              {
1948  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1949              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
1950  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1951              }              }
1952            else            else
# Line 1964  for (;;) Line 1982  for (;;)
1982            if (utf8 && d >= 128)            if (utf8 && d >= 128)
1983              {              {
1984  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1985              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
1986  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1987              }              }
1988            else            else
# Line 1996  for (;;) Line 2014  for (;;)
2014            if (utf8 && d >= 128)            if (utf8 && d >= 128)
2015              {              {
2016  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2017              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
2018  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2019              }              }
2020            else            else
# Line 2031  for (;;) Line 2049  for (;;)
2049            if (utf8 && d >= 128)            if (utf8 && d >= 128)
2050              {              {
2051  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2052              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
2053  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2054              }              }
2055            else            else
# Line 2139  for (;;) Line 2157  for (;;)
2157    
2158  /* ========================================================================== */  /* ========================================================================== */
2159        /* These are the opcodes for fancy brackets of various kinds. We have        /* These are the opcodes for fancy brackets of various kinds. We have
2160        to use recursion in order to handle them. */        to use recursion in order to handle them. The "always failing" assersion
2161          (?!) is optimised when compiling to OP_FAIL, so we have to support that,
2162          though the other "backtracking verbs" are not supported. */
2163    
2164          case OP_FAIL:
2165          break;
2166    
2167        case OP_ASSERT:        case OP_ASSERT:
2168        case OP_ASSERT_NOT:        case OP_ASSERT_NOT:
# Line 2482  Returns: > 0 => number of match Line 2505  Returns: > 0 => number of match
2505                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
2506  */  */
2507    
2508  PCRE_EXP_DEFN int  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
2509  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
2510    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
2511    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
# Line 2572  md->end_subject = end_subject; Line 2595  md->end_subject = end_subject;
2595  md->moptions = options;  md->moptions = options;
2596  md->poptions = re->options;  md->poptions = re->options;
2597    
2598    /* If the BSR option is not set at match time, copy what was set
2599    at compile time. */
2600    
2601    if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
2602      {
2603      if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
2604        md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
2605    #ifdef BSR_ANYCRLF
2606      else md->moptions |= PCRE_BSR_ANYCRLF;
2607    #endif
2608      }
2609    
2610  /* Handle different types of newline. The three bits give eight cases. If  /* Handle different types of newline. The three bits give eight cases. If
2611  nothing is set at run time, whatever was used at compile time applies. */  nothing is set at run time, whatever was used at compile time applies. */
2612    
# Line 2642  if (md->tables == NULL) md->tables = _pc Line 2677  if (md->tables == NULL) md->tables = _pc
2677  used in a loop when finding where to start. */  used in a loop when finding where to start. */
2678    
2679  lcc = md->tables + lcc_offset;  lcc = md->tables + lcc_offset;
2680  startline = (re->options & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
2681  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
2682    
2683  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_byte value is
# Line 2653  studied, there may be a bitmap of possib Line 2688  studied, there may be a bitmap of possib
2688    
2689  if (!anchored)  if (!anchored)
2690    {    {
2691    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
2692      {      {
2693      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
2694      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
# Line 2670  if (!anchored) Line 2705  if (!anchored)
2705  /* For anchored or unanchored matches, there may be a "last known required  /* For anchored or unanchored matches, there may be a "last known required
2706  character" set. */  character" set. */
2707    
2708  if ((re->options & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
2709    {    {
2710    req_byte = re->req_byte & 255;    req_byte = re->req_byte & 255;
2711    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
# Line 2698  for (;;) Line 2733  for (;;)
2733    
2734      if (firstline)      if (firstline)
2735        {        {
2736        const uschar *t = current_subject;        USPTR t = current_subject;
2737    #ifdef SUPPORT_UTF8
2738          if (utf8)
2739            {
2740            while (t < md->end_subject && !IS_NEWLINE(t))
2741              {
2742              t++;
2743              while (t < end_subject && (*t & 0xc0) == 0x80) t++;
2744              }
2745            }
2746          else
2747    #endif
2748        while (t < md->end_subject && !IS_NEWLINE(t)) t++;        while (t < md->end_subject && !IS_NEWLINE(t)) t++;
2749        end_subject = t;        end_subject = t;
2750        }        }
# Line 2720  for (;;) Line 2766  for (;;)
2766        {        {
2767        if (current_subject > md->start_subject + start_offset)        if (current_subject > md->start_subject + start_offset)
2768          {          {
2769          while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))  #ifdef SUPPORT_UTF8
2770            if (utf8)
2771              {
2772              while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
2773                {
2774                current_subject++;
2775                while(current_subject < end_subject &&
2776                      (*current_subject & 0xc0) == 0x80)
2777                  current_subject++;
2778                }
2779              }
2780            else
2781    #endif
2782            while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
2783            current_subject++;            current_subject++;
2784    
2785          /* If we have just passed a CR and the newline option is ANY or          /* If we have just passed a CR and the newline option is ANY or
# Line 2840  for (;;) Line 2899  for (;;)
2899      }      }
2900    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
2901    
2902    /* If we have just passed a CR and the newline option is CRLF or ANY or    /* If we have just passed a CR and we are now at a LF, and the pattern does
2903    ANYCRLF, and we are now at a LF, advance the match position by one more    not contain any explicit matches for \r or \n, and the newline option is CRLF
2904    character. */    or ANY or ANYCRLF, advance the match position by one more character. */
2905    
2906    if (current_subject[-1] == '\r' &&    if (current_subject[-1] == '\r' &&
2907         (md->nltype == NLTYPE_ANY ||        current_subject < end_subject &&
2908          md->nltype == NLTYPE_ANYCRLF ||        *current_subject == '\n' &&
2909          md->nllen == 2) &&        (re->flags & PCRE_HASCRORLF) == 0 &&
2910         current_subject < end_subject &&          (md->nltype == NLTYPE_ANY ||
2911         *current_subject == '\n')           md->nltype == NLTYPE_ANYCRLF ||
2912             md->nllen == 2))
2913      current_subject++;      current_subject++;
2914    
2915    }   /* "Bumpalong" loop */    }   /* "Bumpalong" loop */

Legend:
Removed from v.200  
changed lines
  Added in v.371

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12