/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 117 by ph10, Fri Mar 9 15:59:06 2007 UTC revision 305 by ph10, Sun Jan 20 20:07:32 2008 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2008 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 44  FSM). This is NOT Perl- compatible, but Line 44  FSM). This is NOT Perl- compatible, but
44  applications. */  applications. */
45    
46    
47    #ifdef HAVE_CONFIG_H
48    #include "config.h"
49    #endif
50    
51  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
52  #define PSSTART start_subject  /* Field containing processed string start */  #define PSSTART start_subject  /* Field containing processed string start */
53  #define PSEND   end_subject    /* Field containing processed string end */  #define PSEND   end_subject    /* Field containing processed string end */
# Line 63  applications. */ Line 67  applications. */
67    
68  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
69  into others, under special conditions. A gap of 20 between the blocks should be  into others, under special conditions. A gap of 20 between the blocks should be
70  enough. */  enough. The resulting opcodes don't have to be less than 256 because they are
71    never stored, so we push them well clear of the normal opcodes. */
72    
73  #define OP_PROP_EXTRA 100  #define OP_PROP_EXTRA       300
74  #define OP_EXTUNI_EXTRA 120  #define OP_EXTUNI_EXTRA     320
75  #define OP_ANYNL_EXTRA 140  #define OP_ANYNL_EXTRA      340
76    #define OP_HSPACE_EXTRA     360
77    #define OP_VSPACE_EXTRA     380
78    
79    
80  /* This table identifies those opcodes that are followed immediately by a  /* This table identifies those opcodes that are followed immediately by a
81  character that is to be tested in some way. This makes it possible to  character that is to be tested in some way. This makes it possible to
82  centralize the loading of these characters. In the case of Type * etc, the  centralize the loading of these characters. In the case of Type * etc, the
83  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
84  small value. */  small value. ***NOTE*** If the start of this table is modified, the two tables
85    that follow must also be modified. */
86    
87  static uschar coptable[] = {  static uschar coptable[] = {
88    0,                             /* End                                    */    0,                             /* End                                    */
89    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
90      0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
91    0, 0,                          /* Any, Anybyte                           */    0, 0,                          /* Any, Anybyte                           */
92    0, 0, 0, 0,                    /* NOTPROP, PROP, EXTUNI, ANYNL           */    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */
93      0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
94    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
95    1,                             /* Char                                   */    1,                             /* Char                                   */
96    1,                             /* Charnc                                 */    1,                             /* Charnc                                 */
# Line 120  static uschar coptable[] = { Line 130  static uschar coptable[] = {
130    0,                             /* CREF                                   */    0,                             /* CREF                                   */
131    0,                             /* RREF                                   */    0,                             /* RREF                                   */
132    0,                             /* DEF                                    */    0,                             /* DEF                                    */
133    0, 0                           /* BRAZERO, BRAMINZERO                    */    0, 0,                          /* BRAZERO, BRAMINZERO                    */
134      0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
135      0, 0                           /* FAIL, ACCEPT                           */
136  };  };
137    
138  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
139  and \w */  and \w */
140    
141  static uschar toptable1[] = {  static uschar toptable1[] = {
142    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
143    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
144    ctype_space, ctype_space,    ctype_space, ctype_space,
145    ctype_word,  ctype_word,    ctype_word,  ctype_word,
# Line 135  static uschar toptable1[] = { Line 147  static uschar toptable1[] = {
147  };  };
148    
149  static uschar toptable2[] = {  static uschar toptable2[] = {
150    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
151    ctype_digit, 0,    ctype_digit, 0,
152    ctype_space, 0,    ctype_space, 0,
153    ctype_word,  0,    ctype_word,  0,
# Line 500  for (;;) Line 512  for (;;)
512      const uschar *code;      const uschar *code;
513      int state_offset = current_state->offset;      int state_offset = current_state->offset;
514      int count, codevalue;      int count, codevalue;
515    #ifdef SUPPORT_UCP
516      int chartype, script;      int chartype, script;
517    #endif
518    
519  #ifdef DEBUG  #ifdef DEBUG
520      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
# Line 555  for (;;) Line 569  for (;;)
569      permitted.      permitted.
570    
571      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
572      argument that is not a data character - but is always one byte long.      argument that is not a data character - but is always one byte long. We
573      Unfortunately, we have to take special action to deal with  \P, \p, and      have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in
574      \X in this case. To keep the other cases fast, convert these ones to new      this case. To keep the other cases fast, convert these ones to new opcodes.
575      opcodes. */      */
576    
577      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
578        {        {
# Line 576  for (;;) Line 590  for (;;)
590            case OP_PROP: codevalue += OP_PROP_EXTRA; break;            case OP_PROP: codevalue += OP_PROP_EXTRA; break;
591            case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;            case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
592            case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;            case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
593              case OP_NOT_HSPACE:
594              case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
595              case OP_NOT_VSPACE:
596              case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
597            default: break;            default: break;
598            }            }
599          }          }
# Line 783  for (;;) Line 801  for (;;)
801        break;        break;
802    
803    
 #ifdef SUPPORT_UCP  
   
804        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
805        /* Check the next character by Unicode property. We will get here only        /* Check the next character by Unicode property. We will get here only
806        if the support is in the binary; otherwise a compile-time error occurs.        if the support is in the binary; otherwise a compile-time error occurs.
807        */        */
808    
809    #ifdef SUPPORT_UCP
810        case OP_PROP:        case OP_PROP:
811        case OP_NOTPROP:        case OP_NOTPROP:
812        if (clen > 0)        if (clen > 0)
# Line 970  for (;;) Line 987  for (;;)
987        argument. It keeps the code above fast for the other cases. The argument        argument. It keeps the code above fast for the other cases. The argument
988        is in the d variable. */        is in the d variable. */
989    
990    #ifdef SUPPORT_UCP
991        case OP_PROP_EXTRA + OP_TYPEPLUS:        case OP_PROP_EXTRA + OP_TYPEPLUS:
992        case OP_PROP_EXTRA + OP_TYPEMINPLUS:        case OP_PROP_EXTRA + OP_TYPEMINPLUS:
993        case OP_PROP_EXTRA + OP_TYPEPOSPLUS:        case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
# Line 1049  for (;;) Line 1067  for (;;)
1067          ADD_NEW_DATA(-state_offset, count, ncount);          ADD_NEW_DATA(-state_offset, count, ncount);
1068          }          }
1069        break;        break;
1070    #endif
1071    
1072        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1073        case OP_ANYNL_EXTRA + OP_TYPEPLUS:        case OP_ANYNL_EXTRA + OP_TYPEPLUS:
# Line 1061  for (;;) Line 1080  for (;;)
1080          int ncount = 0;          int ncount = 0;
1081          switch (c)          switch (c)
1082            {            {
1083              case 0x000b:
1084              case 0x000c:
1085              case 0x0085:
1086              case 0x2028:
1087              case 0x2029:
1088              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1089              goto ANYNL01;
1090    
1091            case 0x000d:            case 0x000d:
1092            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1093            /* Fall through */            /* Fall through */
1094    
1095              ANYNL01:
1096              case 0x000a:
1097              if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1098                {
1099                active_count--;           /* Remove non-match possibility */
1100                next_active_state--;
1101                }
1102              count++;
1103              ADD_NEW_DATA(-state_offset, count, ncount);
1104              break;
1105    
1106              default:
1107              break;
1108              }
1109            }
1110          break;
1111    
1112          /*-----------------------------------------------------------------*/
1113          case OP_VSPACE_EXTRA + OP_TYPEPLUS:
1114          case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
1115          case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
1116          count = current_state->count;  /* Already matched */
1117          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1118          if (clen > 0)
1119            {
1120            BOOL OK;
1121            switch (c)
1122              {
1123            case 0x000a:            case 0x000a:
1124            case 0x000b:            case 0x000b:
1125            case 0x000c:            case 0x000c:
1126              case 0x000d:
1127            case 0x0085:            case 0x0085:
1128            case 0x2028:            case 0x2028:
1129            case 0x2029:            case 0x2029:
1130            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)            OK = TRUE;
1131              break;
1132    
1133              default:
1134              OK = FALSE;
1135              break;
1136              }
1137    
1138            if (OK == (d == OP_VSPACE))
1139              {
1140              if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
1141              {              {
1142              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1143              next_active_state--;              next_active_state--;
1144              }              }
1145            count++;            count++;
1146            ADD_NEW_DATA(-state_offset, count, ncount);            ADD_NEW_DATA(-state_offset, count, 0);
1147              }
1148            }
1149          break;
1150    
1151          /*-----------------------------------------------------------------*/
1152          case OP_HSPACE_EXTRA + OP_TYPEPLUS:
1153          case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
1154          case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
1155          count = current_state->count;  /* Already matched */
1156          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1157          if (clen > 0)
1158            {
1159            BOOL OK;
1160            switch (c)
1161              {
1162              case 0x09:      /* HT */
1163              case 0x20:      /* SPACE */
1164              case 0xa0:      /* NBSP */
1165              case 0x1680:    /* OGHAM SPACE MARK */
1166              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1167              case 0x2000:    /* EN QUAD */
1168              case 0x2001:    /* EM QUAD */
1169              case 0x2002:    /* EN SPACE */
1170              case 0x2003:    /* EM SPACE */
1171              case 0x2004:    /* THREE-PER-EM SPACE */
1172              case 0x2005:    /* FOUR-PER-EM SPACE */
1173              case 0x2006:    /* SIX-PER-EM SPACE */
1174              case 0x2007:    /* FIGURE SPACE */
1175              case 0x2008:    /* PUNCTUATION SPACE */
1176              case 0x2009:    /* THIN SPACE */
1177              case 0x200A:    /* HAIR SPACE */
1178              case 0x202f:    /* NARROW NO-BREAK SPACE */
1179              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1180              case 0x3000:    /* IDEOGRAPHIC SPACE */
1181              OK = TRUE;
1182            break;            break;
1183    
1184            default:            default:
1185              OK = FALSE;
1186            break;            break;
1187            }            }
1188    
1189            if (OK == (d == OP_HSPACE))
1190              {
1191              if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
1192                {
1193                active_count--;           /* Remove non-match possibility */
1194                next_active_state--;
1195                }
1196              count++;
1197              ADD_NEW_DATA(-state_offset, count, 0);
1198              }
1199          }          }
1200        break;        break;
1201    
1202        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1203    #ifdef SUPPORT_UCP
1204        case OP_PROP_EXTRA + OP_TYPEQUERY:        case OP_PROP_EXTRA + OP_TYPEQUERY:
1205        case OP_PROP_EXTRA + OP_TYPEMINQUERY:        case OP_PROP_EXTRA + OP_TYPEMINQUERY:
1206        case OP_PROP_EXTRA + OP_TYPEPOSQUERY:        case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
# Line 1182  for (;;) Line 1298  for (;;)
1298          ADD_NEW_DATA(-(state_offset + count), 0, ncount);          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1299          }          }
1300        break;        break;
1301    #endif
1302    
1303        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1304        case OP_ANYNL_EXTRA + OP_TYPEQUERY:        case OP_ANYNL_EXTRA + OP_TYPEQUERY:
# Line 1202  for (;;) Line 1319  for (;;)
1319          int ncount = 0;          int ncount = 0;
1320          switch (c)          switch (c)
1321            {            {
1322              case 0x000b:
1323              case 0x000c:
1324              case 0x0085:
1325              case 0x2028:
1326              case 0x2029:
1327              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1328              goto ANYNL02;
1329    
1330            case 0x000d:            case 0x000d:
1331            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1332            /* Fall through */            /* Fall through */
1333    
1334              ANYNL02:
1335              case 0x000a:
1336              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1337                  codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1338                {
1339                active_count--;           /* Remove non-match possibility */
1340                next_active_state--;
1341                }
1342              ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1343              break;
1344    
1345              default:
1346              break;
1347              }
1348            }
1349          break;
1350    
1351          /*-----------------------------------------------------------------*/
1352          case OP_VSPACE_EXTRA + OP_TYPEQUERY:
1353          case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
1354          case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
1355          count = 2;
1356          goto QS4;
1357    
1358          case OP_VSPACE_EXTRA + OP_TYPESTAR:
1359          case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
1360          case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
1361          count = 0;
1362    
1363          QS4:
1364          ADD_ACTIVE(state_offset + 2, 0);
1365          if (clen > 0)
1366            {
1367            BOOL OK;
1368            switch (c)
1369              {
1370            case 0x000a:            case 0x000a:
1371            case 0x000b:            case 0x000b:
1372            case 0x000c:            case 0x000c:
1373              case 0x000d:
1374            case 0x0085:            case 0x0085:
1375            case 0x2028:            case 0x2028:
1376            case 0x2029:            case 0x2029:
1377            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||            OK = TRUE;
1378                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)            break;
1379    
1380              default:
1381              OK = FALSE;
1382              break;
1383              }
1384            if (OK == (d == OP_VSPACE))
1385              {
1386              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
1387                  codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
1388              {              {
1389              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1390              next_active_state--;              next_active_state--;
1391              }              }
1392            ADD_NEW_DATA(-(state_offset + count), 0, ncount);            ADD_NEW_DATA(-(state_offset + count), 0, 0);
1393              }
1394            }
1395          break;
1396    
1397          /*-----------------------------------------------------------------*/
1398          case OP_HSPACE_EXTRA + OP_TYPEQUERY:
1399          case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
1400          case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
1401          count = 2;
1402          goto QS5;
1403    
1404          case OP_HSPACE_EXTRA + OP_TYPESTAR:
1405          case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
1406          case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
1407          count = 0;
1408    
1409          QS5:
1410          ADD_ACTIVE(state_offset + 2, 0);
1411          if (clen > 0)
1412            {
1413            BOOL OK;
1414            switch (c)
1415              {
1416              case 0x09:      /* HT */
1417              case 0x20:      /* SPACE */
1418              case 0xa0:      /* NBSP */
1419              case 0x1680:    /* OGHAM SPACE MARK */
1420              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1421              case 0x2000:    /* EN QUAD */
1422              case 0x2001:    /* EM QUAD */
1423              case 0x2002:    /* EN SPACE */
1424              case 0x2003:    /* EM SPACE */
1425              case 0x2004:    /* THREE-PER-EM SPACE */
1426              case 0x2005:    /* FOUR-PER-EM SPACE */
1427              case 0x2006:    /* SIX-PER-EM SPACE */
1428              case 0x2007:    /* FIGURE SPACE */
1429              case 0x2008:    /* PUNCTUATION SPACE */
1430              case 0x2009:    /* THIN SPACE */
1431              case 0x200A:    /* HAIR SPACE */
1432              case 0x202f:    /* NARROW NO-BREAK SPACE */
1433              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1434              case 0x3000:    /* IDEOGRAPHIC SPACE */
1435              OK = TRUE;
1436            break;            break;
1437    
1438            default:            default:
1439              OK = FALSE;
1440            break;            break;
1441            }            }
1442    
1443            if (OK == (d == OP_HSPACE))
1444              {
1445              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
1446                  codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
1447                {
1448                active_count--;           /* Remove non-match possibility */
1449                next_active_state--;
1450                }
1451              ADD_NEW_DATA(-(state_offset + count), 0, 0);
1452              }
1453          }          }
1454        break;        break;
1455    
1456        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1457    #ifdef SUPPORT_UCP
1458        case OP_PROP_EXTRA + OP_TYPEEXACT:        case OP_PROP_EXTRA + OP_TYPEEXACT:
1459        case OP_PROP_EXTRA + OP_TYPEUPTO:        case OP_PROP_EXTRA + OP_TYPEUPTO:
1460        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
# Line 1313  for (;;) Line 1542  for (;;)
1542            { ADD_NEW_DATA(-state_offset, count, ncount); }            { ADD_NEW_DATA(-state_offset, count, ncount); }
1543          }          }
1544        break;        break;
1545    #endif
1546    
1547        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1548        case OP_ANYNL_EXTRA + OP_TYPEEXACT:        case OP_ANYNL_EXTRA + OP_TYPEEXACT:
# Line 1327  for (;;) Line 1557  for (;;)
1557          int ncount = 0;          int ncount = 0;
1558          switch (c)          switch (c)
1559            {            {
1560              case 0x000b:
1561              case 0x000c:
1562              case 0x0085:
1563              case 0x2028:
1564              case 0x2029:
1565              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1566              goto ANYNL03;
1567    
1568            case 0x000d:            case 0x000d:
1569            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1570            /* Fall through */            /* Fall through */
1571    
1572              ANYNL03:
1573              case 0x000a:
1574              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1575                {
1576                active_count--;           /* Remove non-match possibility */
1577                next_active_state--;
1578                }
1579              if (++count >= GET2(code, 1))
1580                { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
1581              else
1582                { ADD_NEW_DATA(-state_offset, count, ncount); }
1583              break;
1584    
1585              default:
1586              break;
1587              }
1588            }
1589          break;
1590    
1591          /*-----------------------------------------------------------------*/
1592          case OP_VSPACE_EXTRA + OP_TYPEEXACT:
1593          case OP_VSPACE_EXTRA + OP_TYPEUPTO:
1594          case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
1595          case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
1596          if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1597            { ADD_ACTIVE(state_offset + 4, 0); }
1598          count = current_state->count;  /* Number already matched */
1599          if (clen > 0)
1600            {
1601            BOOL OK;
1602            switch (c)
1603              {
1604            case 0x000a:            case 0x000a:
1605            case 0x000b:            case 0x000b:
1606            case 0x000c:            case 0x000c:
1607              case 0x000d:
1608            case 0x0085:            case 0x0085:
1609            case 0x2028:            case 0x2028:
1610            case 0x2029:            case 0x2029:
1611            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)            OK = TRUE;
1612              break;
1613    
1614              default:
1615              OK = FALSE;
1616              }
1617    
1618            if (OK == (d == OP_VSPACE))
1619              {
1620              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
1621              {              {
1622              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1623              next_active_state--;              next_active_state--;
1624              }              }
1625            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1626              { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }              { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1627            else            else
1628              { ADD_NEW_DATA(-state_offset, count, ncount); }              { ADD_NEW_DATA(-state_offset, count, 0); }
1629              }
1630            }
1631          break;
1632    
1633          /*-----------------------------------------------------------------*/
1634          case OP_HSPACE_EXTRA + OP_TYPEEXACT:
1635          case OP_HSPACE_EXTRA + OP_TYPEUPTO:
1636          case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
1637          case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
1638          if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
1639            { ADD_ACTIVE(state_offset + 4, 0); }
1640          count = current_state->count;  /* Number already matched */
1641          if (clen > 0)
1642            {
1643            BOOL OK;
1644            switch (c)
1645              {
1646              case 0x09:      /* HT */
1647              case 0x20:      /* SPACE */
1648              case 0xa0:      /* NBSP */
1649              case 0x1680:    /* OGHAM SPACE MARK */
1650              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1651              case 0x2000:    /* EN QUAD */
1652              case 0x2001:    /* EM QUAD */
1653              case 0x2002:    /* EN SPACE */
1654              case 0x2003:    /* EM SPACE */
1655              case 0x2004:    /* THREE-PER-EM SPACE */
1656              case 0x2005:    /* FOUR-PER-EM SPACE */
1657              case 0x2006:    /* SIX-PER-EM SPACE */
1658              case 0x2007:    /* FIGURE SPACE */
1659              case 0x2008:    /* PUNCTUATION SPACE */
1660              case 0x2009:    /* THIN SPACE */
1661              case 0x200A:    /* HAIR SPACE */
1662              case 0x202f:    /* NARROW NO-BREAK SPACE */
1663              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1664              case 0x3000:    /* IDEOGRAPHIC SPACE */
1665              OK = TRUE;
1666            break;            break;
1667    
1668            default:            default:
1669              OK = FALSE;
1670            break;            break;
1671            }            }
1672    
1673            if (OK == (d == OP_HSPACE))
1674              {
1675              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
1676                {
1677                active_count--;           /* Remove non-match possibility */
1678                next_active_state--;
1679                }
1680              if (++count >= GET2(code, 1))
1681                { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1682              else
1683                { ADD_NEW_DATA(-state_offset, count, 0); }
1684              }
1685          }          }
1686        break;        break;
1687    
# Line 1429  for (;;) Line 1762  for (;;)
1762        case OP_ANYNL:        case OP_ANYNL:
1763        if (clen > 0) switch(c)        if (clen > 0) switch(c)
1764          {          {
         case 0x000a:  
1765          case 0x000b:          case 0x000b:
1766          case 0x000c:          case 0x000c:
1767          case 0x0085:          case 0x0085:
1768          case 0x2028:          case 0x2028:
1769          case 0x2029:          case 0x2029:
1770            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1771    
1772            case 0x000a:
1773          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
1774          break;          break;
1775    
1776          case 0x000d:          case 0x000d:
1777          if (ptr + 1 < end_subject && ptr[1] == 0x0a)          if (ptr + 1 < end_subject && ptr[1] == 0x0a)
1778            {            {
# Line 1451  for (;;) Line 1787  for (;;)
1787        break;        break;
1788    
1789        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1790          case OP_NOT_VSPACE:
1791          if (clen > 0) switch(c)
1792            {
1793            case 0x000a:
1794            case 0x000b:
1795            case 0x000c:
1796            case 0x000d:
1797            case 0x0085:
1798            case 0x2028:
1799            case 0x2029:
1800            break;
1801    
1802            default:
1803            ADD_NEW(state_offset + 1, 0);
1804            break;
1805            }
1806          break;
1807    
1808          /*-----------------------------------------------------------------*/
1809          case OP_VSPACE:
1810          if (clen > 0) switch(c)
1811            {
1812            case 0x000a:
1813            case 0x000b:
1814            case 0x000c:
1815            case 0x000d:
1816            case 0x0085:
1817            case 0x2028:
1818            case 0x2029:
1819            ADD_NEW(state_offset + 1, 0);
1820            break;
1821    
1822            default: break;
1823            }
1824          break;
1825    
1826          /*-----------------------------------------------------------------*/
1827          case OP_NOT_HSPACE:
1828          if (clen > 0) switch(c)
1829            {
1830            case 0x09:      /* HT */
1831            case 0x20:      /* SPACE */
1832            case 0xa0:      /* NBSP */
1833            case 0x1680:    /* OGHAM SPACE MARK */
1834            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1835            case 0x2000:    /* EN QUAD */
1836            case 0x2001:    /* EM QUAD */
1837            case 0x2002:    /* EN SPACE */
1838            case 0x2003:    /* EM SPACE */
1839            case 0x2004:    /* THREE-PER-EM SPACE */
1840            case 0x2005:    /* FOUR-PER-EM SPACE */
1841            case 0x2006:    /* SIX-PER-EM SPACE */
1842            case 0x2007:    /* FIGURE SPACE */
1843            case 0x2008:    /* PUNCTUATION SPACE */
1844            case 0x2009:    /* THIN SPACE */
1845            case 0x200A:    /* HAIR SPACE */
1846            case 0x202f:    /* NARROW NO-BREAK SPACE */
1847            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1848            case 0x3000:    /* IDEOGRAPHIC SPACE */
1849            break;
1850    
1851            default:
1852            ADD_NEW(state_offset + 1, 0);
1853            break;
1854            }
1855          break;
1856    
1857          /*-----------------------------------------------------------------*/
1858          case OP_HSPACE:
1859          if (clen > 0) switch(c)
1860            {
1861            case 0x09:      /* HT */
1862            case 0x20:      /* SPACE */
1863            case 0xa0:      /* NBSP */
1864            case 0x1680:    /* OGHAM SPACE MARK */
1865            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1866            case 0x2000:    /* EN QUAD */
1867            case 0x2001:    /* EM QUAD */
1868            case 0x2002:    /* EN SPACE */
1869            case 0x2003:    /* EM SPACE */
1870            case 0x2004:    /* THREE-PER-EM SPACE */
1871            case 0x2005:    /* FOUR-PER-EM SPACE */
1872            case 0x2006:    /* SIX-PER-EM SPACE */
1873            case 0x2007:    /* FIGURE SPACE */
1874            case 0x2008:    /* PUNCTUATION SPACE */
1875            case 0x2009:    /* THIN SPACE */
1876            case 0x200A:    /* HAIR SPACE */
1877            case 0x202f:    /* NARROW NO-BREAK SPACE */
1878            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1879            case 0x3000:    /* IDEOGRAPHIC SPACE */
1880            ADD_NEW(state_offset + 1, 0);
1881            break;
1882            }
1883          break;
1884    
1885          /*-----------------------------------------------------------------*/
1886        /* Match a negated single character. This is only used for one-byte        /* Match a negated single character. This is only used for one-byte
1887        characters, that is, we know that d < 256. The character we are        characters, that is, we know that d < 256. The character we are
1888        checking (c) can be multibyte. */        checking (c) can be multibyte. */
# Line 2073  Returns: > 0 => number of match Line 2505  Returns: > 0 => number of match
2505                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
2506  */  */
2507    
2508  PCRE_DATA_SCOPE int  PCRE_EXP_DEFN int
2509  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
2510    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
2511    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
# Line 2163  md->end_subject = end_subject; Line 2595  md->end_subject = end_subject;
2595  md->moptions = options;  md->moptions = options;
2596  md->poptions = re->options;  md->poptions = re->options;
2597    
2598    /* If no BSR option is set at match time, copy any set at compile time;
2599    failing that, default to PCRE_BSR_ANYCRLF if BSR_ANYCRLF is defined. */
2600    
2601    if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
2602      {
2603      if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
2604        md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
2605    #ifdef BSR_ANYCRLF
2606      else md->moptions |= PCRE_BSR_ANYCRLF;
2607    #endif
2608      }
2609    
2610  /* Handle different types of newline. The three bits give eight cases. If  /* Handle different types of newline. The three bits give eight cases. If
2611  nothing is set at run time, whatever was used at compile time applies. */  nothing is set at run time, whatever was used at compile time applies. */
2612    
2613  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) &  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
2614           PCRE_NEWLINE_BITS)           PCRE_NEWLINE_BITS)
2615    {    {
2616    case 0: newline = NEWLINE; break;   /* Compile-time default */    case 0: newline = NEWLINE; break;   /* Compile-time default */
# Line 2175  switch ((((options & PCRE_NEWLINE_BITS) Line 2619  switch ((((options & PCRE_NEWLINE_BITS)
2619    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
2620         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
2621    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
2622      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
2623    default: return PCRE_ERROR_BADNEWLINE;    default: return PCRE_ERROR_BADNEWLINE;
2624    }    }
2625    
2626  if (newline < 0)  if (newline == -2)
2627      {
2628      md->nltype = NLTYPE_ANYCRLF;
2629      }
2630    else if (newline < 0)
2631    {    {
2632    md->nltype = NLTYPE_ANY;    md->nltype = NLTYPE_ANY;
2633    }    }
# Line 2228  if (md->tables == NULL) md->tables = _pc Line 2677  if (md->tables == NULL) md->tables = _pc
2677  used in a loop when finding where to start. */  used in a loop when finding where to start. */
2678    
2679  lcc = md->tables + lcc_offset;  lcc = md->tables + lcc_offset;
2680  startline = (re->options & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
2681  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
2682    
2683  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_byte value is
# Line 2239  studied, there may be a bitmap of possib Line 2688  studied, there may be a bitmap of possib
2688    
2689  if (!anchored)  if (!anchored)
2690    {    {
2691    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
2692      {      {
2693      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
2694      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
# Line 2256  if (!anchored) Line 2705  if (!anchored)
2705  /* For anchored or unanchored matches, there may be a "last known required  /* For anchored or unanchored matches, there may be a "last known required
2706  character" set. */  character" set. */
2707    
2708  if ((re->options & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
2709    {    {
2710    req_byte = re->req_byte & 255;    req_byte = re->req_byte & 255;
2711    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
# Line 2308  for (;;) Line 2757  for (;;)
2757          {          {
2758          while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))          while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
2759            current_subject++;            current_subject++;
2760    
2761            /* If we have just passed a CR and the newline option is ANY or
2762            ANYCRLF, and we are now at a LF, advance the match position by one more
2763            character. */
2764    
2765            if (current_subject[-1] == '\r' &&
2766                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
2767                 current_subject < end_subject &&
2768                 *current_subject == '\n')
2769              current_subject++;
2770          }          }
2771        }        }
2772    
# Line 2416  for (;;) Line 2875  for (;;)
2875      }      }
2876    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
2877    
2878    /* If we have just passed a CR and the newline option is CRLF or ANY, and we    /* If we have just passed a CR and we are now at a LF, and the pattern does
2879    are now at a LF, advance the match position by one more character. */    not contain any explicit matches for \r or \n, and the newline option is CRLF
2880      or ANY or ANYCRLF, advance the match position by one more character. */
2881    
2882    if (current_subject[-1] == '\r' &&    if (current_subject[-1] == '\r' &&
2883         (md->nltype == NLTYPE_ANY || md->nllen == 2) &&        current_subject < end_subject &&
2884         current_subject < end_subject &&        *current_subject == '\n' &&
2885         *current_subject == '\n')        (re->flags & PCRE_HASCRORLF) == 0 &&
2886            (md->nltype == NLTYPE_ANY ||
2887             md->nltype == NLTYPE_ANYCRLF ||
2888             md->nllen == 2))
2889      current_subject++;      current_subject++;
2890    
2891    }   /* "Bumpalong" loop */    }   /* "Bumpalong" loop */

Legend:
Removed from v.117  
changed lines
  Added in v.305

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12