/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 144 by ph10, Mon Apr 2 13:32:07 2007 UTC | revision 182 by ph10, Wed Jun 13 15:09:54 2007 UTC
# Line 63  applications. */ Line 63  applications. */
63    
64  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
65  into others, under special conditions. A gap of 20 between the blocks should be  into others, under special conditions. A gap of 20 between the blocks should be
66  enough. */  enough. The resulting opcodes don't have to be less than 256 because they are
67    never stored, so we push them well clear of the normal opcodes. */
68    
69  #define OP_PROP_EXTRA 100  #define OP_PROP_EXTRA       300
70  #define OP_EXTUNI_EXTRA 120  #define OP_EXTUNI_EXTRA     320
71  #define OP_ANYNL_EXTRA 140  #define OP_ANYNL_EXTRA      340
72    #define OP_HSPACE_EXTRA     360
73    #define OP_VSPACE_EXTRA     380
74    
75    
76  /* This table identifies those opcodes that are followed immediately by a  /* This table identifies those opcodes that are followed immediately by a
77  character that is to be tested in some way. This makes it possible to  character that is to be tested in some way. This makes it possible to
78  centralize the loading of these characters. In the case of Type * etc, the  centralize the loading of these characters. In the case of Type * etc, the
79  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
80  small value. */  small value. ***NOTE*** If the start of this table is modified, the two tables
81    that follow must also be modified. */
82    
83  static uschar coptable[] = {  static uschar coptable[] = {
84    0,                             /* End                                    */    0,                             /* End                                    */
85    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
86      0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
87    0, 0,                          /* Any, Anybyte                           */    0, 0,                          /* Any, Anybyte                           */
88    0, 0, 0, 0,                    /* NOTPROP, PROP, EXTUNI, ANYNL           */    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */
89      0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
90    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
91    1,                             /* Char                                   */    1,                             /* Char                                   */
92    1,                             /* Charnc                                 */    1,                             /* Charnc                                 */
# Line 127  static uschar coptable[] = { Line 133  static uschar coptable[] = {
133  and \w */  and \w */
134    
135  static uschar toptable1[] = {  static uschar toptable1[] = {
136    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
137    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
138    ctype_space, ctype_space,    ctype_space, ctype_space,
139    ctype_word,  ctype_word,    ctype_word,  ctype_word,
# Line 135  static uschar toptable1[] = { Line 141  static uschar toptable1[] = {
141  };  };
142    
143  static uschar toptable2[] = {  static uschar toptable2[] = {
144    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
145    ctype_digit, 0,    ctype_digit, 0,
146    ctype_space, 0,    ctype_space, 0,
147    ctype_word,  0,    ctype_word,  0,
# Line 500  for (;;) Line 506  for (;;)
506      const uschar *code;      const uschar *code;
507      int state_offset = current_state->offset;      int state_offset = current_state->offset;
508      int count, codevalue;      int count, codevalue;
509    #ifdef SUPPORT_UCP
510      int chartype, script;      int chartype, script;
511    #endif
512    
513  #ifdef DEBUG  #ifdef DEBUG
514      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
# Line 555  for (;;) Line 563  for (;;)
563      permitted.      permitted.
564    
565      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
566      argument that is not a data character - but is always one byte long.      argument that is not a data character - but is always one byte long. We
567      Unfortunately, we have to take special action to deal with  \P, \p, and      have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in
568      \X in this case. To keep the other cases fast, convert these ones to new      this case. To keep the other cases fast, convert these ones to new opcodes.
569      opcodes. */      */
570    
571      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
572        {        {
# Line 576  for (;;) Line 584  for (;;)
584            case OP_PROP: codevalue += OP_PROP_EXTRA; break;            case OP_PROP: codevalue += OP_PROP_EXTRA; break;
585            case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;            case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
586            case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;            case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
587              case OP_NOT_HSPACE:
588              case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
589              case OP_NOT_VSPACE:
590              case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
591            default: break;            default: break;
592            }            }
593          }          }
# Line 783  for (;;) Line 795  for (;;)
795        break;        break;
796    
797    
 #ifdef SUPPORT_UCP  
   
798        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
799        /* Check the next character by Unicode property. We will get here only        /* Check the next character by Unicode property. We will get here only
800        if the support is in the binary; otherwise a compile-time error occurs.        if the support is in the binary; otherwise a compile-time error occurs.
801        */        */
802    
803    #ifdef SUPPORT_UCP
804        case OP_PROP:        case OP_PROP:
805        case OP_NOTPROP:        case OP_NOTPROP:
806        if (clen > 0)        if (clen > 0)
# Line 970  for (;;) Line 981  for (;;)
981        argument. It keeps the code above fast for the other cases. The argument        argument. It keeps the code above fast for the other cases. The argument
982        is in the d variable. */        is in the d variable. */
983    
984    #ifdef SUPPORT_UCP
985        case OP_PROP_EXTRA + OP_TYPEPLUS:        case OP_PROP_EXTRA + OP_TYPEPLUS:
986        case OP_PROP_EXTRA + OP_TYPEMINPLUS:        case OP_PROP_EXTRA + OP_TYPEMINPLUS:
987        case OP_PROP_EXTRA + OP_TYPEPOSPLUS:        case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
# Line 1049  for (;;) Line 1061  for (;;)
1061          ADD_NEW_DATA(-state_offset, count, ncount);          ADD_NEW_DATA(-state_offset, count, ncount);
1062          }          }
1063        break;        break;
1064    #endif
1065    
1066        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1067        case OP_ANYNL_EXTRA + OP_TYPEPLUS:        case OP_ANYNL_EXTRA + OP_TYPEPLUS:
# Line 1085  for (;;) Line 1098  for (;;)
1098        break;        break;
1099    
1100        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1101          case OP_VSPACE_EXTRA + OP_TYPEPLUS:
1102          case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
1103          case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
1104          count = current_state->count;  /* Already matched */
1105          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1106          if (clen > 0)
1107            {
1108            BOOL OK;
1109            switch (c)
1110              {
1111              case 0x000a:
1112              case 0x000b:
1113              case 0x000c:
1114              case 0x000d:
1115              case 0x0085:
1116              case 0x2028:
1117              case 0x2029:
1118              OK = TRUE;
1119              break;
1120    
1121              default:
1122              OK = FALSE;
1123              break;
1124              }
1125    
1126            if (OK == (d == OP_VSPACE))
1127              {
1128              if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
1129                {
1130                active_count--;           /* Remove non-match possibility */
1131                next_active_state--;
1132                }
1133              count++;
1134              ADD_NEW_DATA(-state_offset, count, 0);
1135              }
1136            }
1137          break;
1138    
1139          /*-----------------------------------------------------------------*/
1140          case OP_HSPACE_EXTRA + OP_TYPEPLUS:
1141          case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
1142          case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
1143          count = current_state->count;  /* Already matched */
1144          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1145          if (clen > 0)
1146            {
1147            BOOL OK;
1148            switch (c)
1149              {
1150              case 0x09:      /* HT */
1151              case 0x20:      /* SPACE */
1152              case 0xa0:      /* NBSP */
1153              case 0x1680:    /* OGHAM SPACE MARK */
1154              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1155              case 0x2000:    /* EN QUAD */
1156              case 0x2001:    /* EM QUAD */
1157              case 0x2002:    /* EN SPACE */
1158              case 0x2003:    /* EM SPACE */
1159              case 0x2004:    /* THREE-PER-EM SPACE */
1160              case 0x2005:    /* FOUR-PER-EM SPACE */
1161              case 0x2006:    /* SIX-PER-EM SPACE */
1162              case 0x2007:    /* FIGURE SPACE */
1163              case 0x2008:    /* PUNCTUATION SPACE */
1164              case 0x2009:    /* THIN SPACE */
1165              case 0x200A:    /* HAIR SPACE */
1166              case 0x202f:    /* NARROW NO-BREAK SPACE */
1167              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1168              case 0x3000:    /* IDEOGRAPHIC SPACE */
1169              OK = TRUE;
1170              break;
1171    
1172              default:
1173              OK = FALSE;
1174              break;
1175              }
1176    
1177            if (OK == (d == OP_HSPACE))
1178              {
1179              if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
1180                {
1181                active_count--;           /* Remove non-match possibility */
1182                next_active_state--;
1183                }
1184              count++;
1185              ADD_NEW_DATA(-state_offset, count, 0);
1186              }
1187            }
1188          break;
1189    
1190          /*-----------------------------------------------------------------*/
1191    #ifdef SUPPORT_UCP
1192        case OP_PROP_EXTRA + OP_TYPEQUERY:        case OP_PROP_EXTRA + OP_TYPEQUERY:
1193        case OP_PROP_EXTRA + OP_TYPEMINQUERY:        case OP_PROP_EXTRA + OP_TYPEMINQUERY:
1194        case OP_PROP_EXTRA + OP_TYPEPOSQUERY:        case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
# Line 1182  for (;;) Line 1286  for (;;)
1286          ADD_NEW_DATA(-(state_offset + count), 0, ncount);          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1287          }          }
1288        break;        break;
1289    #endif
1290    
1291        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1292        case OP_ANYNL_EXTRA + OP_TYPEQUERY:        case OP_ANYNL_EXTRA + OP_TYPEQUERY:
# Line 1226  for (;;) Line 1331  for (;;)
1331        break;        break;
1332    
1333        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1334          case OP_VSPACE_EXTRA + OP_TYPEQUERY:
1335          case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
1336          case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
1337          count = 2;
1338          goto QS4;
1339    
1340          case OP_VSPACE_EXTRA + OP_TYPESTAR:
1341          case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
1342          case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
1343          count = 0;
1344    
1345          QS4:
1346          ADD_ACTIVE(state_offset + 2, 0);
1347          if (clen > 0)
1348            {
1349            BOOL OK;
1350            switch (c)
1351              {
1352              case 0x000a:
1353              case 0x000b:
1354              case 0x000c:
1355              case 0x000d:
1356              case 0x0085:
1357              case 0x2028:
1358              case 0x2029:
1359              OK = TRUE;
1360              break;
1361    
1362              default:
1363              OK = FALSE;
1364              break;
1365              }
1366            if (OK == (d == OP_VSPACE))
1367              {
1368              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
1369                  codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
1370                {
1371                active_count--;           /* Remove non-match possibility */
1372                next_active_state--;
1373                }
1374              ADD_NEW_DATA(-(state_offset + count), 0, 0);
1375              }
1376            }
1377          break;
1378    
1379          /*-----------------------------------------------------------------*/
1380          case OP_HSPACE_EXTRA + OP_TYPEQUERY:
1381          case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
1382          case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
1383          count = 2;
1384          goto QS5;
1385    
1386          case OP_HSPACE_EXTRA + OP_TYPESTAR:
1387          case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
1388          case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
1389          count = 0;
1390    
1391          QS5:
1392          ADD_ACTIVE(state_offset + 2, 0);
1393          if (clen > 0)
1394            {
1395            BOOL OK;
1396            switch (c)
1397              {
1398              case 0x09:      /* HT */
1399              case 0x20:      /* SPACE */
1400              case 0xa0:      /* NBSP */
1401              case 0x1680:    /* OGHAM SPACE MARK */
1402              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1403              case 0x2000:    /* EN QUAD */
1404              case 0x2001:    /* EM QUAD */
1405              case 0x2002:    /* EN SPACE */
1406              case 0x2003:    /* EM SPACE */
1407              case 0x2004:    /* THREE-PER-EM SPACE */
1408              case 0x2005:    /* FOUR-PER-EM SPACE */
1409              case 0x2006:    /* SIX-PER-EM SPACE */
1410              case 0x2007:    /* FIGURE SPACE */
1411              case 0x2008:    /* PUNCTUATION SPACE */
1412              case 0x2009:    /* THIN SPACE */
1413              case 0x200A:    /* HAIR SPACE */
1414              case 0x202f:    /* NARROW NO-BREAK SPACE */
1415              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1416              case 0x3000:    /* IDEOGRAPHIC SPACE */
1417              OK = TRUE;
1418              break;
1419    
1420              default:
1421              OK = FALSE;
1422              break;
1423              }
1424    
1425            if (OK == (d == OP_HSPACE))
1426              {
1427              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
1428                  codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
1429                {
1430                active_count--;           /* Remove non-match possibility */
1431                next_active_state--;
1432                }
1433              ADD_NEW_DATA(-(state_offset + count), 0, 0);
1434              }
1435            }
1436          break;
1437    
1438          /*-----------------------------------------------------------------*/
1439    #ifdef SUPPORT_UCP
1440        case OP_PROP_EXTRA + OP_TYPEEXACT:        case OP_PROP_EXTRA + OP_TYPEEXACT:
1441        case OP_PROP_EXTRA + OP_TYPEUPTO:        case OP_PROP_EXTRA + OP_TYPEUPTO:
1442        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
# Line 1313  for (;;) Line 1524  for (;;)
1524            { ADD_NEW_DATA(-state_offset, count, ncount); }            { ADD_NEW_DATA(-state_offset, count, ncount); }
1525          }          }
1526        break;        break;
1527    #endif
1528    
1529        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1530        case OP_ANYNL_EXTRA + OP_TYPEEXACT:        case OP_ANYNL_EXTRA + OP_TYPEEXACT:
# Line 1352  for (;;) Line 1564  for (;;)
1564          }          }
1565        break;        break;
1566    
1567          /*-----------------------------------------------------------------*/
1568          case OP_VSPACE_EXTRA + OP_TYPEEXACT:
1569          case OP_VSPACE_EXTRA + OP_TYPEUPTO:
1570          case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
1571          case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
1572          if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1573            { ADD_ACTIVE(state_offset + 4, 0); }
1574          count = current_state->count;  /* Number already matched */
1575          if (clen > 0)
1576            {
1577            BOOL OK;
1578            switch (c)
1579              {
1580              case 0x000a:
1581              case 0x000b:
1582              case 0x000c:
1583              case 0x000d:
1584              case 0x0085:
1585              case 0x2028:
1586              case 0x2029:
1587              OK = TRUE;
1588              break;
1589    
1590              default:
1591              OK = FALSE;
1592              }
1593    
1594            if (OK == (d == OP_VSPACE))
1595              {
1596              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
1597                {
1598                active_count--;           /* Remove non-match possibility */
1599                next_active_state--;
1600                }
1601              if (++count >= GET2(code, 1))
1602                { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1603              else
1604                { ADD_NEW_DATA(-state_offset, count, 0); }
1605              }
1606            }
1607          break;
1608    
1609          /*-----------------------------------------------------------------*/
1610          case OP_HSPACE_EXTRA + OP_TYPEEXACT:
1611          case OP_HSPACE_EXTRA + OP_TYPEUPTO:
1612          case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
1613          case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
1614          if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
1615            { ADD_ACTIVE(state_offset + 4, 0); }
1616          count = current_state->count;  /* Number already matched */
1617          if (clen > 0)
1618            {
1619            BOOL OK;
1620            switch (c)
1621              {
1622              case 0x09:      /* HT */
1623              case 0x20:      /* SPACE */
1624              case 0xa0:      /* NBSP */
1625              case 0x1680:    /* OGHAM SPACE MARK */
1626              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1627              case 0x2000:    /* EN QUAD */
1628              case 0x2001:    /* EM QUAD */
1629              case 0x2002:    /* EN SPACE */
1630              case 0x2003:    /* EM SPACE */
1631              case 0x2004:    /* THREE-PER-EM SPACE */
1632              case 0x2005:    /* FOUR-PER-EM SPACE */
1633              case 0x2006:    /* SIX-PER-EM SPACE */
1634              case 0x2007:    /* FIGURE SPACE */
1635              case 0x2008:    /* PUNCTUATION SPACE */
1636              case 0x2009:    /* THIN SPACE */
1637              case 0x200A:    /* HAIR SPACE */
1638              case 0x202f:    /* NARROW NO-BREAK SPACE */
1639              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1640              case 0x3000:    /* IDEOGRAPHIC SPACE */
1641              OK = TRUE;
1642              break;
1643    
1644              default:
1645              OK = FALSE;
1646              break;
1647              }
1648    
1649            if (OK == (d == OP_HSPACE))
1650              {
1651              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
1652                {
1653                active_count--;           /* Remove non-match possibility */
1654                next_active_state--;
1655                }
1656              if (++count >= GET2(code, 1))
1657                { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1658              else
1659                { ADD_NEW_DATA(-state_offset, count, 0); }
1660              }
1661            }
1662          break;
1663    
1664  /* ========================================================================== */  /* ========================================================================== */
1665        /* These opcodes are followed by a character that is usually compared        /* These opcodes are followed by a character that is usually compared
1666        to the current subject character; it is loaded into d. We still get        to the current subject character; it is loaded into d. We still get
# Line 1451  for (;;) Line 1760  for (;;)
1760        break;        break;
1761    
1762        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1763          case OP_NOT_VSPACE:
1764          if (clen > 0) switch(c)
1765            {
1766            case 0x000a:
1767            case 0x000b:
1768            case 0x000c:
1769            case 0x000d:
1770            case 0x0085:
1771            case 0x2028:
1772            case 0x2029:
1773            break;
1774    
1775            default:
1776            ADD_NEW(state_offset + 1, 0);
1777            break;
1778            }
1779          break;
1780    
1781          /*-----------------------------------------------------------------*/
1782          case OP_VSPACE:
1783          if (clen > 0) switch(c)
1784            {
1785            case 0x000a:
1786            case 0x000b:
1787            case 0x000c:
1788            case 0x000d:
1789            case 0x0085:
1790            case 0x2028:
1791            case 0x2029:
1792            ADD_NEW(state_offset + 1, 0);
1793            break;
1794    
1795            default: break;
1796            }
1797          break;
1798    
1799          /*-----------------------------------------------------------------*/
1800          case OP_NOT_HSPACE:
1801          if (clen > 0) switch(c)
1802            {
1803            case 0x09:      /* HT */
1804            case 0x20:      /* SPACE */
1805            case 0xa0:      /* NBSP */
1806            case 0x1680:    /* OGHAM SPACE MARK */
1807            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1808            case 0x2000:    /* EN QUAD */
1809            case 0x2001:    /* EM QUAD */
1810            case 0x2002:    /* EN SPACE */
1811            case 0x2003:    /* EM SPACE */
1812            case 0x2004:    /* THREE-PER-EM SPACE */
1813            case 0x2005:    /* FOUR-PER-EM SPACE */
1814            case 0x2006:    /* SIX-PER-EM SPACE */
1815            case 0x2007:    /* FIGURE SPACE */
1816            case 0x2008:    /* PUNCTUATION SPACE */
1817            case 0x2009:    /* THIN SPACE */
1818            case 0x200A:    /* HAIR SPACE */
1819            case 0x202f:    /* NARROW NO-BREAK SPACE */
1820            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1821            case 0x3000:    /* IDEOGRAPHIC SPACE */
1822            break;
1823    
1824            default:
1825            ADD_NEW(state_offset + 1, 0);
1826            break;
1827            }
1828          break;
1829    
1830          /*-----------------------------------------------------------------*/
1831          case OP_HSPACE:
1832          if (clen > 0) switch(c)
1833            {
1834            case 0x09:      /* HT */
1835            case 0x20:      /* SPACE */
1836            case 0xa0:      /* NBSP */
1837            case 0x1680:    /* OGHAM SPACE MARK */
1838            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1839            case 0x2000:    /* EN QUAD */
1840            case 0x2001:    /* EM QUAD */
1841            case 0x2002:    /* EN SPACE */
1842            case 0x2003:    /* EM SPACE */
1843            case 0x2004:    /* THREE-PER-EM SPACE */
1844            case 0x2005:    /* FOUR-PER-EM SPACE */
1845            case 0x2006:    /* SIX-PER-EM SPACE */
1846            case 0x2007:    /* FIGURE SPACE */
1847            case 0x2008:    /* PUNCTUATION SPACE */
1848            case 0x2009:    /* THIN SPACE */
1849            case 0x200A:    /* HAIR SPACE */
1850            case 0x202f:    /* NARROW NO-BREAK SPACE */
1851            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1852            case 0x3000:    /* IDEOGRAPHIC SPACE */
1853            ADD_NEW(state_offset + 1, 0);
1854            break;
1855            }
1856          break;
1857    
1858          /*-----------------------------------------------------------------*/
1859        /* Match a negated single character. This is only used for one-byte        /* Match a negated single character. This is only used for one-byte
1860        characters, that is, we know that d < 256. The character we are        characters, that is, we know that d < 256. The character we are
1861        checking (c) can be multibyte. */        checking (c) can be multibyte. */
# Line 2073  Returns: > 0 => number of match Line 2478  Returns: > 0 => number of match
2478                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
2479  */  */
2480    
2481  PCRE_DATA_SCOPE int  PCRE_EXP_DEFN int
2482  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
2483    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
2484    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
# Line 2175  switch ((((options & PCRE_NEWLINE_BITS) Line 2580  switch ((((options & PCRE_NEWLINE_BITS)
2580    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
2581         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
2582    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
2583      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
2584    default: return PCRE_ERROR_BADNEWLINE;    default: return PCRE_ERROR_BADNEWLINE;
2585    }    }
2586    
2587  if (newline < 0)  if (newline == -2)
2588      {
2589      md->nltype = NLTYPE_ANYCRLF;
2590      }
2591    else if (newline < 0)
2592    {    {
2593    md->nltype = NLTYPE_ANY;    md->nltype = NLTYPE_ANY;
2594    }    }
# Line 2309  for (;;) Line 2719  for (;;)
2719          while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))          while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
2720            current_subject++;            current_subject++;
2721    
2722          /* If we have just passed a CR and the newline option is ANY, and we          /* If we have just passed a CR and the newline option is ANY or
2723          are now at a LF, advance the match position by one more character. */          ANYCRLF, and we are now at a LF, advance the match position by one more
2724            character. */
2725    
2726          if (current_subject[-1] == '\r' &&          if (current_subject[-1] == '\r' &&
2727               md->nltype == NLTYPE_ANY &&               (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
2728               current_subject < end_subject &&               current_subject < end_subject &&
2729               *current_subject == '\n')               *current_subject == '\n')
2730            current_subject++;            current_subject++;
# Line 2425  for (;;) Line 2836  for (;;)
2836      }      }
2837    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
2838    
2839    /* If we have just passed a CR and the newline option is CRLF or ANY, and we    /* If we have just passed a CR and the newline option is CRLF or ANY or
2840    are now at a LF, advance the match position by one more character. */    ANYCRLF, and we are now at a LF, advance the match position by one more
2841      character. */
2842    
2843    if (current_subject[-1] == '\r' &&    if (current_subject[-1] == '\r' &&
2844         (md->nltype == NLTYPE_ANY || md->nllen == 2) &&         (md->nltype == NLTYPE_ANY ||
2845            md->nltype == NLTYPE_ANYCRLF ||
2846            md->nllen == 2) &&
2847         current_subject < end_subject &&         current_subject < end_subject &&
2848         *current_subject == '\n')         *current_subject == '\n')
2849      current_subject++;      current_subject++;

Legend:
Removed from v.144  
changed lines
  Added in v.182

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12