/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 97 by ph10, Mon Mar 5 12:36:47 2007 UTC revision 227 by ph10, Tue Aug 21 15:00:15 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 44  FSM). This is NOT Perl-compatible, but Line 44  FSM). This is NOT Perl-compatible, but
44  applications. */  applications. */
45    
46    
47    #ifdef HAVE_CONFIG_H
48    #include <config.h>
49    #endif
50    
51  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
52  #define PSSTART start_subject  /* Field containing processed string start */  #define PSSTART start_subject  /* Field containing processed string start */
53  #define PSEND   end_subject    /* Field containing processed string end */  #define PSEND   end_subject    /* Field containing processed string end */
# Line 63  applications. */ Line 67  applications. */
67    
68  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
69  into others, under special conditions. A gap of 20 between the blocks should be  into others, under special conditions. A gap of 20 between the blocks should be
70  enough. */  enough. The resulting opcodes don't have to be less than 256 because they are
71    never stored, so we push them well clear of the normal opcodes. */
72    
73  #define OP_PROP_EXTRA 100  #define OP_PROP_EXTRA       300
74  #define OP_EXTUNI_EXTRA 120  #define OP_EXTUNI_EXTRA     320
75  #define OP_ANYNL_EXTRA 140  #define OP_ANYNL_EXTRA      340
76    #define OP_HSPACE_EXTRA     360
77    #define OP_VSPACE_EXTRA     380
78    
79    
80  /* This table identifies those opcodes that are followed immediately by a  /* This table identifies those opcodes that are followed immediately by a
81  character that is to be tested in some way. This makes it possible to  character that is to be tested in some way. This makes it possible to
82  centralize the loading of these characters. In the case of Type * etc, the  centralize the loading of these characters. In the case of Type * etc, the
83  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
84  small value. */  small value. ***NOTE*** If the start of this table is modified, the two tables
85    that follow must also be modified. */
86    
87  static uschar coptable[] = {  static uschar coptable[] = {
88    0,                             /* End                                    */    0,                             /* End                                    */
89    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
90      0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
91    0, 0,                          /* Any, Anybyte                           */    0, 0,                          /* Any, Anybyte                           */
92    0, 0, 0, 0,                    /* NOTPROP, PROP, EXTUNI, ANYNL           */    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */
93      0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
94    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
95    1,                             /* Char                                   */    1,                             /* Char                                   */
96    1,                             /* Charnc                                 */    1,                             /* Charnc                                 */
# Line 120  static uschar coptable[] = { Line 130  static uschar coptable[] = {
130    0,                             /* CREF                                   */    0,                             /* CREF                                   */
131    0,                             /* RREF                                   */    0,                             /* RREF                                   */
132    0,                             /* DEF                                    */    0,                             /* DEF                                    */
133    0, 0                           /* BRAZERO, BRAMINZERO                    */    0, 0,                          /* BRAZERO, BRAMINZERO                    */
134      0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
135      0, 0                           /* FAIL, ACCEPT                           */
136  };  };
137    
138  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
139  and \w */  and \w */
140    
141  static uschar toptable1[] = {  static uschar toptable1[] = {
142    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
143    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
144    ctype_space, ctype_space,    ctype_space, ctype_space,
145    ctype_word,  ctype_word,    ctype_word,  ctype_word,
# Line 135  static uschar toptable1[] = { Line 147  static uschar toptable1[] = {
147  };  };
148    
149  static uschar toptable2[] = {  static uschar toptable2[] = {
150    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
151    ctype_digit, 0,    ctype_digit, 0,
152    ctype_space, 0,    ctype_space, 0,
153    ctype_word,  0,    ctype_word,  0,
# Line 500  for (;;) Line 512  for (;;)
512      const uschar *code;      const uschar *code;
513      int state_offset = current_state->offset;      int state_offset = current_state->offset;
514      int count, codevalue;      int count, codevalue;
515    #ifdef SUPPORT_UCP
516      int chartype, script;      int chartype, script;
517    #endif
518    
519  #ifdef DEBUG  #ifdef DEBUG
520      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
# Line 555  for (;;) Line 569  for (;;)
569      permitted.      permitted.
570    
571      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
572      argument that is not a data character - but is always one byte long.      argument that is not a data character - but is always one byte long. We
573      Unfortunately, we have to take special action to deal with  \P, \p, and      have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in
574      \X in this case. To keep the other cases fast, convert these ones to new      this case. To keep the other cases fast, convert these ones to new opcodes.
575      opcodes. */      */
576    
577      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
578        {        {
# Line 576  for (;;) Line 590  for (;;)
590            case OP_PROP: codevalue += OP_PROP_EXTRA; break;            case OP_PROP: codevalue += OP_PROP_EXTRA; break;
591            case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;            case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
592            case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;            case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
593              case OP_NOT_HSPACE:
594              case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
595              case OP_NOT_VSPACE:
596              case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
597            default: break;            default: break;
598            }            }
599          }          }
# Line 783  for (;;) Line 801  for (;;)
801        break;        break;
802    
803    
 #ifdef SUPPORT_UCP  
   
804        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
805        /* Check the next character by Unicode property. We will get here only        /* Check the next character by Unicode property. We will get here only
806        if the support is in the binary; otherwise a compile-time error occurs.        if the support is in the binary; otherwise a compile-time error occurs.
807        */        */
808    
809    #ifdef SUPPORT_UCP
810        case OP_PROP:        case OP_PROP:
811        case OP_NOTPROP:        case OP_NOTPROP:
812        if (clen > 0)        if (clen > 0)
# Line 970  for (;;) Line 987  for (;;)
987        argument. It keeps the code above fast for the other cases. The argument        argument. It keeps the code above fast for the other cases. The argument
988        is in the d variable. */        is in the d variable. */
989    
990    #ifdef SUPPORT_UCP
991        case OP_PROP_EXTRA + OP_TYPEPLUS:        case OP_PROP_EXTRA + OP_TYPEPLUS:
992        case OP_PROP_EXTRA + OP_TYPEMINPLUS:        case OP_PROP_EXTRA + OP_TYPEMINPLUS:
993        case OP_PROP_EXTRA + OP_TYPEPOSPLUS:        case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
# Line 1049  for (;;) Line 1067  for (;;)
1067          ADD_NEW_DATA(-state_offset, count, ncount);          ADD_NEW_DATA(-state_offset, count, ncount);
1068          }          }
1069        break;        break;
1070    #endif
1071    
1072        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1073        case OP_ANYNL_EXTRA + OP_TYPEPLUS:        case OP_ANYNL_EXTRA + OP_TYPEPLUS:
# Line 1085  for (;;) Line 1104  for (;;)
1104        break;        break;
1105    
1106        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1107          case OP_VSPACE_EXTRA + OP_TYPEPLUS:
1108          case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
1109          case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
1110          count = current_state->count;  /* Already matched */
1111          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1112          if (clen > 0)
1113            {
1114            BOOL OK;
1115            switch (c)
1116              {
1117              case 0x000a:
1118              case 0x000b:
1119              case 0x000c:
1120              case 0x000d:
1121              case 0x0085:
1122              case 0x2028:
1123              case 0x2029:
1124              OK = TRUE;
1125              break;
1126    
1127              default:
1128              OK = FALSE;
1129              break;
1130              }
1131    
1132            if (OK == (d == OP_VSPACE))
1133              {
1134              if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
1135                {
1136                active_count--;           /* Remove non-match possibility */
1137                next_active_state--;
1138                }
1139              count++;
1140              ADD_NEW_DATA(-state_offset, count, 0);
1141              }
1142            }
1143          break;
1144    
1145          /*-----------------------------------------------------------------*/
1146          case OP_HSPACE_EXTRA + OP_TYPEPLUS:
1147          case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
1148          case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
1149          count = current_state->count;  /* Already matched */
1150          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1151          if (clen > 0)
1152            {
1153            BOOL OK;
1154            switch (c)
1155              {
1156              case 0x09:      /* HT */
1157              case 0x20:      /* SPACE */
1158              case 0xa0:      /* NBSP */
1159              case 0x1680:    /* OGHAM SPACE MARK */
1160              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1161              case 0x2000:    /* EN QUAD */
1162              case 0x2001:    /* EM QUAD */
1163              case 0x2002:    /* EN SPACE */
1164              case 0x2003:    /* EM SPACE */
1165              case 0x2004:    /* THREE-PER-EM SPACE */
1166              case 0x2005:    /* FOUR-PER-EM SPACE */
1167              case 0x2006:    /* SIX-PER-EM SPACE */
1168              case 0x2007:    /* FIGURE SPACE */
1169              case 0x2008:    /* PUNCTUATION SPACE */
1170              case 0x2009:    /* THIN SPACE */
1171              case 0x200A:    /* HAIR SPACE */
1172              case 0x202f:    /* NARROW NO-BREAK SPACE */
1173              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1174              case 0x3000:    /* IDEOGRAPHIC SPACE */
1175              OK = TRUE;
1176              break;
1177    
1178              default:
1179              OK = FALSE;
1180              break;
1181              }
1182    
1183            if (OK == (d == OP_HSPACE))
1184              {
1185              if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
1186                {
1187                active_count--;           /* Remove non-match possibility */
1188                next_active_state--;
1189                }
1190              count++;
1191              ADD_NEW_DATA(-state_offset, count, 0);
1192              }
1193            }
1194          break;
1195    
1196          /*-----------------------------------------------------------------*/
1197    #ifdef SUPPORT_UCP
1198        case OP_PROP_EXTRA + OP_TYPEQUERY:        case OP_PROP_EXTRA + OP_TYPEQUERY:
1199        case OP_PROP_EXTRA + OP_TYPEMINQUERY:        case OP_PROP_EXTRA + OP_TYPEMINQUERY:
1200        case OP_PROP_EXTRA + OP_TYPEPOSQUERY:        case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
# Line 1182  for (;;) Line 1292  for (;;)
1292          ADD_NEW_DATA(-(state_offset + count), 0, ncount);          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1293          }          }
1294        break;        break;
1295    #endif
1296    
1297        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1298        case OP_ANYNL_EXTRA + OP_TYPEQUERY:        case OP_ANYNL_EXTRA + OP_TYPEQUERY:
# Line 1226  for (;;) Line 1337  for (;;)
1337        break;        break;
1338    
1339        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1340          case OP_VSPACE_EXTRA + OP_TYPEQUERY:
1341          case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
1342          case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
1343          count = 2;
1344          goto QS4;
1345    
1346          case OP_VSPACE_EXTRA + OP_TYPESTAR:
1347          case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
1348          case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
1349          count = 0;
1350    
1351          QS4:
1352          ADD_ACTIVE(state_offset + 2, 0);
1353          if (clen > 0)
1354            {
1355            BOOL OK;
1356            switch (c)
1357              {
1358              case 0x000a:
1359              case 0x000b:
1360              case 0x000c:
1361              case 0x000d:
1362              case 0x0085:
1363              case 0x2028:
1364              case 0x2029:
1365              OK = TRUE;
1366              break;
1367    
1368              default:
1369              OK = FALSE;
1370              break;
1371              }
1372            if (OK == (d == OP_VSPACE))
1373              {
1374              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
1375                  codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
1376                {
1377                active_count--;           /* Remove non-match possibility */
1378                next_active_state--;
1379                }
1380              ADD_NEW_DATA(-(state_offset + count), 0, 0);
1381              }
1382            }
1383          break;
1384    
1385          /*-----------------------------------------------------------------*/
1386          case OP_HSPACE_EXTRA + OP_TYPEQUERY:
1387          case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
1388          case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
1389          count = 2;
1390          goto QS5;
1391    
1392          case OP_HSPACE_EXTRA + OP_TYPESTAR:
1393          case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
1394          case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
1395          count = 0;
1396    
1397          QS5:
1398          ADD_ACTIVE(state_offset + 2, 0);
1399          if (clen > 0)
1400            {
1401            BOOL OK;
1402            switch (c)
1403              {
1404              case 0x09:      /* HT */
1405              case 0x20:      /* SPACE */
1406              case 0xa0:      /* NBSP */
1407              case 0x1680:    /* OGHAM SPACE MARK */
1408              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1409              case 0x2000:    /* EN QUAD */
1410              case 0x2001:    /* EM QUAD */
1411              case 0x2002:    /* EN SPACE */
1412              case 0x2003:    /* EM SPACE */
1413              case 0x2004:    /* THREE-PER-EM SPACE */
1414              case 0x2005:    /* FOUR-PER-EM SPACE */
1415              case 0x2006:    /* SIX-PER-EM SPACE */
1416              case 0x2007:    /* FIGURE SPACE */
1417              case 0x2008:    /* PUNCTUATION SPACE */
1418              case 0x2009:    /* THIN SPACE */
1419              case 0x200A:    /* HAIR SPACE */
1420              case 0x202f:    /* NARROW NO-BREAK SPACE */
1421              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1422              case 0x3000:    /* IDEOGRAPHIC SPACE */
1423              OK = TRUE;
1424              break;
1425    
1426              default:
1427              OK = FALSE;
1428              break;
1429              }
1430    
1431            if (OK == (d == OP_HSPACE))
1432              {
1433              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
1434                  codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
1435                {
1436                active_count--;           /* Remove non-match possibility */
1437                next_active_state--;
1438                }
1439              ADD_NEW_DATA(-(state_offset + count), 0, 0);
1440              }
1441            }
1442          break;
1443    
1444          /*-----------------------------------------------------------------*/
1445    #ifdef SUPPORT_UCP
1446        case OP_PROP_EXTRA + OP_TYPEEXACT:        case OP_PROP_EXTRA + OP_TYPEEXACT:
1447        case OP_PROP_EXTRA + OP_TYPEUPTO:        case OP_PROP_EXTRA + OP_TYPEUPTO:
1448        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
# Line 1313  for (;;) Line 1530  for (;;)
1530            { ADD_NEW_DATA(-state_offset, count, ncount); }            { ADD_NEW_DATA(-state_offset, count, ncount); }
1531          }          }
1532        break;        break;
1533    #endif
1534    
1535        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1536        case OP_ANYNL_EXTRA + OP_TYPEEXACT:        case OP_ANYNL_EXTRA + OP_TYPEEXACT:
# Line 1352  for (;;) Line 1570  for (;;)
1570          }          }
1571        break;        break;
1572    
1573          /*-----------------------------------------------------------------*/
1574          case OP_VSPACE_EXTRA + OP_TYPEEXACT:
1575          case OP_VSPACE_EXTRA + OP_TYPEUPTO:
1576          case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
1577          case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
1578          if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1579            { ADD_ACTIVE(state_offset + 4, 0); }
1580          count = current_state->count;  /* Number already matched */
1581          if (clen > 0)
1582            {
1583            BOOL OK;
1584            switch (c)
1585              {
1586              case 0x000a:
1587              case 0x000b:
1588              case 0x000c:
1589              case 0x000d:
1590              case 0x0085:
1591              case 0x2028:
1592              case 0x2029:
1593              OK = TRUE;
1594              break;
1595    
1596              default:
1597              OK = FALSE;
1598              }
1599    
1600            if (OK == (d == OP_VSPACE))
1601              {
1602              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
1603                {
1604                active_count--;           /* Remove non-match possibility */
1605                next_active_state--;
1606                }
1607              if (++count >= GET2(code, 1))
1608                { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1609              else
1610                { ADD_NEW_DATA(-state_offset, count, 0); }
1611              }
1612            }
1613          break;
1614    
1615          /*-----------------------------------------------------------------*/
1616          case OP_HSPACE_EXTRA + OP_TYPEEXACT:
1617          case OP_HSPACE_EXTRA + OP_TYPEUPTO:
1618          case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
1619          case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
1620          if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
1621            { ADD_ACTIVE(state_offset + 4, 0); }
1622          count = current_state->count;  /* Number already matched */
1623          if (clen > 0)
1624            {
1625            BOOL OK;
1626            switch (c)
1627              {
1628              case 0x09:      /* HT */
1629              case 0x20:      /* SPACE */
1630              case 0xa0:      /* NBSP */
1631              case 0x1680:    /* OGHAM SPACE MARK */
1632              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1633              case 0x2000:    /* EN QUAD */
1634              case 0x2001:    /* EM QUAD */
1635              case 0x2002:    /* EN SPACE */
1636              case 0x2003:    /* EM SPACE */
1637              case 0x2004:    /* THREE-PER-EM SPACE */
1638              case 0x2005:    /* FOUR-PER-EM SPACE */
1639              case 0x2006:    /* SIX-PER-EM SPACE */
1640              case 0x2007:    /* FIGURE SPACE */
1641              case 0x2008:    /* PUNCTUATION SPACE */
1642              case 0x2009:    /* THIN SPACE */
1643              case 0x200A:    /* HAIR SPACE */
1644              case 0x202f:    /* NARROW NO-BREAK SPACE */
1645              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1646              case 0x3000:    /* IDEOGRAPHIC SPACE */
1647              OK = TRUE;
1648              break;
1649    
1650              default:
1651              OK = FALSE;
1652              break;
1653              }
1654    
1655            if (OK == (d == OP_HSPACE))
1656              {
1657              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
1658                {
1659                active_count--;           /* Remove non-match possibility */
1660                next_active_state--;
1661                }
1662              if (++count >= GET2(code, 1))
1663                { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1664              else
1665                { ADD_NEW_DATA(-state_offset, count, 0); }
1666              }
1667            }
1668          break;
1669    
1670  /* ========================================================================== */  /* ========================================================================== */
1671        /* These opcodes are followed by a character that is usually compared        /* These opcodes are followed by a character that is usually compared
1672        to the current subject character; it is loaded into d. We still get        to the current subject character; it is loaded into d. We still get
# Line 1451  for (;;) Line 1766  for (;;)
1766        break;        break;
1767    
1768        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1769          case OP_NOT_VSPACE:
1770          if (clen > 0) switch(c)
1771            {
1772            case 0x000a:
1773            case 0x000b:
1774            case 0x000c:
1775            case 0x000d:
1776            case 0x0085:
1777            case 0x2028:
1778            case 0x2029:
1779            break;
1780    
1781            default:
1782            ADD_NEW(state_offset + 1, 0);
1783            break;
1784            }
1785          break;
1786    
1787          /*-----------------------------------------------------------------*/
1788          case OP_VSPACE:
1789          if (clen > 0) switch(c)
1790            {
1791            case 0x000a:
1792            case 0x000b:
1793            case 0x000c:
1794            case 0x000d:
1795            case 0x0085:
1796            case 0x2028:
1797            case 0x2029:
1798            ADD_NEW(state_offset + 1, 0);
1799            break;
1800    
1801            default: break;
1802            }
1803          break;
1804    
1805          /*-----------------------------------------------------------------*/
1806          case OP_NOT_HSPACE:
1807          if (clen > 0) switch(c)
1808            {
1809            case 0x09:      /* HT */
1810            case 0x20:      /* SPACE */
1811            case 0xa0:      /* NBSP */
1812            case 0x1680:    /* OGHAM SPACE MARK */
1813            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1814            case 0x2000:    /* EN QUAD */
1815            case 0x2001:    /* EM QUAD */
1816            case 0x2002:    /* EN SPACE */
1817            case 0x2003:    /* EM SPACE */
1818            case 0x2004:    /* THREE-PER-EM SPACE */
1819            case 0x2005:    /* FOUR-PER-EM SPACE */
1820            case 0x2006:    /* SIX-PER-EM SPACE */
1821            case 0x2007:    /* FIGURE SPACE */
1822            case 0x2008:    /* PUNCTUATION SPACE */
1823            case 0x2009:    /* THIN SPACE */
1824            case 0x200A:    /* HAIR SPACE */
1825            case 0x202f:    /* NARROW NO-BREAK SPACE */
1826            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1827            case 0x3000:    /* IDEOGRAPHIC SPACE */
1828            break;
1829    
1830            default:
1831            ADD_NEW(state_offset + 1, 0);
1832            break;
1833            }
1834          break;
1835    
1836          /*-----------------------------------------------------------------*/
1837          case OP_HSPACE:
1838          if (clen > 0) switch(c)
1839            {
1840            case 0x09:      /* HT */
1841            case 0x20:      /* SPACE */
1842            case 0xa0:      /* NBSP */
1843            case 0x1680:    /* OGHAM SPACE MARK */
1844            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1845            case 0x2000:    /* EN QUAD */
1846            case 0x2001:    /* EM QUAD */
1847            case 0x2002:    /* EN SPACE */
1848            case 0x2003:    /* EM SPACE */
1849            case 0x2004:    /* THREE-PER-EM SPACE */
1850            case 0x2005:    /* FOUR-PER-EM SPACE */
1851            case 0x2006:    /* SIX-PER-EM SPACE */
1852            case 0x2007:    /* FIGURE SPACE */
1853            case 0x2008:    /* PUNCTUATION SPACE */
1854            case 0x2009:    /* THIN SPACE */
1855            case 0x200A:    /* HAIR SPACE */
1856            case 0x202f:    /* NARROW NO-BREAK SPACE */
1857            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1858            case 0x3000:    /* IDEOGRAPHIC SPACE */
1859            ADD_NEW(state_offset + 1, 0);
1860            break;
1861            }
1862          break;
1863    
1864          /*-----------------------------------------------------------------*/
1865        /* Match a negated single character. This is only used for one-byte        /* Match a negated single character. This is only used for one-byte
1866        characters, that is, we know that d < 256. The character we are        characters, that is, we know that d < 256. The character we are
1867        checking (c) can be multibyte. */        checking (c) can be multibyte. */
# Line 2073  Returns: > 0 => number of match Line 2484  Returns: > 0 => number of match
2484                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
2485  */  */
2486    
2487  PCRE_DATA_SCOPE int  PCRE_EXP_DEFN int
2488  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
2489    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
2490    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
# Line 2166  md->poptions = re->options; Line 2577  md->poptions = re->options;
2577  /* Handle different types of newline. The three bits give eight cases. If  /* Handle different types of newline. The three bits give eight cases. If
2578  nothing is set at run time, whatever was used at compile time applies. */  nothing is set at run time, whatever was used at compile time applies. */
2579    
2580  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) &  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
2581           PCRE_NEWLINE_BITS)           PCRE_NEWLINE_BITS)
2582    {    {
2583    case 0: newline = NEWLINE; break;   /* Compile-time default */    case 0: newline = NEWLINE; break;   /* Compile-time default */
# Line 2175  switch ((((options & PCRE_NEWLINE_BITS) Line 2586  switch ((((options & PCRE_NEWLINE_BITS)
2586    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
2587         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
2588    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
2589      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
2590    default: return PCRE_ERROR_BADNEWLINE;    default: return PCRE_ERROR_BADNEWLINE;
2591    }    }
2592    
2593  if (newline < 0)  if (newline == -2)
2594      {
2595      md->nltype = NLTYPE_ANYCRLF;
2596      }
2597    else if (newline < 0)
2598    {    {
2599    md->nltype = NLTYPE_ANY;    md->nltype = NLTYPE_ANY;
2600    }    }
# Line 2308  for (;;) Line 2724  for (;;)
2724          {          {
2725          while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))          while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
2726            current_subject++;            current_subject++;
2727    
2728            /* If we have just passed a CR and the newline option is ANY or
2729            ANYCRLF, and we are now at a LF, advance the match position by one more
2730            character. */
2731    
2732            if (current_subject[-1] == '\r' &&
2733                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
2734                 current_subject < end_subject &&
2735                 *current_subject == '\n')
2736              current_subject++;
2737          }          }
2738        }        }
2739    
# Line 2416  for (;;) Line 2842  for (;;)
2842      }      }
2843    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
2844    
2845    /* If we have just passed a CR and the newline option is CRLF or ANY, and we    /* If we have just passed a CR and we are now at a LF, and the pattern does
2846    are now at a LF, advance the match position by one more character. */    not contain any explicit matches for \r or \n, and the newline option is CRLF
2847      or ANY or ANYCRLF, advance the match position by one more character. */
2848    
2849    if (current_subject[-1] == '\r' &&    if (current_subject[-1] == '\r' &&
2850         (md->nltype == NLTYPE_ANY || md->nllen == 2) &&        current_subject < end_subject &&
2851         current_subject < end_subject &&        *current_subject == '\n' &&
2852         *current_subject == '\n')        (re->options & PCRE_HASCRORLF) == 0 &&
2853            (md->nltype == NLTYPE_ANY ||
2854             md->nltype == NLTYPE_ANYCRLF ||
2855             md->nllen == 2))
2856      current_subject++;      current_subject++;
2857    
2858    }   /* "Bumpalong" loop */    }   /* "Bumpalong" loop */

Legend:
Removed from v.97  
changed lines
  Added in v.227

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12