/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 97 by ph10, Mon Mar 5 12:36:47 2007 UTC revision 168 by ph10, Tue May 29 15:18:18 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 74  enough. */ Line 74  enough. */
74  character that is to be tested in some way. This makes it possible to  character that is to be tested in some way. This makes it possible to
75  centralize the loading of these characters. In the case of Type * etc, the  centralize the loading of these characters. In the case of Type * etc, the
76  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
77  small value. */  small value. ***NOTE*** If the start of this table is modified, the two tables
78    that follow must also be modified. */
79    
80  static uschar coptable[] = {  static uschar coptable[] = {
81    0,                             /* End                                    */    0,                             /* End                                    */
82    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
83      0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
84    0, 0,                          /* Any, Anybyte                           */    0, 0,                          /* Any, Anybyte                           */
85    0, 0, 0, 0,                    /* NOTPROP, PROP, EXTUNI, ANYNL           */    0, 0, 0, 0,                    /* NOTPROP, PROP, EXTUNI, ANYNL           */
86    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
# Line 127  static uschar coptable[] = { Line 129  static uschar coptable[] = {
129  and \w */  and \w */
130    
131  static uschar toptable1[] = {  static uschar toptable1[] = {
132    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
133    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
134    ctype_space, ctype_space,    ctype_space, ctype_space,
135    ctype_word,  ctype_word,    ctype_word,  ctype_word,
# Line 135  static uschar toptable1[] = { Line 137  static uschar toptable1[] = {
137  };  };
138    
139  static uschar toptable2[] = {  static uschar toptable2[] = {
140    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
141    ctype_digit, 0,    ctype_digit, 0,
142    ctype_space, 0,    ctype_space, 0,
143    ctype_word,  0,    ctype_word,  0,
# Line 500  for (;;) Line 502  for (;;)
502      const uschar *code;      const uschar *code;
503      int state_offset = current_state->offset;      int state_offset = current_state->offset;
504      int count, codevalue;      int count, codevalue;
505    #ifdef SUPPORT_UCP
506      int chartype, script;      int chartype, script;
507    #endif
508    
509  #ifdef DEBUG  #ifdef DEBUG
510      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
# Line 783  for (;;) Line 787  for (;;)
787        break;        break;
788    
789    
 #ifdef SUPPORT_UCP  
   
790        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
791        /* Check the next character by Unicode property. We will get here only        /* Check the next character by Unicode property. We will get here only
792        if the support is in the binary; otherwise a compile-time error occurs.        if the support is in the binary; otherwise a compile-time error occurs.
793        */        */
794    
795    #ifdef SUPPORT_UCP
796        case OP_PROP:        case OP_PROP:
797        case OP_NOTPROP:        case OP_NOTPROP:
798        if (clen > 0)        if (clen > 0)
# Line 970  for (;;) Line 973  for (;;)
973        argument. It keeps the code above fast for the other cases. The argument        argument. It keeps the code above fast for the other cases. The argument
974        is in the d variable. */        is in the d variable. */
975    
976    #ifdef SUPPORT_UCP
977        case OP_PROP_EXTRA + OP_TYPEPLUS:        case OP_PROP_EXTRA + OP_TYPEPLUS:
978        case OP_PROP_EXTRA + OP_TYPEMINPLUS:        case OP_PROP_EXTRA + OP_TYPEMINPLUS:
979        case OP_PROP_EXTRA + OP_TYPEPOSPLUS:        case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
# Line 1049  for (;;) Line 1053  for (;;)
1053          ADD_NEW_DATA(-state_offset, count, ncount);          ADD_NEW_DATA(-state_offset, count, ncount);
1054          }          }
1055        break;        break;
1056    #endif
1057    
1058        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1059        case OP_ANYNL_EXTRA + OP_TYPEPLUS:        case OP_ANYNL_EXTRA + OP_TYPEPLUS:
# Line 1085  for (;;) Line 1090  for (;;)
1090        break;        break;
1091    
1092        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1093    #ifdef SUPPORT_UCP
1094        case OP_PROP_EXTRA + OP_TYPEQUERY:        case OP_PROP_EXTRA + OP_TYPEQUERY:
1095        case OP_PROP_EXTRA + OP_TYPEMINQUERY:        case OP_PROP_EXTRA + OP_TYPEMINQUERY:
1096        case OP_PROP_EXTRA + OP_TYPEPOSQUERY:        case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
# Line 1182  for (;;) Line 1188  for (;;)
1188          ADD_NEW_DATA(-(state_offset + count), 0, ncount);          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1189          }          }
1190        break;        break;
1191    #endif
1192    
1193        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1194        case OP_ANYNL_EXTRA + OP_TYPEQUERY:        case OP_ANYNL_EXTRA + OP_TYPEQUERY:
# Line 1226  for (;;) Line 1233  for (;;)
1233        break;        break;
1234    
1235        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1236    #ifdef SUPPORT_UCP
1237        case OP_PROP_EXTRA + OP_TYPEEXACT:        case OP_PROP_EXTRA + OP_TYPEEXACT:
1238        case OP_PROP_EXTRA + OP_TYPEUPTO:        case OP_PROP_EXTRA + OP_TYPEUPTO:
1239        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
# Line 1313  for (;;) Line 1321  for (;;)
1321            { ADD_NEW_DATA(-state_offset, count, ncount); }            { ADD_NEW_DATA(-state_offset, count, ncount); }
1322          }          }
1323        break;        break;
1324    #endif
1325    
1326        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1327        case OP_ANYNL_EXTRA + OP_TYPEEXACT:        case OP_ANYNL_EXTRA + OP_TYPEEXACT:
# Line 2073  Returns: > 0 => number of match Line 2082  Returns: > 0 => number of match
2082                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
2083  */  */
2084    
2085  PCRE_DATA_SCOPE int  PCRE_EXP_DEFN int
2086  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
2087    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
2088    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
# Line 2166  md->poptions = re->options; Line 2175  md->poptions = re->options;
2175  /* Handle different types of newline. The three bits give eight cases. If  /* Handle different types of newline. The three bits give eight cases. If
2176  nothing is set at run time, whatever was used at compile time applies. */  nothing is set at run time, whatever was used at compile time applies. */
2177    
2178  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) &  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
2179           PCRE_NEWLINE_BITS)           PCRE_NEWLINE_BITS)
2180    {    {
2181    case 0: newline = NEWLINE; break;   /* Compile-time default */    case 0: newline = NEWLINE; break;   /* Compile-time default */
# Line 2175  switch ((((options & PCRE_NEWLINE_BITS) Line 2184  switch ((((options & PCRE_NEWLINE_BITS)
2184    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
2185         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
2186    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
2187      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
2188    default: return PCRE_ERROR_BADNEWLINE;    default: return PCRE_ERROR_BADNEWLINE;
2189    }    }
2190    
2191  if (newline < 0)  if (newline == -2)
2192      {
2193      md->nltype = NLTYPE_ANYCRLF;
2194      }
2195    else if (newline < 0)
2196    {    {
2197    md->nltype = NLTYPE_ANY;    md->nltype = NLTYPE_ANY;
2198    }    }
# Line 2308  for (;;) Line 2322  for (;;)
2322          {          {
2323          while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))          while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
2324            current_subject++;            current_subject++;
2325    
2326            /* If we have just passed a CR and the newline option is ANY or
2327            ANYCRLF, and we are now at a LF, advance the match position by one more
2328            character. */
2329    
2330            if (current_subject[-1] == '\r' &&
2331                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
2332                 current_subject < end_subject &&
2333                 *current_subject == '\n')
2334              current_subject++;
2335          }          }
2336        }        }
2337    
# Line 2416  for (;;) Line 2440  for (;;)
2440      }      }
2441    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
2442    
2443    /* If we have just passed a CR and the newline option is CRLF or ANY, and we    /* If we have just passed a CR and the newline option is CRLF or ANY or
2444    are now at a LF, advance the match position by one more character. */    ANYCRLF, and we are now at a LF, advance the match position by one more
2445      character. */
2446    
2447    if (current_subject[-1] == '\r' &&    if (current_subject[-1] == '\r' &&
2448         (md->nltype == NLTYPE_ANY || md->nllen == 2) &&         (md->nltype == NLTYPE_ANY ||
2449            md->nltype == NLTYPE_ANYCRLF ||
2450            md->nllen == 2) &&
2451         current_subject < end_subject &&         current_subject < end_subject &&
2452         *current_subject == '\n')         *current_subject == '\n')
2453      current_subject++;      current_subject++;

Legend:
Removed from v.97  
changed lines
  Added in v.168

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12