/[pcre]/code/trunk/pcrecpp.h.in
ViewVC logotype

Diff of /code/trunk/pcrecpp.h.in

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 80 by nigel, Sat Feb 24 21:40:45 2007 UTC revision 81 by nigel, Sat Feb 24 21:40:59 2007 UTC
# Line 28  Line 28 
28  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  //  //
30  // Author: Sanjay Ghemawat  // Author: Sanjay Ghemawat
31    // Support for PCRE_XXX modifiers added by Giuseppe Maxia, July 2005
32    
33  #ifndef _PCRE_REGEXP_H  #ifndef _PCRE_REGEXP_H
34  #define _PCRE_REGEXP_H  #define _PCRE_REGEXP_H
# Line 159  Line 160 
160  //       --enable-utf8 flag.  //       --enable-utf8 flag.
161  //  //
162  // -----------------------------------------------------------------------  // -----------------------------------------------------------------------
163    // PASSING MODIFIERS TO THE REGULAR EXPRESSION ENGINE
164    //
165    // PCRE defines some modifiers to change the behavior of the regular
166    // expression engine.
167    // The C++ wrapper defines an auxiliary class, RE_Options, as a vehicle
168    // to pass such modifiers to a RE class.
169    //
170    // Currently, the following modifiers are supported
171    //
172    //    modifier              description               Perl corresponding
173    //
174    //    PCRE_CASELESS         case insensitive match    /i
175    //    PCRE_MULTILINE        multiple lines match      /m
176    //    PCRE_DOTALL           dot matches newlines      /s
177    //    PCRE_DOLLAR_ENDONLY   $ matches only at end     N/A
178    //    PCRE_EXTRA            strict escape parsing     N/A
179    //    PCRE_EXTENDED         ignore whitespace         /x
180    //    PCRE_UTF8             handles UTF8 chars        built-in
181    //    PCRE_UNGREEDY         reverses * and *?         N/A
182    //    PCRE_NO_AUTO_CAPTURE  disables capturing parens N/A (*)
183    //
184    // (For a full account on how each modifier works, please check the
185    // PCRE API reference manual).
186    //
187    // (*) Both Perl and PCRE allow non-capturing parentheses by means of the
188    // "?:" modifier within the pattern itself. e.g. (?:ab|cd) does not
189    // capture, while (ab|cd) does.
190    //
191    // For each modifier, there are two member functions whose name is made
192    // out of the modifier in lowercase, without the "PCRE_" prefix. For
193    // instance, PCRE_CASELESS is handled by
194    //    bool caseless(),
195    // which returns true if the modifier is set, and
196    //    RE_Options & set_caseless(bool),
197    // which sets or unsets the modifier.
198    //
199    // Moreover, PCRE_CONFIG_MATCH_LIMIT can be accessed through the
200    // set_match_limit() and match_limit() member functions.
201    // Setting match_limit to a non-zero value will limit the execution of
202    // pcre to keep it from doing bad things like blowing the stack or taking
203    // an eternity to return a result.  A value of 5000 is good enough to stop
204    // stack blowup in a 2MB thread stack.  Setting match_limit to zero will
205    // disable match limiting.
206    //
207    // Normally, to pass one or more modifiers to a RE class, you declare
208    // a RE_Options object, set the appropriate options, and pass this
209    // object to a RE constructor. Example:
210    //
211    //    RE_Options opt;
212    //    opt.set_caseless(true);
213    //
214    //    if (RE("HELLO", opt).PartialMatch("hello world")) ...
215    //
216    // RE_Options has two constructors. The default constructor takes no
217    // arguments and creates a set of flags that are off by default.
218    //
219    // The optional parameter 'option_flags' is to facilitate transfer
220    // of legacy code from C programs.  This lets you do
221    //    RE(pattern, RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
222    //
223    // But new code is better off doing
224    //    RE(pattern,
225    //      RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str);
226    // (See below)
227    //
228    // If you are going to pass one of the most used modifiers, there are some
229    // convenience functions that return a RE_Options class with the
230    // appropriate modifier already set:
231    // CASELESS(), UTF8(), MULTILINE(), DOTALL(), EXTENDED()
232    //
233    // If you need to set several options at once, and you don't want to go
234    // through the pains of declaring a RE_Options object and setting several
235    // options, there is a parallel method that gives you such ability on the
236    // fly. You can concatenate several set_xxxxx member functions, since each
237    // of them returns a reference to its class object.  e.g.: to pass
238    // PCRE_CASELESS, PCRE_EXTENDED, and PCRE_MULTILINE to a RE with one
239    // statement, you may write
240    //
241    //    RE(" ^ xyz \\s+ .* blah$", RE_Options()
242    //                            .set_caseless(true)
243    //                            .set_extended(true)
244    //                            .set_multiline(true)).PartialMatch(sometext);
245    //
246    // -----------------------------------------------------------------------
247  // SCANNING TEXT INCREMENTALLY  // SCANNING TEXT INCREMENTALLY
248  //  //
249  // The "Consume" operation may be useful if you want to repeatedly  // The "Consume" operation may be useful if you want to repeatedly
# Line 245  Line 330 
330    
331  namespace pcrecpp {  namespace pcrecpp {
332    
333    #define PCRE_SET_OR_CLEAR(b, o) \
334        if (b) all_options_ |= (o); else all_options_ &= ~(o); \
335        return *this
336    
337    #define PCRE_IS_SET(o)  \
338            (all_options_ & o) == o
339    
340  // We convert user-passed pointers into special Arg objects  // We convert user-passed pointers into special Arg objects
341  class Arg;  class Arg;
342  extern Arg no_arg;  extern Arg no_arg;
# Line 252  extern Arg no_arg; Line 344  extern Arg no_arg;
344  /***** Compiling regular expressions: the RE class *****/  /***** Compiling regular expressions: the RE class *****/
345    
346  // RE_Options allow you to set options to be passed along to pcre,  // RE_Options allow you to set options to be passed along to pcre,
347  // along with other options we put on top of pcre.  Only UTF and  // along with other options we put on top of pcre.
348  // match_limit are supported now.  Setting match_limit  // Only 9 modifiers, plus match_limit are supported now.
 // to a non-zero value will limit the executation of pcre to  
 // keep it from doing bad things like blowing the stack or taking  
 // an eternity to return a result.  A value of 5000 is good enough  
 // to stop stack blowup in a 2MB thread stack.  
 // Setting match_limit to zero will disable match limiting.  
349  class RE_Options {  class RE_Options {
350   public:   public:
351    // constructor    // constructor
352    RE_Options() : match_limit_(0), utf8_(false) {}    RE_Options() : match_limit_(0), all_options_(0) {}
353    
354      // alternative constructor.
355      // To facilitate transfer of legacy code from C programs
356      //
357      // This lets you do
358      //    RE(pattern, RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
359      // But new code is better off doing
360      //    RE(pattern,
361      //      RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str);
362      RE_Options(int option_flags) : match_limit_(0), all_options_ (option_flags) {}
363    // we're fine with the default destructor, copy constructor, etc.    // we're fine with the default destructor, copy constructor, etc.
364    
365    // accessors and mutators    // accessors and mutators
366    int match_limit() const { return match_limit_; };    int match_limit() const { return match_limit_; };
367    void set_match_limit(int limit) {    RE_Options &set_match_limit(int limit) {
368      match_limit_ = limit;      match_limit_ = limit;
369        return *this;
370    }    }
371    
372    bool utf8() const { return utf8_; }    bool caseless() const {
373    void set_utf8(bool u) {      return PCRE_IS_SET(PCRE_CASELESS);
374      utf8_ = u;    }
375      RE_Options &set_caseless(bool x) {
376        PCRE_SET_OR_CLEAR(x, PCRE_CASELESS);
377      }
378    
379      bool multiline() const {
380        return PCRE_IS_SET(PCRE_MULTILINE);
381      }
382      RE_Options &set_multiline(bool x) {
383        PCRE_SET_OR_CLEAR(x, PCRE_MULTILINE);
384      }
385    
386      bool dotall() const {
387        return PCRE_IS_SET(PCRE_DOTALL);
388      }
389      RE_Options &set_dotall(bool x) {
390        PCRE_SET_OR_CLEAR(x,PCRE_DOTALL);
391      }
392    
393      bool extended() const {
394        return PCRE_IS_SET(PCRE_EXTENDED);
395      }
396      RE_Options &set_extended(bool x) {
397        PCRE_SET_OR_CLEAR(x,PCRE_EXTENDED);
398      }
399    
400      bool dollar_endonly() const {
401        return PCRE_IS_SET(PCRE_DOLLAR_ENDONLY);
402      }
403      RE_Options &set_dollar_endonly(bool x) {
404        PCRE_SET_OR_CLEAR(x,PCRE_DOLLAR_ENDONLY);
405      }
406    
407      bool extra() const {
408        return PCRE_IS_SET( PCRE_EXTRA);
409      }
410      RE_Options &set_extra(bool x) {
411        PCRE_SET_OR_CLEAR(x, PCRE_EXTRA);
412      }
413    
414      bool ungreedy() const {
415        return PCRE_IS_SET(PCRE_UNGREEDY);
416      }
417      RE_Options &set_ungreedy(bool x) {
418        PCRE_SET_OR_CLEAR(x, PCRE_UNGREEDY);
419      }
420    
421      bool utf8() const {
422        return PCRE_IS_SET(PCRE_UTF8);
423      }
424      RE_Options &set_utf8(bool x) {
425        PCRE_SET_OR_CLEAR(x, PCRE_UTF8);
426      }
427    
428      bool no_auto_capture() const {
429        return PCRE_IS_SET(PCRE_NO_AUTO_CAPTURE);
430      }
431      RE_Options &set_no_auto_capture(bool x) {
432        PCRE_SET_OR_CLEAR(x, PCRE_NO_AUTO_CAPTURE);
433      }
434    
435      RE_Options &set_all_options(int opt) {
436        all_options_ = opt;
437        return *this;
438      }
439      int all_options() const {
440        return all_options_ ;
441    }    }
442    
443    // TODO: add other pcre flags    // TODO: add other pcre flags
444    
445   private:   private:
446    int match_limit_;    int match_limit_;
447    bool utf8_;    int all_options_;
448  };  };
449    
450  // These functions return some common RE_Options  // These functions return some common RE_Options
451  static inline RE_Options UTF8() {  static inline RE_Options UTF8() {
452    RE_Options options;    return RE_Options().set_utf8(true);
453    options.set_utf8(true);  }
454    return options;  
455    static inline RE_Options CASELESS() {
456      return RE_Options().set_caseless(true);
457    }
458    static inline RE_Options MULTILINE() {
459      return RE_Options().set_multiline(true);
460  }  }
461    
462    static inline RE_Options DOTALL() {
463      return RE_Options().set_dotall(true);
464    }
465    
466    static inline RE_Options EXTENDED() {
467      return RE_Options().set_extended(true);
468    }
469    
470  // Interface for regular expression matching.  Also corresponds to a  // Interface for regular expression matching.  Also corresponds to a
471  // pre-compiled regular expression.  An "RE" object is safe for  // pre-compiled regular expression.  An "RE" object is safe for
# Line 600  MAKE_INTEGER_PARSER(long long, Line 776  MAKE_INTEGER_PARSER(long long,
776  MAKE_INTEGER_PARSER(unsigned long long, ulonglong);  MAKE_INTEGER_PARSER(unsigned long long, ulonglong);
777  #endif  #endif
778    
779    #undef PCRE_IS_SET
780    #undef PCRE_SET_OR_CLEAR
781  #undef MAKE_INTEGER_PARSER  #undef MAKE_INTEGER_PARSER
782    
783  }   // namespace pcrecpp  }   // namespace pcrecpp
784    
785    
786  #endif /* _PCRE_REGEXP_H */  #endif /* _PCRE_REGEXP_H */

Legend:
Removed from v.80  
changed lines
  Added in v.81

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12