/[pcre]/code/trunk/doc/html/pcreapi.html
ViewVC logotype

Diff of /code/trunk/doc/html/pcreapi.html

Parent Directory | Revision Log | View Patch

revision 784 by ph10, Mon Dec 5 12:33:44 2011 UTC revision 959 by ph10, Sat Apr 14 16:16:58 2012 UTC
# Line 14  man page, in case the conversion went wr Line 14  man page, in case the conversion went wr
14  <br>  <br>
15  <ul>  <ul>
16  <li><a name="TOC1" href="#SEC1">PCRE NATIVE API BASIC FUNCTIONS</a>  <li><a name="TOC1" href="#SEC1">PCRE NATIVE API BASIC FUNCTIONS</a>
17  <li><a name="TOC2" href="#SEC2">PCRE NATIVE API AUXILIARY FUNCTIONS</a>  <li><a name="TOC2" href="#SEC2">PCRE NATIVE API STRING EXTRACTION FUNCTIONS</a>
18  <li><a name="TOC3" href="#SEC3">PCRE NATIVE API INDIRECTED FUNCTIONS</a>  <li><a name="TOC3" href="#SEC3">PCRE NATIVE API AUXILIARY FUNCTIONS</a>
19  <li><a name="TOC4" href="#SEC4">PCRE API OVERVIEW</a>  <li><a name="TOC4" href="#SEC4">PCRE NATIVE API INDIRECTED FUNCTIONS</a>
20  <li><a name="TOC5" href="#SEC5">NEWLINES</a>  <li><a name="TOC5" href="#SEC5">PCRE 8-BIT AND 16-BIT LIBRARIES</a>
21  <li><a name="TOC6" href="#SEC6">MULTITHREADING</a>  <li><a name="TOC6" href="#SEC6">PCRE API OVERVIEW</a>
22  <li><a name="TOC7" href="#SEC7">SAVING PRECOMPILED PATTERNS FOR LATER USE</a>  <li><a name="TOC7" href="#SEC7">NEWLINES</a>
23  <li><a name="TOC8" href="#SEC8">CHECKING BUILD-TIME OPTIONS</a>  <li><a name="TOC8" href="#SEC8">MULTITHREADING</a>
24  <li><a name="TOC9" href="#SEC9">COMPILING A PATTERN</a>  <li><a name="TOC9" href="#SEC9">SAVING PRECOMPILED PATTERNS FOR LATER USE</a>
25  <li><a name="TOC10" href="#SEC10">COMPILATION ERROR CODES</a>  <li><a name="TOC10" href="#SEC10">CHECKING BUILD-TIME OPTIONS</a>
26  <li><a name="TOC11" href="#SEC11">STUDYING A PATTERN</a>  <li><a name="TOC11" href="#SEC11">COMPILING A PATTERN</a>
27  <li><a name="TOC12" href="#SEC12">LOCALE SUPPORT</a>  <li><a name="TOC12" href="#SEC12">COMPILATION ERROR CODES</a>
28  <li><a name="TOC13" href="#SEC13">INFORMATION ABOUT A PATTERN</a>  <li><a name="TOC13" href="#SEC13">STUDYING A PATTERN</a>
29  <li><a name="TOC14" href="#SEC14">OBSOLETE INFO FUNCTION</a>  <li><a name="TOC14" href="#SEC14">LOCALE SUPPORT</a>
30  <li><a name="TOC15" href="#SEC15">REFERENCE COUNTS</a>  <li><a name="TOC15" href="#SEC15">INFORMATION ABOUT A PATTERN</a>
31  <li><a name="TOC16" href="#SEC16">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a>  <li><a name="TOC16" href="#SEC16">REFERENCE COUNTS</a>
32  <li><a name="TOC17" href="#SEC17">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a>  <li><a name="TOC17" href="#SEC17">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a>
33  <li><a name="TOC18" href="#SEC18">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a>  <li><a name="TOC18" href="#SEC18">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a>
34  <li><a name="TOC19" href="#SEC19">DUPLICATE SUBPATTERN NAMES</a>  <li><a name="TOC19" href="#SEC19">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a>
35  <li><a name="TOC20" href="#SEC20">FINDING ALL POSSIBLE MATCHES</a>  <li><a name="TOC20" href="#SEC20">DUPLICATE SUBPATTERN NAMES</a>
36  <li><a name="TOC21" href="#SEC21">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a>  <li><a name="TOC21" href="#SEC21">FINDING ALL POSSIBLE MATCHES</a>
37  <li><a name="TOC22" href="#SEC22">SEE ALSO</a>  <li><a name="TOC22" href="#SEC22">OBTAINING AN ESTIMATE OF STACK USAGE</a>
38  <li><a name="TOC23" href="#SEC23">AUTHOR</a>  <li><a name="TOC23" href="#SEC23">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a>
39  <li><a name="TOC24" href="#SEC24">REVISION</a>  <li><a name="TOC24" href="#SEC24">SEE ALSO</a>
40    <li><a name="TOC25" href="#SEC25">AUTHOR</a>
41    <li><a name="TOC26" href="#SEC26">REVISION</a>
42  </ul>  </ul>
 <br><a name="SEC1" href="#TOC1">PCRE NATIVE API BASIC FUNCTIONS</a><br>  
43  <P>  <P>
44  <b>#include &#60;pcre.h&#62;</b>  <b>#include &#60;pcre.h&#62;</b>
45  </P>  </P>
46    <br><a name="SEC1" href="#TOC1">PCRE NATIVE API BASIC FUNCTIONS</a><br>
47  <P>  <P>
48  <b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>  <b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>
49  <b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>  <b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
# Line 65  man page, in case the conversion went wr Line 67  man page, in case the conversion went wr
67  <b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>  <b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
68  <b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>  <b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
69  </P>  </P>
 <br><a name="SEC2" href="#TOC1">PCRE NATIVE API AUXILIARY FUNCTIONS</a><br>  
 <P>  
 <b>pcre_jit_stack *pcre_jit_stack_alloc(int <i>startsize</i>, int <i>maxsize</i>);</b>  
 </P>  
 <P>  
 <b>void pcre_jit_stack_free(pcre_jit_stack *<i>stack</i>);</b>  
 </P>  
 <P>  
 <b>void pcre_assign_jit_stack(pcre_extra *<i>extra</i>,</b>  
 <b>pcre_jit_callback <i>callback</i>, void *<i>data</i>);</b>  
 </P>  
70  <P>  <P>
71  <b>int pcre_dfa_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>  <b>int pcre_dfa_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
72  <b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>  <b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
73  <b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>  <b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
74  <b>int *<i>workspace</i>, int <i>wscount</i>);</b>  <b>int *<i>workspace</i>, int <i>wscount</i>);</b>
75  </P>  </P>
76    <br><a name="SEC2" href="#TOC1">PCRE NATIVE API STRING EXTRACTION FUNCTIONS</a><br>
77  <P>  <P>
78  <b>int pcre_copy_named_substring(const pcre *<i>code</i>,</b>  <b>int pcre_copy_named_substring(const pcre *<i>code</i>,</b>
79  <b>const char *<i>subject</i>, int *<i>ovector</i>,</b>  <b>const char *<i>subject</i>, int *<i>ovector</i>,</b>
# Line 122  man page, in case the conversion went wr Line 114  man page, in case the conversion went wr
114  <P>  <P>
115  <b>void pcre_free_substring_list(const char **<i>stringptr</i>);</b>  <b>void pcre_free_substring_list(const char **<i>stringptr</i>);</b>
116  </P>  </P>
117    <br><a name="SEC3" href="#TOC1">PCRE NATIVE API AUXILIARY FUNCTIONS</a><br>
118    <P>
119    <b>pcre_jit_stack *pcre_jit_stack_alloc(int <i>startsize</i>, int <i>maxsize</i>);</b>
120    </P>
121    <P>
122    <b>void pcre_jit_stack_free(pcre_jit_stack *<i>stack</i>);</b>
123    </P>
124    <P>
125    <b>void pcre_assign_jit_stack(pcre_extra *<i>extra</i>,</b>
126    <b>pcre_jit_callback <i>callback</i>, void *<i>data</i>);</b>
127    </P>
128  <P>  <P>
129  <b>const unsigned char *pcre_maketables(void);</b>  <b>const unsigned char *pcre_maketables(void);</b>
130  </P>  </P>
# Line 130  man page, in case the conversion went wr Line 133  man page, in case the conversion went wr
133  <b>int <i>what</i>, void *<i>where</i>);</b>  <b>int <i>what</i>, void *<i>where</i>);</b>
134  </P>  </P>
135  <P>  <P>
 <b>int pcre_info(const pcre *<i>code</i>, int *<i>optptr</i>, int</b>  
 <b>*<i>firstcharptr</i>);</b>  
 </P>  
 <P>  
136  <b>int pcre_refcount(pcre *<i>code</i>, int <i>adjust</i>);</b>  <b>int pcre_refcount(pcre *<i>code</i>, int <i>adjust</i>);</b>
137  </P>  </P>
138  <P>  <P>
139  <b>int pcre_config(int <i>what</i>, void *<i>where</i>);</b>  <b>int pcre_config(int <i>what</i>, void *<i>where</i>);</b>
140  </P>  </P>
141  <P>  <P>
142  <b>char *pcre_version(void);</b>  <b>const char *pcre_version(void);</b>
143    </P>
144    <P>
145    <b>int pcre_pattern_to_host_byte_order(pcre *<i>code</i>,</b>
146    <b>pcre_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
147  </P>  </P>
148  <br><a name="SEC3" href="#TOC1">PCRE NATIVE API INDIRECTED FUNCTIONS</a><br>  <br><a name="SEC4" href="#TOC1">PCRE NATIVE API INDIRECTED FUNCTIONS</a><br>
149  <P>  <P>
150  <b>void *(*pcre_malloc)(size_t);</b>  <b>void *(*pcre_malloc)(size_t);</b>
151  </P>  </P>
# Line 158  man page, in case the conversion went wr Line 161  man page, in case the conversion went wr
161  <P>  <P>
162  <b>int (*pcre_callout)(pcre_callout_block *);</b>  <b>int (*pcre_callout)(pcre_callout_block *);</b>
163  </P>  </P>
164  <br><a name="SEC4" href="#TOC1">PCRE API OVERVIEW</a><br>  <br><a name="SEC5" href="#TOC1">PCRE 8-BIT AND 16-BIT LIBRARIES</a><br>
165    <P>
166    From release 8.30, PCRE can be compiled as a library for handling 16-bit
167    character strings as well as, or instead of, the original library that handles
168    8-bit character strings. To avoid too much complication, this document
169    describes the 8-bit versions of the functions, with only occasional references
170    to the 16-bit library.
171    </P>
172    <P>
173    The 16-bit functions operate in the same way as their 8-bit counterparts; they
174    just use different data types for their arguments and results, and their names
175    start with <b>pcre16_</b> instead of <b>pcre_</b>. For every option that has UTF8
176    in its name (for example, PCRE_UTF8), there is a corresponding 16-bit name with
177    UTF8 replaced by UTF16. This facility is in fact just cosmetic; the 16-bit
178    option names define the same bit values.
179    </P>
180    <P>
181    References to bytes and UTF-8 in this document should be read as references to
182    16-bit data quantities and UTF-16 when using the 16-bit library, unless
183    specified otherwise. More details of the specific differences for the 16-bit
184    library are given in the
185    <a href="pcre16.html"><b>pcre16</b></a>
186    page.
187    </P>
188    <br><a name="SEC6" href="#TOC1">PCRE API OVERVIEW</a><br>
189  <P>  <P>
190  PCRE has its own native API, which is described in this document. There are  PCRE has its own native API, which is described in this document. There are
191  also some wrapper functions that correspond to the POSIX regular expression  also some wrapper functions (for the 8-bit library only) that correspond to the
192  API, but they do not give access to all the functionality. They are described  POSIX regular expression API, but they do not give access to all the
193  in the  functionality. They are described in the
194  <a href="pcreposix.html"><b>pcreposix</b></a>  <a href="pcreposix.html"><b>pcreposix</b></a>
195  documentation. Both of these APIs define a set of C function calls. A C++  documentation. Both of these APIs define a set of C function calls. A C++
196  wrapper is also distributed with PCRE. It is documented in the  wrapper (again for the 8-bit library only) is also distributed with PCRE. It is
197    documented in the
198  <a href="pcrecpp.html"><b>pcrecpp</b></a>  <a href="pcrecpp.html"><b>pcrecpp</b></a>
199  page.  page.
200  </P>  </P>
201  <P>  <P>
202  The native API C function prototypes are defined in the header file  The native API C function prototypes are defined in the header file
203  <b>pcre.h</b>, and on Unix systems the library itself is called <b>libpcre</b>.  <b>pcre.h</b>, and on Unix-like systems the (8-bit) library itself is called
204  It can normally be accessed by adding <b>-lpcre</b> to the command for linking  <b>libpcre</b>. It can normally be accessed by adding <b>-lpcre</b> to the
205  an application that uses PCRE. The header file defines the macros PCRE_MAJOR  command for linking an application that uses PCRE. The header file defines the
206  and PCRE_MINOR to contain the major and minor release numbers for the library.  macros PCRE_MAJOR and PCRE_MINOR to contain the major and minor release numbers
207  Applications can use these to include support for different releases of PCRE.  for the library. Applications can use these to include support for different
208    releases of PCRE.
209  </P>  </P>
210  <P>  <P>
211  In a Windows environment, if you want to statically link an application program  In a Windows environment, if you want to statically link an application program
# Line 244  internal tables that are generated when Line 273  internal tables that are generated when
273  </P>  </P>
274  <P>  <P>
275  The function <b>pcre_fullinfo()</b> is used to find out information about a  The function <b>pcre_fullinfo()</b> is used to find out information about a
276  compiled pattern; <b>pcre_info()</b> is an obsolete version that returns only  compiled pattern. The function <b>pcre_version()</b> returns a pointer to a
277  some of the available information, but is retained for backwards compatibility.  string containing the version of PCRE and its date of release.
 The function <b>pcre_version()</b> returns a pointer to a string containing the  
 version of PCRE and its date of release.  
278  </P>  </P>
279  <P>  <P>
280  The function <b>pcre_refcount()</b> maintains a reference count in a data block  The function <b>pcre_refcount()</b> maintains a reference count in a data block
# Line 284  points during a matching operation. Deta Line 311  points during a matching operation. Deta
311  <a href="pcrecallout.html"><b>pcrecallout</b></a>  <a href="pcrecallout.html"><b>pcrecallout</b></a>
312  documentation.  documentation.
313  <a name="newlines"></a></P>  <a name="newlines"></a></P>
314  <br><a name="SEC5" href="#TOC1">NEWLINES</a><br>  <br><a name="SEC7" href="#TOC1">NEWLINES</a><br>
315  <P>  <P>
316  PCRE supports five different conventions for indicating line breaks in  PCRE supports five different conventions for indicating line breaks in
317  strings: a single CR (carriage return) character, a single LF (linefeed)  strings: a single CR (carriage return) character, a single LF (linefeed)
# Line 323  The choice of newline convention does no Line 350  The choice of newline convention does no
350  the \n or \r escape sequences, nor does it affect what \R matches, which is  the \n or \r escape sequences, nor does it affect what \R matches, which is
351  controlled in a similar way, but by separate options.  controlled in a similar way, but by separate options.
352  </P>  </P>
353  <br><a name="SEC6" href="#TOC1">MULTITHREADING</a><br>  <br><a name="SEC8" href="#TOC1">MULTITHREADING</a><br>
354  <P>  <P>
355  The PCRE functions can be used in multi-threading applications, with the  The PCRE functions can be used in multi-threading applications, with the
356  proviso that the memory management functions pointed to by <b>pcre_malloc</b>,  proviso that the memory management functions pointed to by <b>pcre_malloc</b>,
# Line 340  memory stack areas for each thread. See Line 367  memory stack areas for each thread. See
367  <a href="pcrejit.html"><b>pcrejit</b></a>  <a href="pcrejit.html"><b>pcrejit</b></a>
368  documentation for more details.  documentation for more details.
369  </P>  </P>
370  <br><a name="SEC7" href="#TOC1">SAVING PRECOMPILED PATTERNS FOR LATER USE</a><br>  <br><a name="SEC9" href="#TOC1">SAVING PRECOMPILED PATTERNS FOR LATER USE</a><br>
371  <P>  <P>
372  The compiled form of a regular expression can be saved and re-used at a later  The compiled form of a regular expression can be saved and re-used at a later
373  time, possibly by a different program, and even on a host other than the one on  time, possibly by a different program, and even on a host other than the one on
374  which it was compiled. Details are given in the  which it was compiled. Details are given in the
375  <a href="pcreprecompile.html"><b>pcreprecompile</b></a>  <a href="pcreprecompile.html"><b>pcreprecompile</b></a>
376  documentation. However, compiling a regular expression with one version of PCRE  documentation, which includes a description of the
377  for use with a different version is not guaranteed to work and may cause  <b>pcre_pattern_to_host_byte_order()</b> function. However, compiling a regular
378  crashes.  expression with one version of PCRE for use with a different version is not
379    guaranteed to work and may cause crashes.
380  </P>  </P>
381  <br><a name="SEC8" href="#TOC1">CHECKING BUILD-TIME OPTIONS</a><br>  <br><a name="SEC10" href="#TOC1">CHECKING BUILD-TIME OPTIONS</a><br>
382  <P>  <P>
383  <b>int pcre_config(int <i>what</i>, void *<i>where</i>);</b>  <b>int pcre_config(int <i>what</i>, void *<i>where</i>);</b>
384  </P>  </P>
# Line 363  documentation has more details about the Line 391  documentation has more details about the
391  <P>  <P>
392  The first argument for <b>pcre_config()</b> is an integer, specifying which  The first argument for <b>pcre_config()</b> is an integer, specifying which
393  information is required; the second argument is a pointer to a variable into  information is required; the second argument is a pointer to a variable into
394  which the information is placed. The following information is available:  which the information is placed. The returned value is zero on success, or the
395    negative error code PCRE_ERROR_BADOPTION if the value in the first argument is
396    not recognized. The following information is available:
397  <pre>  <pre>
398    PCRE_CONFIG_UTF8    PCRE_CONFIG_UTF8
399  </pre>  </pre>
400  The output is an integer that is set to one if UTF-8 support is available;  The output is an integer that is set to one if UTF-8 support is available;
401  otherwise it is set to zero.  otherwise it is set to zero. If this option is given to the 16-bit version of
402    this function, <b>pcre16_config()</b>, the result is PCRE_ERROR_BADOPTION.
403    <pre>
404      PCRE_CONFIG_UTF16
405    </pre>
406    The output is an integer that is set to one if UTF-16 support is available;
407    otherwise it is set to zero. This value should normally be given to the 16-bit
408    version of this function, <b>pcre16_config()</b>. If it is given to the 8-bit
409    version of this function, the result is PCRE_ERROR_BADOPTION.
410  <pre>  <pre>
411    PCRE_CONFIG_UNICODE_PROPERTIES    PCRE_CONFIG_UNICODE_PROPERTIES
412  </pre>  </pre>
# Line 380  properties is available; otherwise it is Line 418  properties is available; otherwise it is
418  The output is an integer that is set to one if support for just-in-time  The output is an integer that is set to one if support for just-in-time
419  compiling is available; otherwise it is set to zero.  compiling is available; otherwise it is set to zero.
420  <pre>  <pre>
421      PCRE_CONFIG_JITTARGET
422    </pre>
423    The output is a pointer to a zero-terminated "const char *" string. If JIT
424    support is available, the string contains the name of the architecture for
425    which the JIT compiler is configured, for example "x86 32bit (little endian +
426    unaligned)". If JIT support is not available, the result is NULL.
427    <pre>
428    PCRE_CONFIG_NEWLINE    PCRE_CONFIG_NEWLINE
429  </pre>  </pre>
430  The output is an integer whose value specifies the default character sequence  The output is an integer whose value specifies the default character sequence
# Line 399  or CRLF. The default can be overridden w Line 444  or CRLF. The default can be overridden w
444    PCRE_CONFIG_LINK_SIZE    PCRE_CONFIG_LINK_SIZE
445  </pre>  </pre>
446  The output is an integer that contains the number of bytes used for internal  The output is an integer that contains the number of bytes used for internal
447  linkage in compiled regular expressions. The value is 2, 3, or 4. Larger values  linkage in compiled regular expressions. For the 8-bit library, the value can
448  allow larger regular expressions to be compiled, at the expense of slower  be 2, 3, or 4. For the 16-bit library, the value is either 2 or 4 and is still
449  matching. The default value of 2 is sufficient for all but the most massive  a number of bytes. The default value of 2 is sufficient for all but the most
450  patterns, since it allows the compiled pattern to be up to 64K in size.  massive patterns, since it allows the compiled pattern to be up to 64K in size.
451    Larger values allow larger regular expressions to be compiled, at the expense
452    of slower matching.
453  <pre>  <pre>
454    PCRE_CONFIG_POSIX_MALLOC_THRESHOLD    PCRE_CONFIG_POSIX_MALLOC_THRESHOLD
455  </pre>  </pre>
# Line 434  of recursive function calls. In this cas Line 481  of recursive function calls. In this cas
481  <b>pcre_stack_free</b> are called to manage memory blocks on the heap, thus  <b>pcre_stack_free</b> are called to manage memory blocks on the heap, thus
482  avoiding the use of the stack.  avoiding the use of the stack.
483  </P>  </P>
484  <br><a name="SEC9" href="#TOC1">COMPILING A PATTERN</a><br>  <br><a name="SEC11" href="#TOC1">COMPILING A PATTERN</a><br>
485  <P>  <P>
486  <b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>  <b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>
487  <b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>  <b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
# Line 477  documentation). For those options that c Line 524  documentation). For those options that c
524  the pattern, the contents of the <i>options</i> argument specifies their  the pattern, the contents of the <i>options</i> argument specifies their
525  settings at the start of compilation and execution. The PCRE_ANCHORED,  settings at the start of compilation and execution. The PCRE_ANCHORED,
526  PCRE_BSR_<i>xxx</i>, PCRE_NEWLINE_<i>xxx</i>, PCRE_NO_UTF8_CHECK, and  PCRE_BSR_<i>xxx</i>, PCRE_NEWLINE_<i>xxx</i>, PCRE_NO_UTF8_CHECK, and
527  PCRE_NO_START_OPT options can be set at the time of matching as well as at  PCRE_NO_START_OPTIMIZE options can be set at the time of matching as well as at
528  compile time.  compile time.
529  </P>  </P>
530  <P>  <P>
# Line 489  not try to free it. Normally, the offset Line 536  not try to free it. Normally, the offset
536  byte that was being processed when the error was discovered is placed in the  byte that was being processed when the error was discovered is placed in the
537  variable pointed to by <i>erroffset</i>, which must not be NULL (if it is, an  variable pointed to by <i>erroffset</i>, which must not be NULL (if it is, an
538  immediate error is given). However, for an invalid UTF-8 string, the offset is  immediate error is given). However, for an invalid UTF-8 string, the offset is
539  that of the first byte of the failing character. Also, some errors are not  that of the first byte of the failing character.
 detected until checks are carried out when the whole pattern has been scanned;  
 in these cases the offset passed back is the length of the pattern.  
540  </P>  </P>
541  <P>  <P>
542  Note that the offset is in bytes, not characters, even in UTF-8 mode. It may  Some errors are not detected until the whole pattern has been scanned; in these
543  sometimes point into the middle of a UTF-8 character.  cases, the offset passed back is the length of the pattern. Note that the
544    offset is in bytes, not characters, even in UTF-8 mode. It may sometimes point
545    into the middle of a UTF-8 character.
546  </P>  </P>
547  <P>  <P>
548  If <b>pcre_compile2()</b> is used instead of <b>pcre_compile()</b>, and the  If <b>pcre_compile2()</b> is used instead of <b>pcre_compile()</b>, and the
# Line 651  pattern such as (\1)(a) succeeds when th Line 698  pattern such as (\1)(a) succeeds when th
698  an "a" in the subject), whereas it fails by default, for Perl compatibility.  an "a" in the subject), whereas it fails by default, for Perl compatibility.
699  </P>  </P>
700  <P>  <P>
701  (3) \U matches an upper case "U" character; by default \U causes a compile  (3) \U matches an upper case "U" character; by default \U causes a compile
702  time error (Perl uses \U to upper case subsequent characters).  time error (Perl uses \U to upper case subsequent characters).
703  </P>  </P>
704  <P>  <P>
705  (4) \u matches a lower case "u" character unless it is followed by four  (4) \u matches a lower case "u" character unless it is followed by four
706  hexadecimal digits, in which case the hexadecimal number defines the code point  hexadecimal digits, in which case the hexadecimal number defines the code point
707  to match. By default, \u causes a compile time error (Perl uses it to upper  to match. By default, \u causes a compile time error (Perl uses it to upper
708  case the following character).  case the following character).
709  </P>  </P>
710  <P>  <P>
711  (5) \x matches a lower case "x" character unless it is followed by two  (5) \x matches a lower case "x" character unless it is followed by two
712  hexadecimal digits, in which case the hexadecimal number defines the code point  hexadecimal digits, in which case the hexadecimal number defines the code point
713  to match. By default, as in Perl, a hexadecimal number is always expected after  to match. By default, as in Perl, a hexadecimal number is always expected after
714  \x, but it may have zero, one, or two digits (so, for example, \xz matches a  \x, but it may have zero, one, or two digits (so, for example, \xz matches a
715  binary zero character followed by z).  binary zero character followed by z).
716  <pre>  <pre>
717    PCRE_MULTILINE    PCRE_MULTILINE
# Line 699  preceding sequences should be recognized Line 746  preceding sequences should be recognized
746  that any Unicode newline sequence should be recognized. The Unicode newline  that any Unicode newline sequence should be recognized. The Unicode newline
747  sequences are the three just mentioned, plus the single characters VT (vertical  sequences are the three just mentioned, plus the single characters VT (vertical
748  tab, U+000B), FF (formfeed, U+000C), NEL (next line, U+0085), LS (line  tab, U+000B), FF (formfeed, U+000C), NEL (next line, U+0085), LS (line
749  separator, U+2028), and PS (paragraph separator, U+2029). The last two are  separator, U+2028), and PS (paragraph separator, U+2029). For the 8-bit
750  recognized only in UTF-8 mode.  library, the last two are recognized only in UTF-8 mode.
751  </P>  </P>
752  <P>  <P>
753  The newline setting in the options word uses three bits that are treated  The newline setting in the options word uses three bits that are treated
# Line 760  with Perl. It can also be set by a (?U) Line 807  with Perl. It can also be set by a (?U)
807    PCRE_UTF8    PCRE_UTF8
808  </pre>  </pre>
809  This option causes PCRE to regard both the pattern and the subject as strings  This option causes PCRE to regard both the pattern and the subject as strings
810  of UTF-8 characters instead of single-byte character strings. However, it is  of UTF-8 characters instead of single-byte strings. However, it is available
811  available only when PCRE is built to include UTF-8 support. If not, the use  only when PCRE is built to include UTF support. If not, the use of this option
812  of this option provokes an error. Details of how this option changes the  provokes an error. Details of how this option changes the behaviour of PCRE are
813  behaviour of PCRE are given in the  given in the
814  <a href="pcreunicode.html"><b>pcreunicode</b></a>  <a href="pcreunicode.html"><b>pcreunicode</b></a>
815  page.  page.
816  <pre>  <pre>
817    PCRE_NO_UTF8_CHECK    PCRE_NO_UTF8_CHECK
818  </pre>  </pre>
819  When PCRE_UTF8 is set, the validity of the pattern as a UTF-8 string is  When PCRE_UTF8 is set, the validity of the pattern as a UTF-8
820  automatically checked. There is a discussion about the  string is automatically checked. There is a discussion about the
821  <a href="pcre.html#utf8strings">validity of UTF-8 strings</a>  <a href="pcreunicode.html#utf8strings">validity of UTF-8 strings</a>
822  in the main  in the
823  <a href="pcre.html"><b>pcre</b></a>  <a href="pcreunicode.html"><b>pcreunicode</b></a>
824  page. If an invalid UTF-8 sequence of bytes is found, <b>pcre_compile()</b>  page. If an invalid UTF-8 sequence is found, <b>pcre_compile()</b> returns an
825  returns an error. If you already know that your pattern is valid, and you want  error. If you already know that your pattern is valid, and you want to skip
826  to skip this check for performance reasons, you can set the PCRE_NO_UTF8_CHECK  this check for performance reasons, you can set the PCRE_NO_UTF8_CHECK option.
827  option. When it is set, the effect of passing an invalid UTF-8 string as a  When it is set, the effect of passing an invalid UTF-8 string as a pattern is
828  pattern is undefined. It may cause your program to crash. Note that this option  undefined. It may cause your program to crash. Note that this option can also
829  can also be passed to <b>pcre_exec()</b> and <b>pcre_dfa_exec()</b>, to suppress  be passed to <b>pcre_exec()</b> and <b>pcre_dfa_exec()</b>, to suppress the
830  the UTF-8 validity checking of subject strings.  validity checking of subject strings.
831  </P>  </P>
832  <br><a name="SEC10" href="#TOC1">COMPILATION ERROR CODES</a><br>  <br><a name="SEC12" href="#TOC1">COMPILATION ERROR CODES</a><br>
833  <P>  <P>
834  The following table lists the error codes that may be returned by  The following table lists the error codes that may be returned by
835  <b>pcre_compile2()</b>, along with the error messages that may be returned by  <b>pcre_compile2()</b>, along with the error messages that may be returned by
836  both compiling functions. As PCRE has developed, some error codes have fallen  both compiling functions. Note that error messages are always 8-bit ASCII
837  out of use. To avoid confusion, they have not been re-used.  strings, even in 16-bit mode. As PCRE has developed, some error codes have
838    fallen out of use. To avoid confusion, they have not been re-used.
839  <pre>  <pre>
840     0  no error     0  no error
841     1  \ at end of pattern     1  \ at end of pattern
# Line 821  out of use. To avoid confusion, they hav Line 869  out of use. To avoid confusion, they hav
869    29  (?R or (?[+-]digits must be followed by )    29  (?R or (?[+-]digits must be followed by )
870    30  unknown POSIX class name    30  unknown POSIX class name
871    31  POSIX collating elements are not supported    31  POSIX collating elements are not supported
872    32  this version of PCRE is not compiled with PCRE_UTF8 support    32  this version of PCRE is compiled without UTF support
873    33  [this code is not in use]    33  [this code is not in use]
874    34  character value in \x{...} sequence is too large    34  character value in \x{...} sequence is too large
875    35  invalid condition (?(0)    35  invalid condition (?(0)
# Line 833  out of use. To avoid confusion, they hav Line 881  out of use. To avoid confusion, they hav
881    41  unrecognized character after (?P    41  unrecognized character after (?P
882    42  syntax error in subpattern name (missing terminator)    42  syntax error in subpattern name (missing terminator)
883    43  two named subpatterns have the same name    43  two named subpatterns have the same name
884    44  invalid UTF-8 string    44  invalid UTF-8 string (specifically UTF-8)
885    45  support for \P, \p, and \X has not been compiled    45  support for \P, \p, and \X has not been compiled
886    46  malformed \P or \p sequence    46  malformed \P or \p sequence
887    47  unknown property name after \P or \p    47  unknown property name after \P or \p
888    48  subpattern name is too long (maximum 32 characters)    48  subpattern name is too long (maximum 32 characters)
889    49  too many named subpatterns (maximum 10000)    49  too many named subpatterns (maximum 10000)
890    50  [this code is not in use]    50  [this code is not in use]
891    51  octal value is greater than \377 (not in UTF-8 mode)    51  octal value is greater than \377 in 8-bit non-UTF-8 mode
892    52  internal error: overran compiling workspace    52  internal error: overran compiling workspace
893    53  internal error: previously-checked referenced subpattern    53  internal error: previously-checked referenced subpattern
894          not found          not found
# Line 859  out of use. To avoid confusion, they hav Line 907  out of use. To avoid confusion, they hav
907    65  different names for subpatterns of the same number are    65  different names for subpatterns of the same number are
908          not allowed          not allowed
909    66  (*MARK) must have an argument    66  (*MARK) must have an argument
910    67  this version of PCRE is not compiled with PCRE_UCP support    67  this version of PCRE is not compiled with Unicode property
911            support
912    68  \c must be followed by an ASCII character    68  \c must be followed by an ASCII character
913    69  \k is not followed by a braced, angle-bracketed, or quoted name    69  \k is not followed by a braced, angle-bracketed, or quoted name
914      70  internal error: unknown opcode in find_fixedlength()
915      71  \N is not supported in a class
916      72  too many forward references
917      73  disallowed Unicode code point (&#62;= 0xd800 && &#60;= 0xdfff)
918      74  invalid UTF-16 string (specifically UTF-16)
919  </pre>  </pre>
920  The numbers 32 and 10000 in errors 48 and 49 are defaults; different values may  The numbers 32 and 10000 in errors 48 and 49 are defaults; different values may
921  be used if the limits were changed when PCRE was built.  be used if the limits were changed when PCRE was built.
922  <a name="studyingapattern"></a></P>  <a name="studyingapattern"></a></P>
923  <br><a name="SEC11" href="#TOC1">STUDYING A PATTERN</a><br>  <br><a name="SEC13" href="#TOC1">STUDYING A PATTERN</a><br>
924  <P>  <P>
925  <b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i></b>  <b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i></b>
926  <b>const char **<i>errptr</i>);</b>  <b>const char **<i>errptr</i>);</b>
# Line 895  wants to pass any of the other fields to Line 949  wants to pass any of the other fields to
949  <b>pcre_dfa_exec()</b>, it must set up its own <b>pcre_extra</b> block.  <b>pcre_dfa_exec()</b>, it must set up its own <b>pcre_extra</b> block.
950  </P>  </P>
951  <P>  <P>
952  The second argument of <b>pcre_study()</b> contains option bits. There is only  The second argument of <b>pcre_study()</b> contains option bits. There are three
953  one option: PCRE_STUDY_JIT_COMPILE. If this is set, and the just-in-time  options:
954  compiler is available, the pattern is further compiled into machine code that  <pre>
955  executes much faster than the <b>pcre_exec()</b> matching function. If    PCRE_STUDY_JIT_COMPILE
956  the just-in-time compiler is not available, this option is ignored. All other    PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
957  bits in the <i>options</i> argument must be zero.    PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
958    </pre>
959    If any of these are set, and the just-in-time compiler is available, the
960    pattern is further compiled into machine code that executes much faster than
961    the <b>pcre_exec()</b> interpretive matching function. If the just-in-time
962    compiler is not available, these options are ignored. All other bits in the
963    <i>options</i> argument must be zero.
964  </P>  </P>
965  <P>  <P>
966  JIT compilation is a heavyweight optimization. It can take some time for  JIT compilation is a heavyweight optimization. It can take some time for
# Line 925  When you are finished with a pattern, yo Line 985  When you are finished with a pattern, yo
985  study data by calling <b>pcre_free_study()</b>. This function was added to the  study data by calling <b>pcre_free_study()</b>. This function was added to the
986  API for release 8.20. For earlier versions, the memory could be freed with  API for release 8.20. For earlier versions, the memory could be freed with
987  <b>pcre_free()</b>, just like the pattern itself. This will still work in cases  <b>pcre_free()</b>, just like the pattern itself. This will still work in cases
988  where PCRE_STUDY_JIT_COMPILE is not used, but it is advisable to change to the  where JIT optimization is not used, but it is advisable to change to the new
989  new function when convenient.  function when convenient.
990  </P>  </P>
991  <P>  <P>
992  This is a typical way in which <b>pcre_study</b>() is used (except that in a  This is a typical way in which <b>pcre_study</b>() is used (except that in a
# Line 958  in a calling program via the pcre_ful Line 1018  in a calling program via the pcre_ful
1018  Studying a pattern is also useful for non-anchored patterns that do not have a  Studying a pattern is also useful for non-anchored patterns that do not have a
1019  single fixed starting character. A bitmap of possible starting bytes is  single fixed starting character. A bitmap of possible starting bytes is
1020  created. This speeds up finding a position in the subject at which to start  created. This speeds up finding a position in the subject at which to start
1021  matching.  matching. (In 16-bit mode, the bitmap is used for 16-bit values less than 256.)
1022  </P>  </P>
1023  <P>  <P>
1024  These two optimizations apply to both <b>pcre_exec()</b> and  These two optimizations apply to both <b>pcre_exec()</b> and
1025  <b>pcre_dfa_exec()</b>. However, they are not used by <b>pcre_exec()</b> if  <b>pcre_dfa_exec()</b>, and the information is also used by the JIT compiler.
1026  <b>pcre_study()</b> is called with the PCRE_STUDY_JIT_COMPILE option, and  The optimizations can be disabled by setting the PCRE_NO_START_OPTIMIZE option
1027  just-in-time compiling is successful. The optimizations can be disabled by  when calling <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>, but if this is done,
1028  setting the PCRE_NO_START_OPTIMIZE option when calling <b>pcre_exec()</b> or  JIT execution is also disabled. You might want to do this if your pattern
1029  <b>pcre_dfa_exec()</b>. You might want to do this if your pattern contains  contains callouts or (*MARK) and you want to make use of these facilities in
1030  callouts or (*MARK) (which cannot be handled by the JIT compiler), and you want  cases where matching fails. See the discussion of PCRE_NO_START_OPTIMIZE
 to make use of these facilities in cases where matching fails. See the  
 discussion of PCRE_NO_START_OPTIMIZE  
1031  <a href="#execoptions">below.</a>  <a href="#execoptions">below.</a>
1032  <a name="localesupport"></a></P>  <a name="localesupport"></a></P>
1033  <br><a name="SEC12" href="#TOC1">LOCALE SUPPORT</a><br>  <br><a name="SEC14" href="#TOC1">LOCALE SUPPORT</a><br>
1034  <P>  <P>
1035  PCRE handles caseless matching, and determines whether characters are letters,  PCRE handles caseless matching, and determines whether characters are letters,
1036  digits, or whatever, by reference to a set of tables, indexed by character  digits, or whatever, by reference to a set of tables, indexed by character
1037  value. When running in UTF-8 mode, this applies only to characters with codes  value. When running in UTF-8 mode, this applies only to characters
1038  less than 128. By default, higher-valued codes never match escapes such as \w  with codes less than 128. By default, higher-valued codes never match escapes
1039  or \d, but they can be tested with \p if PCRE is built with Unicode character  such as \w or \d, but they can be tested with \p if PCRE is built with
1040  property support. Alternatively, the PCRE_UCP option can be set at compile  Unicode character property support. Alternatively, the PCRE_UCP option can be
1041  time; this causes \w and friends to use Unicode property support instead of  set at compile time; this causes \w and friends to use Unicode property
1042  built-in tables. The use of locales with Unicode is discouraged. If you are  support instead of built-in tables. The use of locales with Unicode is
1043  handling characters with codes greater than 127, you should either use UTF-8  discouraged. If you are handling characters with codes greater than 127, you
1044  and Unicode, or use locales, but not try to mix the two.  should either use UTF-8 and Unicode, or use locales, but not try to mix the
1045    two.
1046  </P>  </P>
1047  <P>  <P>
1048  PCRE contains an internal set of tables that are used when the final argument  PCRE contains an internal set of tables that are used when the final argument
# Line 1033  this facility could be used to match a p Line 1092  this facility could be used to match a p
1092  one in which it was compiled. Passing table pointers at run time is discussed  one in which it was compiled. Passing table pointers at run time is discussed
1093  below in the section on matching a pattern.  below in the section on matching a pattern.
1094  <a name="infoaboutpattern"></a></P>  <a name="infoaboutpattern"></a></P>
1095  <br><a name="SEC13" href="#TOC1">INFORMATION ABOUT A PATTERN</a><br>  <br><a name="SEC15" href="#TOC1">INFORMATION ABOUT A PATTERN</a><br>
1096  <P>  <P>
1097  <b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>  <b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
1098  <b>int <i>what</i>, void *<i>where</i>);</b>  <b>int <i>what</i>, void *<i>where</i>);</b>
1099  </P>  </P>
1100  <P>  <P>
1101  The <b>pcre_fullinfo()</b> function returns information about a compiled  The <b>pcre_fullinfo()</b> function returns information about a compiled
1102  pattern. It replaces the obsolete <b>pcre_info()</b> function, which is  pattern. It replaces the <b>pcre_info()</b> function, which was removed from the
1103  nevertheless retained for backwards compatibility (and is documented below).  library at version 8.30, after more than 10 years of obsolescence.
1104  </P>  </P>
1105  <P>  <P>
1106  The first argument for <b>pcre_fullinfo()</b> is a pointer to the compiled  The first argument for <b>pcre_fullinfo()</b> is a pointer to the compiled
# Line 1051  information is required, and the fourth Line 1110  information is required, and the fourth
1110  to receive the data. The yield of the function is zero for success, or one of  to receive the data. The yield of the function is zero for success, or one of
1111  the following negative numbers:  the following negative numbers:
1112  <pre>  <pre>
1113    PCRE_ERROR_NULL       the argument <i>code</i> was NULL    PCRE_ERROR_NULL           the argument <i>code</i> was NULL
1114                          the argument <i>where</i> was NULL                              the argument <i>where</i> was NULL
1115    PCRE_ERROR_BADMAGIC   the "magic number" was not found    PCRE_ERROR_BADMAGIC       the "magic number" was not found
1116    PCRE_ERROR_BADOPTION  the value of <i>what</i> was invalid    PCRE_ERROR_BADENDIANNESS  the pattern was compiled with different
1117                                endianness
1118      PCRE_ERROR_BADOPTION      the value of <i>what</i> was invalid
1119  </pre>  </pre>
1120  The "magic number" is placed at the start of each compiled pattern as a simple  The "magic number" is placed at the start of each compiled pattern as a simple
1121  check against passing an arbitrary memory pointer. Here is a typical call of  check against passing an arbitrary memory pointer. The endianness error can
1122  <b>pcre_fullinfo()</b>, to obtain the length of the compiled pattern:  occur if a compiled pattern is saved and reloaded on a different host. Here is
1123    a typical call of <b>pcre_fullinfo()</b>, to obtain the length of the compiled
1124    pattern:
1125  <pre>  <pre>
1126    int rc;    int rc;
1127    size_t length;    size_t length;
# Line 1092  a NULL table pointer. Line 1155  a NULL table pointer.
1155  <pre>  <pre>
1156    PCRE_INFO_FIRSTBYTE    PCRE_INFO_FIRSTBYTE
1157  </pre>  </pre>
1158  Return information about the first byte of any matched string, for a  Return information about the first data unit of any matched string, for a
1159  non-anchored pattern. The fourth argument should point to an <b>int</b>  non-anchored pattern. (The name of this option refers to the 8-bit library,
1160  variable. (This option used to be called PCRE_INFO_FIRSTCHAR; the old name is  where data units are bytes.) The fourth argument should point to an <b>int</b>
1161  still recognized for backwards compatibility.)  variable.
1162    </P>
1163    <P>
1164    If there is a fixed first value, for example, the letter "c" from a pattern
1165    such as (cat|cow|coyote), its value is returned. In the 8-bit library, the
1166    value is always less than 256; in the 16-bit library the value can be up to
1167    0xffff.
1168  </P>  </P>
1169  <P>  <P>
1170  If there is a fixed first byte, for example, from a pattern such as  If there is no fixed first value, and if either
 (cat|cow|coyote), its value is returned. Otherwise, if either  
1171  <br>  <br>
1172  <br>  <br>
1173  (a) the pattern was compiled with the PCRE_MULTILINE option, and every branch  (a) the pattern was compiled with the PCRE_MULTILINE option, and every branch
# Line 1117  returned. For anchored patterns, -2 is r Line 1185  returned. For anchored patterns, -2 is r
1185    PCRE_INFO_FIRSTTABLE    PCRE_INFO_FIRSTTABLE
1186  </pre>  </pre>
1187  If the pattern was studied, and this resulted in the construction of a 256-bit  If the pattern was studied, and this resulted in the construction of a 256-bit
1188  table indicating a fixed set of bytes for the first byte in any matching  table indicating a fixed set of values for the first data unit in any matching
1189  string, a pointer to the table is returned. Otherwise NULL is returned. The  string, a pointer to the table is returned. Otherwise NULL is returned. The
1190  fourth argument should point to an <b>unsigned char *</b> variable.  fourth argument should point to an <b>unsigned char *</b> variable.
1191  <pre>  <pre>
# Line 1135  Return 1 if the (?J) or (?-J) option set Line 1203  Return 1 if the (?J) or (?-J) option set
1203  <pre>  <pre>
1204    PCRE_INFO_JIT    PCRE_INFO_JIT
1205  </pre>  </pre>
1206  Return 1 if the pattern was studied with the PCRE_STUDY_JIT_COMPILE option, and  Return 1 if the pattern was studied with one of the JIT options, and
1207  just-in-time compiling was successful. The fourth argument should point to an  just-in-time compiling was successful. The fourth argument should point to an
1208  <b>int</b> variable. A return value of 0 means that JIT support is not available  <b>int</b> variable. A return value of 0 means that JIT support is not available
1209  in this version of PCRE, or that the pattern was not studied with the  in this version of PCRE, or that the pattern was not studied with a JIT option,
1210  PCRE_STUDY_JIT_COMPILE option, or that the JIT compiler could not handle this  or that the JIT compiler could not handle this particular pattern. See the
 particular pattern. See the  
1211  <a href="pcrejit.html"><b>pcrejit</b></a>  <a href="pcrejit.html"><b>pcrejit</b></a>
1212  documentation for details of what can and cannot be handled.  documentation for details of what can and cannot be handled.
1213  <pre>  <pre>
1214    PCRE_INFO_JITSIZE    PCRE_INFO_JITSIZE
1215  </pre>  </pre>
1216  If the pattern was successfully studied with the PCRE_STUDY_JIT_COMPILE option,  If the pattern was successfully studied with a JIT option, return the size of
1217  return the size of the JIT compiled code, otherwise return zero. The fourth  the JIT compiled code, otherwise return zero. The fourth argument should point
1218  argument should point to a <b>size_t</b> variable.  to a <b>size_t</b> variable.
1219  <pre>  <pre>
1220    PCRE_INFO_LASTLITERAL    PCRE_INFO_LASTLITERAL
1221  </pre>  </pre>
1222  Return the value of the rightmost literal byte that must exist in any matched  Return the value of the rightmost literal data unit that must exist in any
1223  string, other than at its start, if such a byte has been recorded. The fourth  matched string, other than at its start, if such a value has been recorded. The
1224  argument should point to an <b>int</b> variable. If there is no such byte, -1 is  fourth argument should point to an <b>int</b> variable. If there is no such
1225  returned. For anchored patterns, a last literal byte is recorded only if it  value, -1 is returned. For anchored patterns, a last literal value is recorded
1226  follows something of variable length. For example, for the pattern  only if it follows something of variable length. For example, for the pattern
1227  /^a\d+z\d+/ the returned value is "z", but for /^a\dz\d/ the returned value  /^a\d+z\d+/ the returned value is "z", but for /^a\dz\d/ the returned value
1228  is -1.  is -1.
1229  <pre>  <pre>
1230      PCRE_INFO_MAXLOOKBEHIND
1231    </pre>
1232    Return the number of characters (NB not bytes) in the longest lookbehind
1233    assertion in the pattern. Note that the simple assertions \b and \B require a
1234    one-character lookbehind. This information is useful when doing multi-segment
1235    matching using the partial matching facilities.
1236    <pre>
1237    PCRE_INFO_MINLENGTH    PCRE_INFO_MINLENGTH
1238  </pre>  </pre>
1239  If the pattern was studied and a minimum length for matching subject strings  If the pattern was studied and a minimum length for matching subject strings
1240  was computed, its value is returned. Otherwise the returned value is -1. The  was computed, its value is returned. Otherwise the returned value is -1. The
1241  value is a number of characters, not bytes (this may be relevant in UTF-8  value is a number of characters, which in UTF-8 mode may be different from the
1242  mode). The fourth argument should point to an <b>int</b> variable. A  number of bytes. The fourth argument should point to an <b>int</b> variable. A
1243  non-negative value is a lower bound to the length of any matching string. There  non-negative value is a lower bound to the length of any matching string. There
1244  may not be any strings of that length that do actually match, but every string  may not be any strings of that length that do actually match, but every string
1245  that does match is at least that long.  that does match is at least that long.
# Line 1189  The map consists of a number of fixed-si Line 1263  The map consists of a number of fixed-si
1263  the number of entries, and PCRE_INFO_NAMEENTRYSIZE gives the size of each  the number of entries, and PCRE_INFO_NAMEENTRYSIZE gives the size of each
1264  entry; both of these return an <b>int</b> value. The entry size depends on the  entry; both of these return an <b>int</b> value. The entry size depends on the
1265  length of the longest name. PCRE_INFO_NAMETABLE returns a pointer to the first  length of the longest name. PCRE_INFO_NAMETABLE returns a pointer to the first
1266  entry of the table (a pointer to <b>char</b>). The first two bytes of each entry  entry of the table. This is a pointer to <b>char</b> in the 8-bit library, where
1267  are the number of the capturing parenthesis, most significant byte first. The  the first two bytes of each entry are the number of the capturing parenthesis,
1268  rest of the entry is the corresponding name, zero terminated.  most significant byte first. In the 16-bit library, the pointer points to
1269    16-bit data units, the first of which contains the parenthesis number. The rest
1270    of the entry is the corresponding name, zero terminated.
1271  </P>  </P>
1272  <P>  <P>
1273  The names are in alphabetical order. Duplicate names may appear if (?| is used  The names are in alphabetical order. Duplicate names may appear if (?| is used
# Line 1207  necessarily the case because later subpa Line 1283  necessarily the case because later subpa
1283  </P>  </P>
1284  <P>  <P>
1285  As a simple example of the name/number table, consider the following pattern  As a simple example of the name/number table, consider the following pattern
1286  (assume PCRE_EXTENDED is set, so white space - including newlines - is  after compilation by the 8-bit library (assume PCRE_EXTENDED is set, so white
1287  ignored):  space - including newlines - is ignored):
1288  <pre>  <pre>
1289    (?&#60;date&#62; (?&#60;year&#62;(\d\d)?\d\d) - (?&#60;month&#62;\d\d) - (?&#60;day&#62;\d\d) )    (?&#60;date&#62; (?&#60;year&#62;(\d\d)?\d\d) - (?&#60;month&#62;\d\d) - (?&#60;day&#62;\d\d) )
1290  </pre>  </pre>
# Line 1258  For such patterns, the PCRE_ANCHORED bit Line 1334  For such patterns, the PCRE_ANCHORED bit
1334  <pre>  <pre>
1335    PCRE_INFO_SIZE    PCRE_INFO_SIZE
1336  </pre>  </pre>
1337  Return the size of the compiled pattern. The fourth argument should point to a  Return the size of the compiled pattern in bytes (for both libraries). The
1338  <b>size_t</b> variable. This value does not include the size of the <b>pcre</b>  fourth argument should point to a <b>size_t</b> variable. This value does not
1339  structure that is returned by <b>pcre_compile()</b>. The value that is passed as  include the size of the <b>pcre</b> structure that is returned by
1340  the argument to <b>pcre_malloc()</b> when <b>pcre_compile()</b> is getting memory  <b>pcre_compile()</b>. The value that is passed as the argument to
1341  in which to place the compiled data is the value returned by this option plus  <b>pcre_malloc()</b> when <b>pcre_compile()</b> is getting memory in which to
1342  the size of the <b>pcre</b> structure. Studying a compiled pattern, with or  place the compiled data is the value returned by this option plus the size of
1343  without JIT, does not alter the value returned by this option.  the <b>pcre</b> structure. Studying a compiled pattern, with or without JIT,
1344    does not alter the value returned by this option.
1345  <pre>  <pre>
1346    PCRE_INFO_STUDYSIZE    PCRE_INFO_STUDYSIZE
1347  </pre>  </pre>
1348  Return the size of the data block pointed to by the <i>study_data</i> field in a  Return the size in bytes of the data block pointed to by the <i>study_data</i>
1349  <b>pcre_extra</b> block. If <b>pcre_extra</b> is NULL, or there is no study data,  field in a <b>pcre_extra</b> block. If <b>pcre_extra</b> is NULL, or there is no
1350  zero is returned. The fourth argument should point to a <b>size_t</b> variable.  study data, zero is returned. The fourth argument should point to a
1351  The <i>study_data</i> field is set by <b>pcre_study()</b> to record information  <b>size_t</b> variable. The <i>study_data</i> field is set by <b>pcre_study()</b>
1352  that will speed up matching (see the section entitled  to record information that will speed up matching (see the section entitled
1353  <a href="#studyingapattern">"Studying a pattern"</a>  <a href="#studyingapattern">"Studying a pattern"</a>
1354  above). The format of the <i>study_data</i> block is private, but its length  above). The format of the <i>study_data</i> block is private, but its length
1355  is made available via this option so that it can be saved and restored (see the  is made available via this option so that it can be saved and restored (see the
1356  <a href="pcreprecompile.html"><b>pcreprecompile</b></a>  <a href="pcreprecompile.html"><b>pcreprecompile</b></a>
1357  documentation for details).  documentation for details).
1358  </P>  </P>
1359  <br><a name="SEC14" href="#TOC1">OBSOLETE INFO FUNCTION</a><br>  <br><a name="SEC16" href="#TOC1">REFERENCE COUNTS</a><br>
 <P>  
 <b>int pcre_info(const pcre *<i>code</i>, int *<i>optptr</i>, int</b>  
 <b>*<i>firstcharptr</i>);</b>  
 </P>  
 <P>  
 The <b>pcre_info()</b> function is now obsolete because its interface is too  
 restrictive to return all the available data about a compiled pattern. New  
 programs should use <b>pcre_fullinfo()</b> instead. The yield of  
 <b>pcre_info()</b> is the number of capturing subpatterns, or one of the  
 following negative numbers:  
 <pre>  
   PCRE_ERROR_NULL       the argument <i>code</i> was NULL  
   PCRE_ERROR_BADMAGIC   the "magic number" was not found  
 </pre>  
 If the <i>optptr</i> argument is not NULL, a copy of the options with which the  
 pattern was compiled is placed in the integer it points to (see  
 PCRE_INFO_OPTIONS above).  
 </P>  
 <P>  
 If the pattern is not anchored and the <i>firstcharptr</i> argument is not NULL,  
 it is used to pass back information about the first character of any matched  
 string (see PCRE_INFO_FIRSTBYTE above).  
 </P>  
 <br><a name="SEC15" href="#TOC1">REFERENCE COUNTS</a><br>  
1360  <P>  <P>
1361  <b>int pcre_refcount(pcre *<i>code</i>, int <i>adjust</i>);</b>  <b>int pcre_refcount(pcre *<i>code</i>, int <i>adjust</i>);</b>
1362  </P>  </P>
# Line 1327  Except when it is zero, the reference co Line 1380  Except when it is zero, the reference co
1380  pattern is compiled on one host and then transferred to a host whose byte-order  pattern is compiled on one host and then transferred to a host whose byte-order
1381  is different. (This seems a highly unlikely scenario.)  is different. (This seems a highly unlikely scenario.)
1382  </P>  </P>
1383  <br><a name="SEC16" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>  <br><a name="SEC17" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>
1384  <P>  <P>
1385  <b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>  <b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
1386  <b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>  <b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
# Line 1392  fields (not necessarily in this order): Line 1445  fields (not necessarily in this order):
1445    const unsigned char *<i>tables</i>;    const unsigned char *<i>tables</i>;
1446    unsigned char **<i>mark</i>;    unsigned char **<i>mark</i>;
1447  </pre>  </pre>
1448  The <i>flags</i> field is a bitmap that specifies which of the other fields  In the 16-bit version of this structure, the <i>mark</i> field has type
1449  are set. The flag bits are:  "PCRE_UCHAR16 **".
1450    </P>
1451    <P>
1452    The <i>flags</i> field is used to specify which of the other fields are set. The
1453    flag bits are:
1454  <pre>  <pre>
1455    PCRE_EXTRA_STUDY_DATA    PCRE_EXTRA_CALLOUT_DATA
1456    PCRE_EXTRA_EXECUTABLE_JIT    PCRE_EXTRA_EXECUTABLE_JIT
1457      PCRE_EXTRA_MARK
1458    PCRE_EXTRA_MATCH_LIMIT    PCRE_EXTRA_MATCH_LIMIT
1459    PCRE_EXTRA_MATCH_LIMIT_RECURSION    PCRE_EXTRA_MATCH_LIMIT_RECURSION
1460    PCRE_EXTRA_CALLOUT_DATA    PCRE_EXTRA_STUDY_DATA
1461    PCRE_EXTRA_TABLES    PCRE_EXTRA_TABLES
   PCRE_EXTRA_MARK  
1462  </pre>  </pre>
1463  Other flag bits should be set to zero. The <i>study_data</i> field and sometimes  Other flag bits should be set to zero. The <i>study_data</i> field and sometimes
1464  the <i>executable_jit</i> field are set in the <b>pcre_extra</b> block that is  the <i>executable_jit</i> field are set in the <b>pcre_extra</b> block that is
1465  returned by <b>pcre_study()</b>, together with the appropriate flag bits. You  returned by <b>pcre_study()</b>, together with the appropriate flag bits. You
1466  should not set these yourself, but you may add to the block by setting the  should not set these yourself, but you may add to the block by setting other
1467  other fields and their corresponding flag bits.  fields and their corresponding flag bits.
1468  </P>  </P>
1469  <P>  <P>
1470  The <i>match_limit</i> field provides a means of preventing PCRE from using up a  The <i>match_limit</i> field provides a means of preventing PCRE from using up a
# Line 1425  in the subject string. Line 1482  in the subject string.
1482  </P>  </P>
1483  <P>  <P>
1484  When <b>pcre_exec()</b> is called with a pattern that was successfully studied  When <b>pcre_exec()</b> is called with a pattern that was successfully studied
1485  with the PCRE_STUDY_JIT_COMPILE option, the way that the matching is executed  with a JIT option, the way that the matching is executed is entirely different.
1486  is entirely different. However, there is still the possibility of runaway  However, there is still the possibility of runaway matching that goes on for a
1487  matching that goes on for a very long time, and so the <i>match_limit</i> value  very long time, and so the <i>match_limit</i> value is also used in this case
1488  is also used in this case (but in a different way) to limit how long the  (but in a different way) to limit how long the matching can continue.
 matching can continue.  
1489  </P>  </P>
1490  <P>  <P>
1491  The default value for the limit can be set when PCRE is built; the default  The default value for the limit can be set when PCRE is built; the default
# Line 1450  This limit is of use only if it is set s Line 1506  This limit is of use only if it is set s
1506  Limiting the recursion depth limits the amount of machine stack that can be  Limiting the recursion depth limits the amount of machine stack that can be
1507  used, or, when PCRE has been compiled to use memory on the heap instead of the  used, or, when PCRE has been compiled to use memory on the heap instead of the
1508  stack, the amount of heap memory that can be used. This limit is not relevant,  stack, the amount of heap memory that can be used. This limit is not relevant,
1509  and is ignored, if the pattern was successfully studied with  and is ignored, when matching is done using JIT compiled code.
 PCRE_STUDY_JIT_COMPILE.  
1510  </P>  </P>
1511  <P>  <P>
1512  The default value for <i>match_limit_recursion</i> can be set when PCRE is  The default value for <i>match_limit_recursion</i> can be set when PCRE is
# Line 1482  documentation for a discussion of saving Line 1537  documentation for a discussion of saving
1537  </P>  </P>
1538  <P>  <P>
1539  If PCRE_EXTRA_MARK is set in the <i>flags</i> field, the <i>mark</i> field must  If PCRE_EXTRA_MARK is set in the <i>flags</i> field, the <i>mark</i> field must
1540  be set to point to a <b>char *</b> variable. If the pattern contains any  be set to point to a suitable variable. If the pattern contains any
1541  backtracking control verbs such as (*MARK:NAME), and the execution ends up with  backtracking control verbs such as (*MARK:NAME), and the execution ends up with
1542  a name to pass back, a pointer to the name string (zero terminated) is placed  a name to pass back, a pointer to the name string (zero terminated) is placed
1543  in the variable pointed to by the <i>mark</i> field. The names are within the  in the variable pointed to by the <i>mark</i> field. The names are within the
1544  compiled pattern; if you wish to retain such a name you must copy it before  compiled pattern; if you wish to retain such a name you must copy it before
1545  freeing the memory of a compiled pattern. If there is no name to pass back, the  freeing the memory of a compiled pattern. If there is no name to pass back, the
1546  variable pointed to by the <i>mark</i> field set to NULL. For details of the  variable pointed to by the <i>mark</i> field is set to NULL. For details of the
1547  backtracking control verbs, see the section entitled  backtracking control verbs, see the section entitled
1548  <a href="pcrepattern.html#backtrackcontrol">"Backtracking control"</a>  <a href="pcrepattern.html#backtrackcontrol">"Backtracking control"</a>
1549  in the  in the
# Line 1502  Option bits for pcre_exec() Line 1557  Option bits for pcre_exec()
1557  The unused bits of the <i>options</i> argument for <b>pcre_exec()</b> must be  The unused bits of the <i>options</i> argument for <b>pcre_exec()</b> must be
1558  zero. The only bits that may be set are PCRE_ANCHORED, PCRE_NEWLINE_<i>xxx</i>,  zero. The only bits that may be set are PCRE_ANCHORED, PCRE_NEWLINE_<i>xxx</i>,
1559  PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART,  PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART,
1560  PCRE_NO_START_OPTIMIZE, PCRE_NO_UTF8_CHECK, PCRE_PARTIAL_SOFT, and  PCRE_NO_START_OPTIMIZE, PCRE_NO_UTF8_CHECK, PCRE_PARTIAL_HARD, and
1561  PCRE_PARTIAL_HARD.  PCRE_PARTIAL_SOFT.
1562  </P>  </P>
1563  <P>  <P>
1564  If the pattern was successfully studied with the PCRE_STUDY_JIT_COMPILE option,  If the pattern was successfully studied with one of the just-in-time (JIT)
1565  the only supported options for JIT execution are PCRE_NO_UTF8_CHECK,  compile options, the only supported options for JIT execution are
1566  PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, and PCRE_NOTEMPTY_ATSTART. Note in  PCRE_NO_UTF8_CHECK, PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY,
1567  particular that partial matching is not supported. If an unsupported option is  PCRE_NOTEMPTY_ATSTART, PCRE_PARTIAL_HARD, and PCRE_PARTIAL_SOFT. If an
1568  used, JIT execution is disabled and the normal interpretive code in  unsupported option is used, JIT execution is disabled and the normal
1569  <b>pcre_exec()</b> is run.  interpretive code in <b>pcre_exec()</b> is run.
1570  <pre>  <pre>
1571    PCRE_ANCHORED    PCRE_ANCHORED
1572  </pre>  </pre>
# Line 1634  causing performance to suffer, but ensur Line 1689  causing performance to suffer, but ensur
1689  "no match", the callouts do occur, and that items such as (*COMMIT) and (*MARK)  "no match", the callouts do occur, and that items such as (*COMMIT) and (*MARK)
1690  are considered at every possible starting position in the subject string. If  are considered at every possible starting position in the subject string. If
1691  PCRE_NO_START_OPTIMIZE is set at compile time, it cannot be unset at matching  PCRE_NO_START_OPTIMIZE is set at compile time, it cannot be unset at matching
1692  time.  time. The use of PCRE_NO_START_OPTIMIZE disables JIT execution; when it is set,
1693    matching is always done interpretively.
1694  </P>  </P>
1695  <P>  <P>
1696  Setting PCRE_NO_START_OPTIMIZE can change the outcome of a matching operation.  Setting PCRE_NO_START_OPTIMIZE can change the outcome of a matching operation.
# Line 1668  returned. Line 1724  returned.
1724  </pre>  </pre>
1725  When PCRE_UTF8 is set at compile time, the validity of the subject as a UTF-8  When PCRE_UTF8 is set at compile time, the validity of the subject as a UTF-8
1726  string is automatically checked when <b>pcre_exec()</b> is subsequently called.  string is automatically checked when <b>pcre_exec()</b> is subsequently called.
1727  The value of <i>startoffset</i> is also checked to ensure that it points to the  The entire string is checked before any other processing takes place. The value
1728  start of a UTF-8 character. There is a discussion about the validity of UTF-8  of <i>startoffset</i> is also checked to ensure that it points to the start of a
1729  strings in the  UTF-8 character. There is a discussion about the
1730  <a href="pcre.html#utf8strings">section on UTF-8 support</a>  <a href="pcreunicode.html#utf8strings">validity of UTF-8 strings</a>
1731  in the main  in the
1732  <a href="pcre.html"><b>pcre</b></a>  <a href="pcreunicode.html"><b>pcreunicode</b></a>
1733  page. If an invalid UTF-8 sequence of bytes is found, <b>pcre_exec()</b> returns  page. If an invalid sequence of bytes is found, <b>pcre_exec()</b> returns the
1734  the error PCRE_ERROR_BADUTF8 or, if PCRE_PARTIAL_HARD is set and the problem is  error PCRE_ERROR_BADUTF8 or, if PCRE_PARTIAL_HARD is set and the problem is a
1735  a truncated UTF-8 character at the end of the subject, PCRE_ERROR_SHORTUTF8. In  truncated character at the end of the subject, PCRE_ERROR_SHORTUTF8. In both
1736  both cases, information about the precise nature of the error may also be  cases, information about the precise nature of the error may also be returned
1737  returned (see the descriptions of these errors in the section entitled \fIError  (see the descriptions of these errors in the section entitled <i>Error return
1738  return values from\fP <b>pcre_exec()</b>  values from</i> <b>pcre_exec()</b>
1739  <a href="#errorlist">below).</a>  <a href="#errorlist">below).</a>
1740  If <i>startoffset</i> contains a value that does not point to the start of a  If <i>startoffset</i> contains a value that does not point to the start of a
1741  UTF-8 character (or to the end of the subject), PCRE_ERROR_BADUTF8_OFFSET is  UTF-8 character (or to the end of the subject), PCRE_ERROR_BADUTF8_OFFSET is
# Line 1691  checks for performance reasons, you can Line 1747  checks for performance reasons, you can
1747  calling <b>pcre_exec()</b>. You might want to do this for the second and  calling <b>pcre_exec()</b>. You might want to do this for the second and
1748  subsequent calls to <b>pcre_exec()</b> if you are making repeated calls to find  subsequent calls to <b>pcre_exec()</b> if you are making repeated calls to find
1749  all the matches in a single subject string. However, you should be sure that  all the matches in a single subject string. However, you should be sure that
1750  the value of <i>startoffset</i> points to the start of a UTF-8 character (or the  the value of <i>startoffset</i> points to the start of a character (or the end
1751  end of the subject). When PCRE_NO_UTF8_CHECK is set, the effect of passing an  of the subject). When PCRE_NO_UTF8_CHECK is set, the effect of passing an
1752  invalid UTF-8 string as a subject or an invalid value of <i>startoffset</i> is  invalid string as a subject or an invalid value of <i>startoffset</i> is
1753  undefined. Your program may crash.  undefined. Your program may crash.
1754  <pre>  <pre>
1755    PCRE_PARTIAL_HARD    PCRE_PARTIAL_HARD
# Line 1728  The string to be matched by pcre_exec Line 1784  The string to be matched by pcre_exec
1784  </b><br>  </b><br>
1785  <P>  <P>
1786  The subject string is passed to <b>pcre_exec()</b> as a pointer in  The subject string is passed to <b>pcre_exec()</b> as a pointer in
1787  <i>subject</i>, a length (in bytes) in <i>length</i>, and a starting byte offset  <i>subject</i>, a length in bytes in <i>length</i>, and a starting byte offset
1788  in <i>startoffset</i>. If this is negative or greater than the length of the  in <i>startoffset</i>. If this is negative or greater than the length of the
1789  subject, <b>pcre_exec()</b> returns PCRE_ERROR_BADOFFSET. When the starting  subject, <b>pcre_exec()</b> returns PCRE_ERROR_BADOFFSET. When the starting
1790  offset is zero, the search for a match starts at the beginning of the subject,  offset is zero, the search for a match starts at the beginning of the subject,
# Line 1823  string that it matched that is returned. Line 1879  string that it matched that is returned.
1879  <P>  <P>
1880  If the vector is too small to hold all the captured substring offsets, it is  If the vector is too small to hold all the captured substring offsets, it is
1881  used as far as possible (up to two-thirds of its length), and the function  used as far as possible (up to two-thirds of its length), and the function
1882  returns a value of zero. If neither the actual string matched not any captured  returns a value of zero. If neither the actual string matched nor any captured
1883  substrings are of interest, <b>pcre_exec()</b> may be called with <i>ovector</i>  substrings are of interest, <b>pcre_exec()</b> may be called with <i>ovector</i>
1884  passed as NULL and <i>ovecsize</i> as zero. However, if the pattern contains  passed as NULL and <i>ovecsize</i> as zero. However, if the pattern contains
1885  back references and the <i>ovector</i> is not big enough to remember the related  back references and the <i>ovector</i> is not big enough to remember the related
# Line 2022  time. Line 2078  time.
2078  <pre>  <pre>
2079    PCRE_ERROR_JIT_STACKLIMIT (-27)    PCRE_ERROR_JIT_STACKLIMIT (-27)
2080  </pre>  </pre>
2081  This error is returned when a pattern that was successfully studied using the  This error is returned when a pattern that was successfully studied using a
2082  PCRE_STUDY_JIT_COMPILE option is being matched, but the memory available for  JIT compile option is being matched, but the memory available for the
2083  the just-in-time processing stack is not large enough. See the  just-in-time processing stack is not large enough. See the
2084  <a href="pcrejit.html"><b>pcrejit</b></a>  <a href="pcrejit.html"><b>pcrejit</b></a>
2085  documentation for more details.  documentation for more details.
2086    <pre>
2087      PCRE_ERROR_BADMODE (-28)
2088    </pre>
2089    This error is given if a pattern that was compiled by the 8-bit library is
2090    passed to a 16-bit library function, or vice versa.
2091    <pre>
2092      PCRE_ERROR_BADENDIANNESS (-29)
2093    </pre>
2094    This error is given if a pattern that was compiled and saved is reloaded on a
2095    host with different endianness. The utility function
2096    <b>pcre_pattern_to_host_byte_order()</b> can be used to convert such a pattern
2097    so that it runs on the new host.
2098  </P>  </P>
2099  <P>  <P>
2100  Error numbers -16 to -20 and -22 are not used by <b>pcre_exec()</b>.  Error numbers -16 to -20 and -22 are not used by <b>pcre_exec()</b>.
# Line 2035  Error numbers -16 to -20 and -22 are not Line 2103  Error numbers -16 to -20 and -22 are not
2103  Reason codes for invalid UTF-8 strings  Reason codes for invalid UTF-8 strings
2104  </b><br>  </b><br>
2105  <P>  <P>
2106    This section applies only to the 8-bit library. The corresponding information
2107    for the 16-bit library is given in the
2108    <a href="pcre16.html"><b>pcre16</b></a>
2109    page.
2110    </P>
2111    <P>
2112  When <b>pcre_exec()</b> returns either PCRE_ERROR_BADUTF8 or  When <b>pcre_exec()</b> returns either PCRE_ERROR_BADUTF8 or
2113  PCRE_ERROR_SHORTUTF8, and the size of the output vector (<i>ovecsize</i>) is at  PCRE_ERROR_SHORTUTF8, and the size of the output vector (<i>ovecsize</i>) is at
2114  least 2, the offset of the start of the invalid UTF-8 character is placed in  least 2, the offset of the start of the invalid UTF-8 character is placed in
# Line 2104  character. Line 2178  character.
2178  The first byte of a character has the value 0xfe or 0xff. These values can  The first byte of a character has the value 0xfe or 0xff. These values can
2179  never occur in a valid UTF-8 string.  never occur in a valid UTF-8 string.
2180  </P>  </P>
2181  <br><a name="SEC17" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a><br>  <br><a name="SEC18" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a><br>
2182  <P>  <P>
2183  <b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>  <b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
2184  <b>int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>  <b>int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
# Line 2199  linked via a special interface to anothe Line 2273  linked via a special interface to anothe
2273  <b>pcre_free</b> directly; it is for these cases that the functions are  <b>pcre_free</b> directly; it is for these cases that the functions are
2274  provided.  provided.
2275  </P>  </P>
2276  <br><a name="SEC18" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>  <br><a name="SEC19" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>
2277  <P>  <P>
2278  <b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b>  <b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b>
2279  <b>const char *<i>name</i>);</b>  <b>const char *<i>name</i>);</b>
# Line 2263  names are not included in the compiled c Line 2337  names are not included in the compiled c
2337  numbers. For this reason, the use of different names for subpatterns of the  numbers. For this reason, the use of different names for subpatterns of the
2338  same number causes an error at compile time.  same number causes an error at compile time.
2339  </P>  </P>
2340  <br><a name="SEC19" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>  <br><a name="SEC20" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
2341  <P>  <P>
2342  <b>int pcre_get_stringtable_entries(const pcre *<i>code</i>,</b>  <b>int pcre_get_stringtable_entries(const pcre *<i>code</i>,</b>
2343  <b>const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>  <b>const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
# Line 2301  described above in the section entitled Line 2375  described above in the section entitled
2375  Given all the relevant entries for the name, you can extract each of their  Given all the relevant entries for the name, you can extract each of their
2376  numbers, and hence the captured data, if any.  numbers, and hence the captured data, if any.
2377  </P>  </P>
2378  <br><a name="SEC20" href="#TOC1">FINDING ALL POSSIBLE MATCHES</a><br>  <br><a name="SEC21" href="#TOC1">FINDING ALL POSSIBLE MATCHES</a><br>
2379  <P>  <P>
2380  The traditional matching function uses a similar algorithm to Perl, which stops  The traditional matching function uses a similar algorithm to Perl, which stops
2381  when it finds the first match, starting at a given point in the subject. If you  when it finds the first match, starting at a given point in the subject. If you
# Line 2319  When your callout function is called, ex Line 2393  When your callout function is called, ex
2393  substring. Then return 1, which forces <b>pcre_exec()</b> to backtrack and try  substring. Then return 1, which forces <b>pcre_exec()</b> to backtrack and try
2394  other alternatives. Ultimately, when it runs out of matches, <b>pcre_exec()</b>  other alternatives. Ultimately, when it runs out of matches, <b>pcre_exec()</b>
2395  will yield PCRE_ERROR_NOMATCH.  will yield PCRE_ERROR_NOMATCH.
2396    </P>
2397    <br><a name="SEC22" href="#TOC1">OBTAINING AN ESTIMATE OF STACK USAGE</a><br>
2398    <P>
2399    Matching certain patterns using <b>pcre_exec()</b> can use a lot of process
2400    stack, which in certain environments can be rather limited in size. Some users
2401    find it helpful to have an estimate of the amount of stack that is used by
2402    <b>pcre_exec()</b>, to help them set recursion limits, as described in the
2403    <a href="pcrestack.html"><b>pcrestack</b></a>
2404    documentation. The estimate that is output by <b>pcretest</b> when called with
2405    the <b>-m</b> and <b>-C</b> options is obtained by calling <b>pcre_exec()</b> with
2406    the values NULL, NULL, NULL, -999, and -999 for its first five arguments.
2407    </P>
2408    <P>
2409    Normally, if its first argument is NULL, <b>pcre_exec()</b> immediately returns
2410    the negative error code PCRE_ERROR_NULL, but with this special combination of
2411    arguments, it returns instead a negative number whose absolute value is the
2412    approximate stack frame size in bytes. (A negative number is used so that it is
2413    clear that no match has happened.) The value is approximate because in some
2414    cases, recursive calls to <b>pcre_exec()</b> occur when there are one or two
2415    additional variables on the stack.
2416    </P>
2417    <P>
2418    If PCRE has been compiled to use the heap instead of the stack for recursion,
2419    the value returned is the size of each block that is obtained from the heap.
2420  <a name="dfamatch"></a></P>  <a name="dfamatch"></a></P>
2421  <br><a name="SEC21" href="#TOC1">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a><br>  <br><a name="SEC23" href="#TOC1">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a><br>
2422  <P>  <P>
2423  <b>int pcre_dfa_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>  <b>int pcre_dfa_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
2424  <b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>  <b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
# Line 2495  recursively, using private vectors for < Line 2593  recursively, using private vectors for <
2593  error is given if the output vector is not large enough. This should be  error is given if the output vector is not large enough. This should be
2594  extremely rare, as a vector of size 1000 is used.  extremely rare, as a vector of size 1000 is used.
2595  </P>  </P>
2596  <br><a name="SEC22" href="#TOC1">SEE ALSO</a><br>  <br><a name="SEC24" href="#TOC1">SEE ALSO</a><br>
2597  <P>  <P>
2598  <b>pcrebuild</b>(3), <b>pcrecallout</b>(3), <b>pcrecpp(3)</b>(3),  <b>pcre16</b>(3), <b>pcrebuild</b>(3), <b>pcrecallout</b>(3), <b>pcrecpp</b>(3),
2599  <b>pcrematching</b>(3), <b>pcrepartial</b>(3), <b>pcreposix</b>(3),  <b>pcrematching</b>(3), <b>pcrepartial</b>(3), <b>pcreposix</b>(3),
2600  <b>pcreprecompile</b>(3), <b>pcresample</b>(3), <b>pcrestack</b>(3).  <b>pcreprecompile</b>(3), <b>pcresample</b>(3), <b>pcrestack</b>(3).
2601  </P>  </P>
2602  <br><a name="SEC23" href="#TOC1">AUTHOR</a><br>  <br><a name="SEC25" href="#TOC1">AUTHOR</a><br>
2603  <P>  <P>
2604  Philip Hazel  Philip Hazel
2605  <br>  <br>
# Line 2510  University Computing Service Line 2608  University Computing Service
2608  Cambridge CB2 3QH, England.  Cambridge CB2 3QH, England.
2609  <br>  <br>
2610  </P>  </P>
2611  <br><a name="SEC24" href="#TOC1">REVISION</a><br>  <br><a name="SEC26" href="#TOC1">REVISION</a><br>
2612  <P>  <P>
2613  Last updated: 02 December 2011  Last updated: 14 April 2012
2614  <br>  <br>
2615  Copyright &copy; 1997-2011 University of Cambridge.  Copyright &copy; 1997-2012 University of Cambridge.
2616  <br>  <br>
2617  <p>  <p>
2618  Return to the <a href="index.html">PCRE index page</a>.  Return to the <a href="index.html">PCRE index page</a>.

Legend:
Removed from v.784  
changed lines
  Added in v.959

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12