smartcoder
diff --git a/ext/pcre/config.w32 b/ext/pcre/config.w32
Lines changed: 1 addition & 0 deletions
diff --git a/ext/pcre/config0.m4 b/ext/pcre/config0.m4
Lines changed: 2 additions & 1 deletion
diff --git a/ext/pcre/pcrelib/ChangeLog b/ext/pcre/pcrelib/ChangeLog
Lines changed: 164 additions & 0 deletions
diff --git a/ext/pcre/pcrelib/HACKING b/ext/pcre/pcrelib/HACKING
Lines changed: 20 additions & 15 deletions
diff --git a/ext/pcre/pcrelib/NEWS b/ext/pcre/pcrelib/NEWS
Lines changed: 41 additions & 1 deletion
@@ -10,3 +10,4 @@ AC_DEFINE('HAVE_BUNDLED_PCRE', 1, 'Using bundled PCRE library');
 AC_DEFINE('HAVE_PCRE', 1, 'Have PCRE library');
 PHP_PCRE="yes";
 PHP_INSTALL_HEADERS("ext/pcre", "php_pcre.h pcrelib/");
+ADD_FLAG("CFLAGS_PCRE", " /D HAVE_CONFIG_H");
@@ -59,7 +59,8 @@ PHP_ARG_WITH(pcre-regex,,
     				 pcrelib/pcre_ord2utf8.c pcrelib/pcre_refcount.c pcrelib/pcre_study.c \
     				 pcrelib/pcre_tables.c pcrelib/pcre_valid_utf8.c \
     				 pcrelib/pcre_version.c pcrelib/pcre_xclass.c"
-    PHP_NEW_EXTENSION(pcre, $pcrelib_sources php_pcre.c, no,,-I@ext_srcdir@/pcrelib)
+    PHP_PCRE_CFLAGS="-DHAVE_CONFIG_H -I@ext_srcdir@/pcrelib"
+    PHP_NEW_EXTENSION(pcre, $pcrelib_sources php_pcre.c, no,,$PHP_PCRE_CFLAGS)
     PHP_ADD_BUILD_DIR($ext_builddir/pcrelib)
     PHP_INSTALL_HEADERS([ext/pcre], [php_pcre.h pcrelib/])
     AC_DEFINE(HAVE_BUNDLED_PCRE, 1, [ ])
 
@@ -1,6 +1,170 @@
 ChangeLog for PCRE
 ------------------
 
+Version 8.32 30-November-2012
+-----------------------------
+
+1.  Improved JIT compiler optimizations for first character search and single
+    character iterators.
+
+2.  Supporting IBM XL C compilers for PPC architectures in the JIT compiler.
+    Patch by Daniel Richard G.
+
+3.  Single character iterator optimizations in the JIT compiler.
+
+4.  Improved JIT compiler optimizations for character ranges.
+
+5.  Rename the "leave" variable names to "quit" to improve WinCE compatibility.
+    Reported by Giuseppe D'Angelo.
+
+6.  The PCRE_STARTLINE bit, indicating that a match can occur only at the start
+    of a line, was being set incorrectly in cases where .* appeared inside
+    atomic brackets at the start of a pattern, or where there was a subsequent
+    *PRUNE or *SKIP.
+
+7.  Improved instruction cache flush for POWER/PowerPC.
+    Patch by Daniel Richard G.
+
+8.  Fixed a number of issues in pcregrep, making it more compatible with GNU
+    grep:
+
+    (a) There is now no limit to the number of patterns to be matched.
+
+    (b) An error is given if a pattern is too long.
+
+    (c) Multiple uses of --exclude, --exclude-dir, --include, and --include-dir
+        are now supported.
+
+    (d) --exclude-from and --include-from (multiple use) have been added.
+
+    (e) Exclusions and inclusions now apply to all files and directories, not
+        just to those obtained from scanning a directory recursively.
+
+    (f) Multiple uses of -f and --file-list are now supported.
+
+    (g) In a Windows environment, the default for -d has been changed from
+        "read" (the GNU grep default) to "skip", because otherwise the presence
+        of a directory in the file list provokes an error.
+
+    (h) The documentation has been revised and clarified in places.
+
+9.  Improve the matching speed of capturing brackets.
+
+10. Changed the meaning of \X so that it now matches a Unicode extended
+    grapheme cluster.
+
+11. Patch by Daniel Richard G to the autoconf files to add a macro for sorting
+    out POSIX threads when JIT support is configured.
+
+12. Added support for PCRE_STUDY_EXTRA_NEEDED.
+
+13. In the POSIX wrapper regcomp() function, setting re_nsub field in the preg
+    structure could go wrong in environments where size_t is not the same size
+    as int.
+
+14. Applied user-supplied patch to pcrecpp.cc to allow PCRE_NO_UTF8_CHECK to be
+    set.
+
+15. The EBCDIC support had decayed; later updates to the code had included
+    explicit references to (e.g.) \x0a instead of CHAR_LF. There has been a
+    general tidy up of EBCDIC-related issues, and the documentation was also
+    not quite right. There is now a test that can be run on ASCII systems to
+    check some of the EBCDIC-related things (but it is not a full test).
+
+16. The new PCRE_STUDY_EXTRA_NEEDED option is now used by pcregrep, resulting
+    in a small tidy to the code.
+
+17. Fix JIT tests when UTF is disabled and both 8 and 16 bit mode are enabled.
+
+18. If the --only-matching (-o) option in pcregrep is specified multiple
+    times, each one causes appropriate output. For example, -o1 -o2 outputs the
+    substrings matched by the 1st and 2nd capturing parentheses. A separating
+    string can be specified by --om-separator (default empty).
+
+19. Improving the first n character searches.
+
+20. Turn case lists for horizontal and vertical white space into macros so that
+    they are defined only once.
+
+21. Together, this set of changes gives more compatible Unicode case-folding
+    behaviour for characters that have more than one other case when UCP
+    support is available.
+
+    (a) The Unicode property table now has offsets into a new table of sets of
+        three or more characters that are case-equivalent. The MultiStage2.py
+        script that generates these tables (the pcre_ucd.c file) now scans
+        CaseFolding.txt instead of UnicodeData.txt for character case
+        information.
+
+    (b) The code for adding characters or ranges of characters to a character
+        class has been abstracted into a generalized function that also handles
+        case-independence. In UTF-mode with UCP support, this uses the new data
+        to handle characters with more than one other case.
+
+    (c) A bug that is fixed as a result of (b) is that codepoints less than 256
+        whose other case is greater than 256 are now correctly matched
+        caselessly. Previously, the high codepoint matched the low one, but not
+        vice versa.
+
+    (d) The processing of \h, \H, \v, and \V in character classes now makes use
+        of the new class addition function, using character lists defined as
+        macros alongside the case definitions of 20 above.
+
+    (e) Caseless back references now work with characters that have more than
+        one other case.
+
+    (f) General caseless matching of characters with more than one other case
+        is supported.
+
+22. Unicode character properties were updated to Unicode 6.2.0.
+
+23. Improved CMake support under Windows. Patch by Daniel Richard G.
+
+24. Add support for 32-bit character strings and UTF-32.
+
+25. Major JIT compiler update (code refactoring and bugfixing).
+    Experimental Sparc 32 support is added.
+
+26. Applied a modified version of Daniel Richard G's patch to create
+    pcre.h.generic and config.h.generic by "make" instead of in the
+    PrepareRelease script.
+
+27. Added a definition for CHAR_NULL (helpful for the z/OS port), and use it in
+    pcre_compile.c when checking for a zero character.
+
+28. Introducing a native interface for JIT. Through this interface, the compiled
+    machine code can be directly executed. The purpose of this interface is to
+    provide fast pattern matching, so several sanity checks are not performed.
+    However, feature tests are still performed. The new interface provides
+    1.4x speedup compared to the old one.
+
+29. If pcre_exec() or pcre_dfa_exec() was called with a negative value for
+    the subject string length, the error given was PCRE_ERROR_BADOFFSET, which
+    was confusing. There is now a new error PCRE_ERROR_BADLENGTH for this case.
+
+30. In 8-bit UTF-8 mode, pcretest failed to give an error for data codepoints
+    greater than 0x7fffffff (which cannot be represented in UTF-8, even under
+    the "old" RFC 2279). Instead, it ended up passing a negative length to
+    pcre_exec().
+
+31. Add support for GCC's visibility feature to hide internal functions.
+
+32. Running "pcretest -C pcre8" or "pcretest -C pcre16" gave a spurious error
+    "unknown -C option" after outputting 0 or 1.
+
+33. There is now support for generating a code coverage report for the test
+    suite in environments where gcc is the compiler and lcov is installed. This
+    is mainly for the benefit of the developers.
+
+34. If PCRE is built with --enable-valgrind, certain memory regions are marked
+    unaddressable using valgrind annotations, allowing valgrind to detect
+    invalid memory accesses. This is mainly for the benefit of the developers.
+
+35. (*UTF) can now be used to start a pattern in any of the three libraries.
+
+36. Give configure error if --enable-cpp but no C++ compiler found.
+
+
 Version 8.31 06-July-2012
 -------------------------
 
 
@@ -49,16 +49,17 @@ complexity in Perl regular expressions, I couldn't do this. In any case, a
 first pass through the pattern is helpful for other reasons. 
 
 
-Support for 16-bit data strings
--------------------------------
+Support for 16-bit and 32-bit data strings
+-------------------------------------------
 
-From release 8.30, PCRE supports 16-bit as well as 8-bit data strings, by being 
-compilable in either 8-bit or 16-bit modes, or both. Thus, two different 
-libraries can be created. In the description that follows, the word "short" is 
+From release 8.30, PCRE supports 16-bit as well as 8-bit data strings; and from
+release 8.32, PCRE supports 32-bit data strings. The library can be compiled
+in any combination of 8-bit, 16-bit or 32-bit modes, creating different
+libraries. In the description that follows, the word "short" is 
 used for a 16-bit data quantity, and the word "unit" is used for a quantity
-that is a byte in 8-bit mode and a short in 16-bit mode. However, so as not to
-over-complicate the text, the names of PCRE functions are given in 8-bit form
-only.
+that is a byte in 8-bit mode, a short in 16-bit mode and a 32-bit unsigned
+integer in 32-bit mode. However, so as not to over-complicate the text, the
+names of PCRE functions are given in 8-bit form only.
 
 
 Computing the memory requirement: how it was
@@ -138,9 +139,10 @@ Format of compiled patterns
 ---------------------------
 
 The compiled form of a pattern is a vector of units (bytes in 8-bit mode, or
-shorts in 16-bit mode), containing items of variable length. The first unit in
-an item contains an opcode, and the length of the item is either implicit in
-the opcode or contained in the data that follows it.
+shorts in 16-bit mode, 32-bit unsigned integers in 32-bit mode), containing
+items of variable length. The first unit in an item contains an opcode, and
+the length of the item is either implicit in the opcode or contained in the
+data that follows it.
 
 In many cases listed below, LINK_SIZE data values are specified for offsets
 within the compiled pattern. LINK_SIZE always specifies a number of bytes. The
@@ -207,7 +209,8 @@ Matching literal characters
 
 The OP_CHAR opcode is followed by a single character that is to be matched 
 casefully. For caseless matching, OP_CHARI is used. In UTF-8 or UTF-16 modes,
-the character may be more than one unit long.
+the character may be more than one unit long. In UTF-32 mode, characters
+are always exactly one unit long.
 
 
 Repeating single characters
@@ -228,7 +231,8 @@ following opcodes, which come in caseful and caseless versions:
   OP_POSQUERY     OP_POSQUERYI  
 
 Each opcode is followed by the character that is to be repeated. In ASCII mode,
-these are two-unit items; in UTF-8 or UTF-16 modes, the length is variable.
+these are two-unit items; in UTF-8 or UTF-16 modes, the length is variable; in
+UTF-32 mode these are also two-unit items (the opcode plus a one-unit character).
 Those with "MIN" in their names are the minimizing versions. Those with "POS"
 in their names are possessive versions. Other repeats make use of these
 opcodes:
@@ -299,7 +303,7 @@ bit map containing a 1 bit for every character that is acceptable. The bits are
 counted from the least significant end of each unit. In caseless mode, bits for
 both cases are set.
 
-The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8/16 mode,
+The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8/16/32 mode,
 subject characters with values greater than 255 can be handled correctly. For
 OP_CLASS they do not match, whereas for OP_NCLASS they do.
 
@@ -412,7 +416,8 @@ OP_ASSERTBACK and OP_ASSERTBACK_NOT, and the first opcode inside the assertion
 is OP_REVERSE, followed by a two byte (one short) count of the number of
 characters to move back the pointer in the subject string. In ASCII mode, the 
 count is a number of units, but in UTF-8/16 mode each character may occupy more
-than one unit. A separate count is present in each alternative of a lookbehind
+than one unit; in UTF-32 mode each character occupies exactly one unit.
+A separate count is present in each alternative of a lookbehind
 assertion, allowing them to have different fixed lengths.
 
 
 
@@ -1,6 +1,46 @@
 News about PCRE releases
 ------------------------
 
+Release 8.32 30-November-2012
+-----------------------------
+
+This release fixes a number of bugs, but also has some new features. These are
+the highlights:
+
+.  There is now support for 32-bit character strings and UTF-32. Like the
+   16-bit support, this is done by compiling a separate 32-bit library.
+
+.  \X now matches a Unicode extended grapheme cluster.
+
+.  Case-independent matching of Unicode characters that have more than one
+   "other case" now makes all three (or more) characters equivalent. This
+   applies, for example, to Greek Sigma, which has two lowercase versions.
+
+.  Unicode character properties are updated to Unicode 6.2.0.
+
+.  The EBCDIC support, which had decayed, has had a spring clean.
+
+.  A number of JIT optimizations have been added, which give faster JIT
+   execution speed. In addition, a new direct interface to JIT execution is
+   available. This bypasses some of the sanity checks of pcre_exec() to give a
+   noticeable speed-up.
+
+.  A number of issues in pcregrep have been fixed, making it more compatible
+   with GNU grep. In particular, --exclude and --include (and variants) apply
+   to all files now, not just those obtained from scanning a directory
+   recursively. In Windows environments, the default action for directories is
+   now "skip" instead of "read" (which provokes an error).
+
+.  If the --only-matching (-o) option in pcregrep is specified multiple
+   times, each one causes appropriate output. For example, -o1 -o2 outputs the
+   substrings matched by the 1st and 2nd capturing parentheses. A separating
+   string can be specified by --om-separator (default empty).
+
+.  When PCRE is built via Autotools using a version of gcc that has the
+   "visibility" feature, it is used to hide internal library functions that are
+   not part of the public API.
+
+
 Release 8.31 06-July-2012
 -------------------------
 
@@ -9,7 +49,7 @@ This is mainly a bug-fixing release, with a small number of developments:
 . The JIT compiler now supports partial matching and the (*MARK) and
   (*COMMIT) verbs.
 
-. PCRE_INFO_MAXLOOKBEHIND can be used to find the longest lookbehing in a
+. PCRE_INFO_MAXLOOKBEHIND can be used to find the longest lookbehind in a
   pattern.
 
 . There should be a performance improvement when using the heap instead of the