Skip to content

Commit 327e3b3

Browse files
committed
Micro optimizations for pcre
1 parent 29087f0 commit 327e3b3

File tree

1 file changed

+59
-50
lines changed

1 file changed

+59
-50
lines changed

ext/pcre/php_pcre.c

Lines changed: 59 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -1029,8 +1029,7 @@ PHPAPI zend_string *php_pcre_replace(zend_string *regex,
10291029
/* }}} */
10301030

10311031
/* {{{ php_pcre_replace_impl() */
1032-
PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, zval *replace_val,
1033-
int is_callable_replace, int limit, int *replace_count)
1032+
PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, zval *replace_val, int is_callable_replace, int limit, int *replace_count)
10341033
{
10351034
pcre_extra *extra = pce->extra;/* Holds results of studying */
10361035
pcre_extra extra_data; /* Used locally for exec options */
@@ -1058,19 +1057,22 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
10581057
unsigned char *mark = NULL; /* Target for MARK name */
10591058
zend_string *result; /* Result of replacement */
10601059
zend_string *eval_result=NULL; /* Result of custom function */
1060+
10611061
ALLOCA_FLAG(use_heap);
10621062

10631063
if (extra == NULL) {
10641064
extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
10651065
extra = &extra_data;
10661066
}
1067+
10671068
extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
10681069
extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
10691070

1070-
if (pce->preg_options & PREG_REPLACE_EVAL) {
1071+
if (UNEXPECTED(pce->preg_options & PREG_REPLACE_EVAL)) {
10711072
php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
10721073
return NULL;
10731074
}
1075+
10741076
if (!is_callable_replace) {
10751077
replace = Z_STRVAL_P(replace_val);
10761078
replace_len = (int)Z_STRLEN_P(replace_val);
@@ -1080,18 +1082,14 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
10801082
/* Calculate the size of the offsets array, and allocate memory for it. */
10811083
num_subpats = pce->capture_count + 1;
10821084
size_offsets = num_subpats * 3;
1083-
if (size_offsets <= 32) {
1084-
offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1085-
} else {
1086-
offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1087-
}
1085+
offsets = (int *)do_alloca_ex(size_offsets * sizeof(int), 32 * sizeof(int), use_heap);
10881086

10891087
/*
10901088
* Build a mapping from subpattern numbers to their names. We will
10911089
* allocate the table only if there are any named subpatterns.
10921090
*/
10931091
subpat_names = NULL;
1094-
if (pce->name_count > 0) {
1092+
if (UNEXPECTED(pce->name_count > 0)) {
10951093
subpat_names = make_subpats_table(num_subpats, pce);
10961094
if (!subpat_names) {
10971095
return NULL;
@@ -1120,29 +1118,30 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
11201118
exoptions |= PCRE_NO_UTF8_CHECK;
11211119

11221120
/* Check for too many substrings condition. */
1123-
if (count == 0) {
1121+
if (UNEXPECTED(count == 0)) {
11241122
php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1125-
count = size_offsets/3;
1123+
count = size_offsets / 3;
11261124
}
11271125

11281126
piece = subject + start_offset;
11291127

1130-
if (count > 0 && (limit == -1 || limit > 0)) {
1131-
if (replace_count) {
1128+
/* if (EXPECTED(count > 0 && (limit == -1 || limit > 0))) */
1129+
if (EXPECTED(count > 0 && limit)) {
1130+
if (UNEXPECTED(replace_count)) {
11321131
++*replace_count;
11331132
}
1133+
11341134
/* Set the match location in subject */
11351135
match = subject + offsets[0];
11361136

11371137
new_len = result_len + offsets[0] - start_offset; /* part before the match */
11381138

1139-
if (is_callable_replace) {
1140-
/* Use custom function to get replacement string and its length. */
1141-
eval_result = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, mark);
1142-
new_len += (int)eval_result->len;
1143-
} else { /* do regular substitution */
1139+
/* if (!is_callable_replace) */
1140+
if (EXPECTED(replace)) {
1141+
/* do regular substitution */
11441142
walk = replace;
11451143
walk_last = 0;
1144+
11461145
while (walk < replace_end) {
11471146
if ('\\' == *walk || '$' == *walk) {
11481147
if (walk_last == '\\') {
@@ -1160,33 +1159,23 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
11601159
walk++;
11611160
walk_last = walk[-1];
11621161
}
1163-
}
11641162

1165-
if (new_len >= alloc_len) {
1166-
if (alloc_len == 0) {
1167-
alloc_len = 2 * subject_len;
1168-
if (new_len >= alloc_len) {
1169-
alloc_len = alloc_len + 2 * new_len;
1170-
}
1171-
result = zend_string_alloc(alloc_len, 0);
1172-
} else {
1163+
if (new_len >= alloc_len) {
11731164
alloc_len = alloc_len + 2 * new_len;
1174-
result = zend_string_extend(result, alloc_len, 0);
1165+
if (result == NULL) {
1166+
result = zend_string_alloc(alloc_len, 0);
1167+
} else {
1168+
result = zend_string_extend(result, alloc_len, 0);
1169+
}
11751170
}
1176-
}
1177-
/* copy the part of the string before the match */
1178-
memcpy(&result->val[result_len], piece, match-piece);
1179-
result_len += (int)(match-piece);
11801171

1181-
/* copy replacement and backrefs */
1182-
walkbuf = result->val + result_len;
1183-
1184-
/* If using custom function, copy result to the buffer and clean up. */
1185-
if (is_callable_replace) {
1186-
memcpy(walkbuf, eval_result->val, eval_result->len);
1187-
result_len += (int)eval_result->len;
1188-
if (eval_result) zend_string_release(eval_result);
1189-
} else { /* do regular backreference copying */
1172+
/* copy the part of the string before the match */
1173+
memcpy(&result->val[result_len], piece, match-piece);
1174+
result_len += (int)(match-piece);
1175+
1176+
/* copy replacement and backrefs */
1177+
walkbuf = result->val + result_len;
1178+
11901179
walk = replace;
11911180
walk_last = 0;
11921181
while (walk < replace_end) {
@@ -1211,12 +1200,36 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
12111200
*walkbuf = '\0';
12121201
/* increment the result length by how much we've added to the string */
12131202
result_len += (int)(walkbuf - (result->val + result_len));
1203+
} else {
1204+
/* Use custom function to get replacement string and its length. */
1205+
eval_result = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, mark);
1206+
ZEND_ASSERT(eval_result);
1207+
new_len += (int)eval_result->len;
1208+
if (new_len >= alloc_len) {
1209+
alloc_len = alloc_len + 2 * new_len;
1210+
if (result == NULL) {
1211+
result = zend_string_alloc(alloc_len, 0);
1212+
} else {
1213+
result = zend_string_extend(result, alloc_len, 0);
1214+
}
1215+
}
1216+
/* copy the part of the string before the match */
1217+
memcpy(&result->val[result_len], piece, match-piece);
1218+
result_len += (int)(match-piece);
1219+
1220+
/* copy replacement and backrefs */
1221+
walkbuf = result->val + result_len;
1222+
1223+
/* If using custom function, copy result to the buffer and clean up. */
1224+
memcpy(walkbuf, eval_result->val, eval_result->len);
1225+
result_len += (int)eval_result->len;
1226+
zend_string_release(eval_result);
12141227
}
12151228

1216-
if (limit != -1)
1229+
if (EXPECTED(limit)) {
12171230
limit--;
1218-
1219-
} else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
1231+
}
1232+
} else if (count == PCRE_ERROR_NOMATCH || UNEXPECTED(limit == 0)) {
12201233
/* If we previously set PCRE_NOTEMPTY after a null match,
12211234
this is not necessarily the end. We need to advance
12221235
the start offset, and continue. Fudge the offset values
@@ -1266,12 +1279,8 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
12661279
start_offset = offsets[1];
12671280
}
12681281

1269-
if (size_offsets <= 32) {
1270-
free_alloca(offsets, use_heap);
1271-
} else {
1272-
efree(offsets);
1273-
}
1274-
if (subpat_names) {
1282+
free_alloca(offsets, use_heap);
1283+
if (UNEXPECTED(subpat_names)) {
12751284
efree(subpat_names);
12761285
}
12771286

0 commit comments

Comments
 (0)