diff --git a/ext/intl/grapheme/grapheme_string.c b/ext/intl/grapheme/grapheme_string.c index 475bbe4184791..89a0756389a32 100644 --- a/ext/intl/grapheme/grapheme_string.c +++ b/ext/intl/grapheme/grapheme_string.c @@ -69,10 +69,8 @@ PHP_FUNCTION(grapheme_strlen) RETURN_FALSE; } - ret_len = grapheme_ascii_check(string, string_len); - - if ( ret_len >= 0 ) - RETURN_LONG(ret_len); + if (string_is_ascii(string, string_len)) + RETURN_LONG(string_len); /* convert the string to UTF-16. */ status = U_ZERO_ERROR; @@ -154,7 +152,7 @@ PHP_FUNCTION(grapheme_strpos) } /* if it is there, and if the haystack is ascii, we are all done */ - if ( grapheme_ascii_check(haystack, haystack_len) >= 0 ) { + if ( string_is_ascii(haystack, haystack_len) ) { RETURN_LONG(found - haystack); } @@ -211,7 +209,7 @@ PHP_FUNCTION(grapheme_stripos) } - is_ascii = ( grapheme_ascii_check(haystack, haystack_len) >= 0 ); + is_ascii = ( string_is_ascii(haystack, haystack_len) ); if ( is_ascii ) { needle_dup = (unsigned char *)estrndup((char *)needle, needle_len); @@ -229,7 +227,7 @@ PHP_FUNCTION(grapheme_stripos) } /* if needle was ascii too, we are all done, otherwise we need to try using Unicode to see what we get */ - if ( grapheme_ascii_check(needle, needle_len) >= 0 ) { + if ( string_is_ascii(needle, needle_len) ) { RETURN_FALSE; } } @@ -284,7 +282,7 @@ PHP_FUNCTION(grapheme_strrpos) RETURN_FALSE; } - is_ascii = grapheme_ascii_check(haystack, haystack_len) >= 0; + is_ascii = string_is_ascii(haystack, haystack_len); if ( is_ascii ) { @@ -297,7 +295,7 @@ PHP_FUNCTION(grapheme_strrpos) /* if the needle was ascii too, we are done */ - if ( grapheme_ascii_check(needle, needle_len) >= 0 ) { + if ( string_is_ascii(needle, needle_len) ) { RETURN_FALSE; } @@ -354,7 +352,7 @@ PHP_FUNCTION(grapheme_strripos) RETURN_FALSE; } - is_ascii = grapheme_ascii_check(haystack, haystack_len) >= 0; + is_ascii = string_is_ascii(haystack, haystack_len); if ( is_ascii ) { unsigned char *needle_dup, *haystack_dup; @@ -375,7 
+373,7 @@ PHP_FUNCTION(grapheme_strripos) /* if the needle was ascii too, we are done */ - if ( grapheme_ascii_check(needle, needle_len) >= 0 ) { + if ( string_is_ascii(needle, needle_len) ) { RETURN_FALSE; } @@ -430,7 +428,7 @@ PHP_FUNCTION(grapheme_substr) /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */ - if ( grapheme_ascii_check(str, str_len) >= 0 ) { + if ( string_is_ascii(str, str_len) ) { grapheme_substr_ascii((char *)str, str_len, start, length, ZEND_NUM_ARGS(), (char **) &sub_str, &sub_str_len); if ( NULL == sub_str ) { @@ -630,7 +628,7 @@ static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_cas } /* if it is there, and if the haystack is ascii, we are all done */ - if ( grapheme_ascii_check(haystack, haystack_len) >= 0 ) { + if ( string_is_ascii(haystack, haystack_len) ) { size_t found_offset = found - haystack; if (part) { @@ -874,7 +872,7 @@ PHP_FUNCTION(grapheme_extract) (size + 1 because the size-th character might be the beginning of a grapheme cluster) */ - if ( -1 != grapheme_ascii_check(pstr, size + 1 < str_len ? size + 1 : str_len ) ) { + if ( string_is_ascii(pstr, size + 1 < str_len ? size + 1 : str_len ) ) { long nsize = ( size < str_len ? 
size : str_len ); if ( NULL != next ) { ZVAL_LONG(next, start+nsize); diff --git a/ext/intl/grapheme/grapheme_util.c b/ext/intl/grapheme/grapheme_util.c index 92008554d47b1..bf9fffba963b3 100644 --- a/ext/intl/grapheme/grapheme_util.c +++ b/ext/intl/grapheme/grapheme_util.c @@ -379,20 +379,6 @@ grapheme_strpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned ch /* }}} */ -/* {{{ grapheme_ascii_check: ASCII check */ -int grapheme_ascii_check(const unsigned char *day, int32_t len) -{ - int ret_len = len; - while ( len-- ) { - if ( *day++ > 0x7f ) - return -1; - } - - return ret_len; -} - -/* }}} */ - /* {{{ grapheme_split_string: find and optionally return grapheme boundaries */ int grapheme_split_string(const UChar *text, int32_t text_length, int boundary_array[], int boundary_array_len TSRMLS_DC ) { diff --git a/ext/intl/grapheme/grapheme_util.h b/ext/intl/grapheme/grapheme_util.h index c91aeaff7dd12..73002de181bfa 100644 --- a/ext/intl/grapheme/grapheme_util.h +++ b/ext/intl/grapheme/grapheme_util.h @@ -32,8 +32,6 @@ grapheme_strrpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned c int grapheme_strpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned char*needle, int32_t needle_len, int32_t offset, int *puchar_pos, int f_ignore_case TSRMLS_DC); -int grapheme_ascii_check(const unsigned char *day, int32_t len); - int grapheme_split_string(const UChar *text, int32_t text_length, int boundary_array[], int boundary_array_len TSRMLS_DC ); int32_t diff --git a/ext/standard/basic_functions.c b/ext/standard/basic_functions.c index 13791177457e8..196eb2243c5b1 100644 --- a/ext/standard/basic_functions.c +++ b/ext/standard/basic_functions.c @@ -2494,6 +2494,10 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_substr_compare, 0, 0, 3) ZEND_ARG_INFO(0, length) ZEND_ARG_INFO(0, case_sensitivity) ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX(arginfo_str_is_ascii, ZEND_SEND_BY_VAL, ZEND_RETURN_VALUE, 1) + ZEND_ARG_INFO(0, str) +ZEND_END_ARG_INFO() /* }}} 
*/ /* {{{ syslog.c */ #ifdef HAVE_SYSLOG_H @@ -2771,6 +2775,7 @@ const zend_function_entry basic_functions[] = { /* {{{ */ PHP_FE(str_split, arginfo_str_split) PHP_FE(strpbrk, arginfo_strpbrk) PHP_FE(substr_compare, arginfo_substr_compare) + PHP_FE(str_is_ascii, arginfo_str_is_ascii) #ifdef HAVE_STRCOLL PHP_FE(strcoll, arginfo_strcoll) diff --git a/ext/standard/php_string.h b/ext/standard/php_string.h index 65219f257adee..ac1dad88763bc 100644 --- a/ext/standard/php_string.h +++ b/ext/standard/php_string.h @@ -93,6 +93,7 @@ PHP_FUNCTION(str_word_count); PHP_FUNCTION(str_split); PHP_FUNCTION(strpbrk); PHP_FUNCTION(substr_compare); +PHP_FUNCTION(str_is_ascii); #ifdef HAVE_STRCOLL PHP_FUNCTION(strcoll); #endif @@ -147,6 +148,8 @@ PHPAPI int string_natural_compare_function_ex(zval *result, zval *op1, zval *op2 PHPAPI int string_natural_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC); PHPAPI int string_natural_case_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC); +PHPAPI zend_bool string_is_ascii(const unsigned char *str, size_t len); + #ifndef HAVE_STRERROR PHPAPI char *php_strerror(int errnum); #define strerror php_strerror diff --git a/ext/standard/string.c b/ext/standard/string.c index f3f78100b4c6f..3317830b8193c 100644 --- a/ext/standard/string.c +++ b/ext/standard/string.c @@ -5607,6 +5607,38 @@ PHP_FUNCTION(substr_compare) } /* }}} */ +/* {{{ string_is_ascii: returns 1 when none of the str_len bytes starting at str is greater than 0x7f (so also when str_len is 0), otherwise 0 +*/ +PHPAPI zend_bool string_is_ascii(const unsigned char *str, size_t str_len) +{ + while (str_len--) { + if (*str++ > 0x7f) { /* byte outside the 7-bit range: stop at the first one found */ + return 0; + } + } + + return 1; +} +/* }}} */ + +/* {{{ proto bool str_is_ascii(string str) + Checks whether the given string contains only ASCII characters, i.e. no byte above 0x7f; returns without setting a result when argument parsing fails */ +PHP_FUNCTION(str_is_ascii) +{ + unsigned char *str = NULL; + int str_len = 0; + zend_bool ret = 0; + + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &str, &str_len) == FAILURE) { + return; + } + + ret = string_is_ascii(str, str_len); + + RETURN_BOOL(ret); +} +/* }}} str_is_ascii */ 
+ /* * Local variables: * tab-width: 4 diff --git a/ext/standard/tests/general_functions/str_is_ascii.phpt b/ext/standard/tests/general_functions/str_is_ascii.phpt new file mode 100644 index 0000000000000..02490217bca12 --- /dev/null +++ b/ext/standard/tests/general_functions/str_is_ascii.phpt @@ -0,0 +1,12 @@ +--TEST-- +str_is_ascii() function basic functionality +--FILE-- +<?php +var_dump(str_is_ascii("")); /* empty string: no byte above 0x7f */ +var_dump(str_is_ascii("abc")); /* plain 7-bit text */ +var_dump(str_is_ascii("\xff")); /* a single byte above 0x7f */ +?> +--EXPECT-- +bool(true) +bool(true) +bool(false)