Skip to content

Add new function string_is_ascii #353

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 12 additions & 14 deletions ext/intl/grapheme/grapheme_string.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,8 @@ PHP_FUNCTION(grapheme_strlen)
RETURN_FALSE;
}

ret_len = grapheme_ascii_check(string, string_len);

if ( ret_len >= 0 )
RETURN_LONG(ret_len);
if (string_is_ascii(string, string_len))
RETURN_LONG(string_len);

/* convert the string to UTF-16. */
status = U_ZERO_ERROR;
Expand Down Expand Up @@ -154,7 +152,7 @@ PHP_FUNCTION(grapheme_strpos)
}

/* if it is there, and if the haystack is ascii, we are all done */
if ( grapheme_ascii_check(haystack, haystack_len) >= 0 ) {
if ( string_is_ascii(haystack, haystack_len) ) {

RETURN_LONG(found - haystack);
}
Expand Down Expand Up @@ -211,7 +209,7 @@ PHP_FUNCTION(grapheme_stripos)
}


is_ascii = ( grapheme_ascii_check(haystack, haystack_len) >= 0 );
is_ascii = ( string_is_ascii(haystack, haystack_len) );

if ( is_ascii ) {
needle_dup = (unsigned char *)estrndup((char *)needle, needle_len);
Expand All @@ -229,7 +227,7 @@ PHP_FUNCTION(grapheme_stripos)
}

/* if needle was ascii too, we are all done, otherwise we need to try using Unicode to see what we get */
if ( grapheme_ascii_check(needle, needle_len) >= 0 ) {
if ( string_is_ascii(needle, needle_len) ) {
RETURN_FALSE;
}
}
Expand Down Expand Up @@ -284,7 +282,7 @@ PHP_FUNCTION(grapheme_strrpos)
RETURN_FALSE;
}

is_ascii = grapheme_ascii_check(haystack, haystack_len) >= 0;
is_ascii = string_is_ascii(haystack, haystack_len);

if ( is_ascii ) {

Expand All @@ -297,7 +295,7 @@ PHP_FUNCTION(grapheme_strrpos)

/* if the needle was ascii too, we are done */

if ( grapheme_ascii_check(needle, needle_len) >= 0 ) {
if ( string_is_ascii(needle, needle_len) ) {
RETURN_FALSE;
}

Expand Down Expand Up @@ -354,7 +352,7 @@ PHP_FUNCTION(grapheme_strripos)
RETURN_FALSE;
}

is_ascii = grapheme_ascii_check(haystack, haystack_len) >= 0;
is_ascii = string_is_ascii(haystack, haystack_len);

if ( is_ascii ) {
unsigned char *needle_dup, *haystack_dup;
Expand All @@ -375,7 +373,7 @@ PHP_FUNCTION(grapheme_strripos)

/* if the needle was ascii too, we are done */

if ( grapheme_ascii_check(needle, needle_len) >= 0 ) {
if ( string_is_ascii(needle, needle_len) ) {
RETURN_FALSE;
}

Expand Down Expand Up @@ -430,7 +428,7 @@ PHP_FUNCTION(grapheme_substr)

/* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */

if ( grapheme_ascii_check(str, str_len) >= 0 ) {
if ( string_is_ascii(str, str_len) ) {
grapheme_substr_ascii((char *)str, str_len, start, length, ZEND_NUM_ARGS(), (char **) &sub_str, &sub_str_len);

if ( NULL == sub_str ) {
Expand Down Expand Up @@ -630,7 +628,7 @@ static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_cas
}

/* if it is there, and if the haystack is ascii, we are all done */
if ( grapheme_ascii_check(haystack, haystack_len) >= 0 ) {
if ( string_is_ascii(haystack, haystack_len) ) {
size_t found_offset = found - haystack;

if (part) {
Expand Down Expand Up @@ -874,7 +872,7 @@ PHP_FUNCTION(grapheme_extract)
(size + 1 because the size-th character might be the beginning of a grapheme cluster)
*/

if ( -1 != grapheme_ascii_check(pstr, size + 1 < str_len ? size + 1 : str_len ) ) {
if ( string_is_ascii(pstr, size + 1 < str_len ? size + 1 : str_len ) ) {
long nsize = ( size < str_len ? size : str_len );
if ( NULL != next ) {
ZVAL_LONG(next, start+nsize);
Expand Down
14 changes: 0 additions & 14 deletions ext/intl/grapheme/grapheme_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -379,20 +379,6 @@ grapheme_strpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned ch

/* }}} */

/* {{{ grapheme_ascii_check: ASCII check
 *
 * Scans the first `len` bytes starting at `day`.
 *
 * Returns `len` when every scanned byte is <= 0x7f (so 0 for an empty
 * buffer), or -1 as soon as a byte above 0x7f is found.
 *
 * `len` is signed; a negative value is rejected with -1 instead of being
 * used as a loop counter, which would read past the buffer.
 */
int grapheme_ascii_check(const unsigned char *day, int32_t len)
{
	int32_t i;

	/* A negative length can never describe a valid buffer. */
	if ( len < 0 ) {
		return -1;
	}

	for ( i = 0; i < len; i++ ) {
		if ( day[i] > 0x7f ) {
			return -1;
		}
	}

	return len;
}

/* }}} */

/* {{{ grapheme_split_string: find and optionally return grapheme boundaries */
int grapheme_split_string(const UChar *text, int32_t text_length, int boundary_array[], int boundary_array_len TSRMLS_DC )
{
Expand Down
2 changes: 0 additions & 2 deletions ext/intl/grapheme/grapheme_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@ grapheme_strrpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned c
int
grapheme_strpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned char*needle, int32_t needle_len, int32_t offset, int *puchar_pos, int f_ignore_case TSRMLS_DC);

int grapheme_ascii_check(const unsigned char *day, int32_t len);

int grapheme_split_string(const UChar *text, int32_t text_length, int boundary_array[], int boundary_array_len TSRMLS_DC );

int32_t
Expand Down
5 changes: 5 additions & 0 deletions ext/standard/basic_functions.c
Original file line number Diff line number Diff line change
Expand Up @@ -2494,6 +2494,10 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_substr_compare, 0, 0, 3)
ZEND_ARG_INFO(0, length)
ZEND_ARG_INFO(0, case_sensitivity)
ZEND_END_ARG_INFO()

/* Argument info for str_is_ascii(): a single argument named "str".
 * The table is opened with ZEND_SEND_BY_VAL / ZEND_RETURN_VALUE and a
 * trailing count of 1 (presumably the number of required arguments, as in
 * the neighbouring arginfo tables - confirm against the Zend macro). */
ZEND_BEGIN_ARG_INFO_EX(arginfo_str_is_ascii, ZEND_SEND_BY_VAL, ZEND_RETURN_VALUE, 1)
ZEND_ARG_INFO(0, str)
ZEND_END_ARG_INFO()
/* }}} */
/* {{{ syslog.c */
#ifdef HAVE_SYSLOG_H
Expand Down Expand Up @@ -2771,6 +2775,7 @@ const zend_function_entry basic_functions[] = { /* {{{ */
PHP_FE(str_split, arginfo_str_split)
PHP_FE(strpbrk, arginfo_strpbrk)
PHP_FE(substr_compare, arginfo_substr_compare)
PHP_FE(str_is_ascii, arginfo_str_is_ascii)

#ifdef HAVE_STRCOLL
PHP_FE(strcoll, arginfo_strcoll)
Expand Down
3 changes: 3 additions & 0 deletions ext/standard/php_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ PHP_FUNCTION(str_word_count);
PHP_FUNCTION(str_split);
PHP_FUNCTION(strpbrk);
PHP_FUNCTION(substr_compare);
PHP_FUNCTION(str_is_ascii);
#ifdef HAVE_STRCOLL
PHP_FUNCTION(strcoll);
#endif
Expand Down Expand Up @@ -147,6 +148,8 @@ PHPAPI int string_natural_compare_function_ex(zval *result, zval *op1, zval *op2
PHPAPI int string_natural_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC);
PHPAPI int string_natural_case_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC);

/* Returns 1 when none of the first `len` bytes of `str` is above 0x7f
 * (an empty buffer yields 1), and 0 otherwise. */
PHPAPI zend_bool string_is_ascii(const unsigned char *str, size_t len);

#ifndef HAVE_STRERROR
PHPAPI char *php_strerror(int errnum);
#define strerror php_strerror
Expand Down
32 changes: 32 additions & 0 deletions ext/standard/string.c
Original file line number Diff line number Diff line change
Expand Up @@ -5607,6 +5607,38 @@ PHP_FUNCTION(substr_compare)
}
/* }}} */

/* {{{ string_is_ascii
 * Tells whether the buffer str[0 .. str_len) consists solely of 7-bit bytes.
 *
 * Returns 1 when no byte exceeds 0x7f (including the empty buffer),
 * 0 at the first byte that does.
 */
PHPAPI zend_bool string_is_ascii(const unsigned char *str, size_t str_len)
{
	size_t pos;

	for (pos = 0; pos < str_len; pos++) {
		/* Any byte with the high bit set is outside the ASCII range. */
		if (str[pos] >= 0x80) {
			return 0;
		}
	}

	return 1;
}
/* }}} */

/* {{{ proto bool str_is_ascii(string str)
   Returns true when str holds only bytes in the ASCII range (<= 0x7f),
   false otherwise. Userland wrapper around string_is_ascii(). */
PHP_FUNCTION(str_is_ascii)
{
	unsigned char *input = NULL;
	int input_len = 0;
	zend_bool only_ascii;

	/* On a parameter error, bail out without setting a return value. */
	if (FAILURE == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &input, &input_len)) {
		return;
	}

	only_ascii = string_is_ascii(input, input_len);
	RETURN_BOOL(only_ascii);
}
/* }}} str_is_ascii */

/*
* Local variables:
* tab-width: 4
Expand Down
12 changes: 12 additions & 0 deletions ext/standard/tests/general_functions/str_is_ascii.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
--TEST--
str_is_ascii() function basic functionality
--FILE--
<?php
// Empty string: expected to be reported as ASCII.
var_dump(str_is_ascii(''));
// Plain letters, a space and digits: expected to be reported as ASCII.
var_dump(str_is_ascii('ABC 123'));
// Text with accented characters: expected to be reported as non-ASCII.
var_dump(str_is_ascii('Iñtërnâtiônàlizætiøn'));
?>
--EXPECT--
bool(true)
bool(true)
bool(false)