|
22 | 22 |
|
23 | 23 | /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
|
24 | 24 |
|
| 25 | +#define _GNU_SOURCE 1 |
25 | 26 | #include <stdio.h>
|
| 27 | +#include <stdint.h> |
26 | 28 | #include "php.h"
|
27 | 29 | #include "php_rand.h"
|
28 | 30 | #include "php_string.h"
|
|
57 | 59 | #include "php_globals.h"
|
58 | 60 | #include "basic_functions.h"
|
59 | 61 | #include "php_smart_str.h"
|
| 62 | +#include <Zend/zend_exceptions.h> |
60 | 63 | #ifdef ZTS
|
61 | 64 | #include "TSRM.h"
|
62 | 65 | #endif
|
@@ -2772,112 +2775,288 @@ PHPAPI char *php_strtr(char *str, int len, char *str_from, char *str_to, int trl
|
2772 | 2775 | }
|
2773 | 2776 | /* }}} */
|
2774 | 2777 |
|
2775 |
| -/* {{{ php_strtr_array |
2776 |
| - */ |
2777 |
| -static void php_strtr_array(zval *return_value, char *str, int slen, HashTable *hash) |
| 2778 | +/* {{{ Definitions for php_strtr_array */ |
| 2779 | +typedef size_t STRLEN; /* STRLEN should be unsigned */ |
| 2780 | +typedef uint16_t HASH; |
| 2781 | +typedef struct { |
| 2782 | + HASH table_mask; |
| 2783 | + STRLEN entries[1]; |
| 2784 | +} SHIFT_TAB; |
| 2785 | +typedef struct { |
| 2786 | + HASH table_mask; |
| 2787 | + int entries[1]; |
| 2788 | +} HASH_TAB; |
| 2789 | +typedef struct { |
| 2790 | + const char *s; |
| 2791 | + STRLEN l; |
| 2792 | +} STR; |
| 2793 | +typedef struct _match_node MATCH_NODE; |
| 2794 | +struct _match_node { |
| 2795 | + STRLEN pos; |
| 2796 | + MATCH_NODE *next; |
| 2797 | +}; |
| 2798 | +typedef struct _pat_and_repl { |
| 2799 | + STR pat; |
| 2800 | + STR repl; |
| 2801 | +} PATNREPL; |
| 2802 | + |
| 2803 | +#define S(a) ((a)->s) |
| 2804 | +#define L(a) ((a)->l) |
| 2805 | + |
| 2806 | +#define SHIFT_TAB_BITS 13 |
| 2807 | +#define HASH_TAB_BITS 10 /* should be less than sizeof(HASH) * 8 */ |
| 2808 | +#define SHIFT_TAB_SIZE (1U << SHIFT_TAB_BITS) |
| 2809 | +#define HASH_TAB_SIZE (1U << HASH_TAB_BITS) |
| 2810 | + |
| 2811 | +typedef struct { |
| 2812 | + int B; /* size of suffixes */ |
| 2813 | + int Bp; /* size of prefixes */ |
| 2814 | + STRLEN m; /* minimum pattern length */ |
| 2815 | + int patnum; /* number of patterns */ |
| 2816 | + SHIFT_TAB *shift; /* table mapping hash to allowed shift */ |
| 2817 | + HASH_TAB *hash; /* table mapping hash to int (pair of pointers) */ |
| 2818 | + HASH *prefix; /* array of hashes of prefixes by pattern suffix hash order */ |
| 2819 | + PATNREPL *patterns; /* array of prefixes by pattern suffix hash order */ |
| 2820 | +} PPRES; |
| 2821 | +/* }}} */ |
| 2822 | + |
| 2823 | +/* {{{ php_strtr_hash */ |
| 2824 | +static inline HASH php_strtr_hash(const char *str, int len) |
2778 | 2825 | {
|
2779 |
| - zval **entry; |
2780 |
| - char *string_key; |
2781 |
| - uint string_key_len; |
2782 |
| - zval **trans; |
2783 |
| - zval ctmp; |
2784 |
| - ulong num_key; |
2785 |
| - int minlen = 128*1024; |
2786 |
| - int maxlen = 0, pos, len, found; |
2787 |
| - char *key; |
2788 |
| - HashPosition hpos; |
2789 |
| - smart_str result = {0}; |
2790 |
| - HashTable tmp_hash; |
2791 |
| - |
2792 |
| - zend_hash_init(&tmp_hash, zend_hash_num_elements(hash), NULL, NULL, 0); |
2793 |
| - zend_hash_internal_pointer_reset_ex(hash, &hpos); |
2794 |
| - while (zend_hash_get_current_data_ex(hash, (void **)&entry, &hpos) == SUCCESS) { |
2795 |
| - switch (zend_hash_get_current_key_ex(hash, &string_key, &string_key_len, &num_key, 0, &hpos)) { |
2796 |
| - case HASH_KEY_IS_STRING: |
2797 |
| - len = string_key_len-1; |
2798 |
| - if (len < 1) { |
2799 |
| - zend_hash_destroy(&tmp_hash); |
2800 |
| - RETURN_FALSE; |
2801 |
| - } |
2802 |
| - zend_hash_add(&tmp_hash, string_key, string_key_len, entry, sizeof(zval*), NULL); |
2803 |
| - if (len > maxlen) { |
2804 |
| - maxlen = len; |
2805 |
| - } |
2806 |
| - if (len < minlen) { |
2807 |
| - minlen = len; |
2808 |
| - } |
2809 |
| - break; |
| 2826 | + HASH res = 0; |
| 2827 | + int i; |
| 2828 | + for (i = 0; i < len; i++) { |
| 2829 | + res = (res << 5) + res + (unsigned char)str[i]; |
| 2830 | + } |
2810 | 2831 |
|
2811 |
| - case HASH_KEY_IS_LONG: |
2812 |
| - Z_TYPE(ctmp) = IS_LONG; |
2813 |
| - Z_LVAL(ctmp) = num_key; |
| 2832 | + return res; |
| 2833 | +} |
| 2834 | +/* }}} */ |
| 2835 | +/* {{{ php_strtr_populate_shift */ |
| 2836 | +static inline void php_strtr_populate_shift(PATNREPL *patterns, int patnum, int B, STRLEN m, SHIFT_TAB *shift) |
| 2837 | +{ |
| 2838 | + int i; |
| 2839 | + STRLEN j, |
| 2840 | + max_shift; |
| 2841 | + |
| 2842 | + max_shift = m - B + 1; |
| 2843 | + for (i = 0; i < SHIFT_TAB_SIZE; i++) { |
| 2844 | + shift->entries[i] = max_shift; |
| 2845 | + } |
| 2846 | + for (i = 0; i < patnum; i++) { |
| 2847 | + for (j = 0; j < m - B + 1; j++) { |
| 2848 | + HASH h = php_strtr_hash(&S(&patterns[i].pat)[j], B) & shift->table_mask; |
| 2849 | + assert((long long) m - (long long) j - B >= 0); |
| 2850 | + shift->entries[h] = MIN(shift->entries[h], m - j - B); |
| 2851 | + } |
| 2852 | + } |
| 2853 | +} |
| 2854 | +/* }}} */ |
| 2855 | +/* {{{ php_strtr_compare_hash_suffix */ |
| 2856 | +static int php_strtr_compare_hash_suffix(const void *a, const void *b, void *ctx_g) |
| 2857 | +{ |
| 2858 | + const PPRES *res = ctx_g; |
| 2859 | + const PATNREPL *pnr_a = a, |
| 2860 | + *pnr_b = b; |
| 2861 | + HASH hash_a = php_strtr_hash(&S(&pnr_a->pat)[res->m - res->B], res->B) |
| 2862 | + & res->hash->table_mask, |
| 2863 | + hash_b = php_strtr_hash(&S(&pnr_b->pat)[res->m - res->B], res->B) |
| 2864 | + & res->hash->table_mask; |
| 2865 | + /* TODO: don't recalculate the hashes all the time */ |
| 2866 | + return hash_a - hash_b; |
| 2867 | +} |
| 2868 | +/* }}} */ |
2814 | 2869 |
|
2815 |
| - convert_to_string(&ctmp); |
2816 |
| - len = Z_STRLEN(ctmp); |
2817 |
| - zend_hash_add(&tmp_hash, Z_STRVAL(ctmp), len+1, entry, sizeof(zval*), NULL); |
2818 |
| - zval_dtor(&ctmp); |
| 2870 | +/* {{{ PPRES *php_strtr_array_prepare(STR *text, PATNREPL *patterns, int patnum, int B, int Bp) */ |
| 2871 | +static PPRES *php_strtr_array_prepare(STR *text, PATNREPL *patterns, int patnum, int B, int Bp) |
| 2872 | +{ |
| 2873 | + int i; |
| 2874 | + PPRES *res = emalloc(sizeof *res); |
2819 | 2875 |
|
2820 |
| - if (len > maxlen) { |
2821 |
| - maxlen = len; |
2822 |
| - } |
2823 |
| - if (len < minlen) { |
2824 |
| - minlen = len; |
2825 |
| - } |
2826 |
| - break; |
| 2876 | + res->m = (STRLEN)-1; |
| 2877 | + for (i = 0; i < patnum; i++) { |
| 2878 | + if (L(&patterns[i].pat) < res->m) { |
| 2879 | + res->m = L(&patterns[i].pat); |
2827 | 2880 | }
|
2828 |
| - zend_hash_move_forward_ex(hash, &hpos); |
2829 | 2881 | }
|
| 2882 | + assert(res->m > 0); |
| 2883 | + res->B = B = MIN(B, res->m); |
| 2884 | + res->Bp = Bp = MIN(Bp, res->m); |
2830 | 2885 |
|
2831 |
| - key = emalloc(maxlen+1); |
2832 |
| - pos = 0; |
| 2886 | + res->shift = safe_emalloc(SHIFT_TAB_SIZE, sizeof(*res->shift->entries), sizeof(*res->shift)); |
| 2887 | + res->shift->table_mask = SHIFT_TAB_SIZE - 1; |
| 2888 | + php_strtr_populate_shift(patterns, patnum, B, res->m, res->shift); |
2833 | 2889 |
|
2834 |
| - while (pos < slen) { |
2835 |
| - if ((pos + maxlen) > slen) { |
2836 |
| - maxlen = slen - pos; |
2837 |
| - } |
| 2890 | + res->hash = safe_emalloc(HASH_TAB_SIZE, sizeof(*res->hash->entries), sizeof(*res->shift)); |
| 2891 | + res->hash->table_mask = HASH_TAB_SIZE - 1; |
| 2892 | + |
| 2893 | + res->patterns = safe_emalloc(patnum, sizeof(*res->patterns), 0); |
| 2894 | + memcpy(res->patterns, patterns, sizeof(*patterns) * patnum); |
| 2895 | + qsort_r(res->patterns, patnum, sizeof(*res->patterns), php_strtr_compare_hash_suffix, res); |
| 2896 | + |
| 2897 | + res->prefix = safe_emalloc(patnum, sizeof(*res->prefix), 0); |
| 2898 | + for (i = 0; i < patnum; i++) { |
| 2899 | + res->prefix[i] = php_strtr_hash(S(&res->patterns[i].pat), Bp); |
| 2900 | + } |
2838 | 2901 |
|
2839 |
| - found = 0; |
2840 |
| - memcpy(key, str+pos, maxlen); |
| 2902 | + /* Initialize the rest of ->hash */ |
| 2903 | + for (i = 0; i < HASH_TAB_SIZE; i++) { |
| 2904 | + res->hash->entries[i] = -1; |
| 2905 | + } |
| 2906 | + { |
| 2907 | + HASH last_h = -1; /* assumes not all bits are used in res->hash */ |
| 2908 | + /* res->patterns is already ordered by hash. |
| 2909 | + * Make res->hash->entries[h] de index of the first pattern in |
| 2910 | + * res->patterns that has hash h */ |
| 2911 | + for (i = 0; i < patnum; i++) { |
| 2912 | + HASH h = php_strtr_hash(&S(&res->patterns[i].pat)[res->m - res->B], res->B) |
| 2913 | + & res->hash->table_mask; |
| 2914 | + if (h != last_h) { |
| 2915 | + res->hash->entries[h] = i; |
| 2916 | + last_h = h; |
| 2917 | + } |
| 2918 | + } |
| 2919 | + } |
| 2920 | + res->hash->entries[HASH_TAB_SIZE] = patnum; |
| 2921 | + for (i = HASH_TAB_SIZE - 1; i >= 0; i--) { |
| 2922 | + if (res->hash->entries[i] == -1) { |
| 2923 | + res->hash->entries[i] = res->hash->entries[i + 1]; |
| 2924 | + } |
| 2925 | + } |
2841 | 2926 |
|
2842 |
| - for (len = maxlen; len >= minlen; len--) { |
2843 |
| - key[len] = 0; |
| 2927 | + res->patnum = patnum; |
2844 | 2928 |
|
2845 |
| - if (zend_hash_find(&tmp_hash, key, len+1, (void**)&trans) == SUCCESS) { |
2846 |
| - char *tval; |
2847 |
| - int tlen; |
2848 |
| - zval tmp; |
| 2929 | + return res; |
| 2930 | +} |
| 2931 | +/* }}} */ |
| 2932 | +/* {{{ php_strtr_array_destroy_ppres(PPRES *d) */ |
| 2933 | +static void php_strtr_array_destroy_ppres(PPRES *d) |
| 2934 | +{ |
| 2935 | + efree(d->shift); |
| 2936 | + efree(d->hash); |
| 2937 | + efree(d->prefix); |
| 2938 | + efree(d->patterns); |
| 2939 | + efree(d); |
| 2940 | +} |
| 2941 | +/* }}} */ |
2849 | 2942 |
|
2850 |
| - if (Z_TYPE_PP(trans) != IS_STRING) { |
2851 |
| - tmp = **trans; |
2852 |
| - zval_copy_ctor(&tmp); |
2853 |
| - convert_to_string(&tmp); |
2854 |
| - tval = Z_STRVAL(tmp); |
2855 |
| - tlen = Z_STRLEN(tmp); |
2856 |
| - } else { |
2857 |
| - tval = Z_STRVAL_PP(trans); |
2858 |
| - tlen = Z_STRLEN_PP(trans); |
2859 |
| - } |
| 2943 | +/* {{{ php_strtr_array_do_repl(STR *text, PPRES *d, zval *return_value) */ |
| 2944 | +static void php_strtr_array_do_repl(STR *text, PPRES *d, zval *return_value) |
| 2945 | +{ |
| 2946 | + STRLEN pos = 0, |
| 2947 | + lastpos = L(text) - d->m; |
| 2948 | + smart_str result = {0}; |
2860 | 2949 |
|
2861 |
| - smart_str_appendl(&result, tval, tlen); |
2862 |
| - pos += len; |
2863 |
| - found = 1; |
| 2950 | + while (pos <= lastpos) { |
| 2951 | + HASH h = php_strtr_hash(&S(text)[pos + d->m - d->B], d->B) & d->shift->table_mask; |
| 2952 | + STRLEN shift = d->shift->entries[h]; |
2864 | 2953 |
|
2865 |
| - if (Z_TYPE_PP(trans) != IS_STRING) { |
2866 |
| - zval_dtor(&tmp); |
2867 |
| - } |
2868 |
| - break; |
| 2954 | + if (shift > 0) { |
| 2955 | + smart_str_appendl(&result, &S(text)[pos], shift); |
| 2956 | + pos += shift; |
| 2957 | + } else { |
| 2958 | + HASH h2 = h & d->hash->table_mask, |
| 2959 | + prefix_h = php_strtr_hash(&S(text)[pos], d->Bp); |
| 2960 | + |
| 2961 | + int offset_start = d->hash->entries[h2], |
| 2962 | + offset_end = d->hash->entries[h2 + 1], /* exclusive */ |
| 2963 | + i = 0; |
| 2964 | + |
| 2965 | + for (i = offset_start; i < offset_end; i++) { |
| 2966 | + PATNREPL *pnr; |
| 2967 | + if (d->prefix[i] != prefix_h) |
| 2968 | + continue; |
| 2969 | + |
| 2970 | + pnr = &d->patterns[i]; |
| 2971 | + if (L(&pnr->pat) > L(text) - pos || |
| 2972 | + memcmp(S(&pnr->pat), &S(text)[pos], L(&pnr->pat)) != 0) |
| 2973 | + continue; |
| 2974 | + |
| 2975 | + smart_str_appendl(&result, S(&pnr->repl), (int)L(&pnr->repl)); |
| 2976 | + pos += L(&pnr->pat); |
| 2977 | + goto end_outer_loop; |
2869 | 2978 | }
|
| 2979 | + |
| 2980 | + smart_str_appendc(&result, S(text)[pos]); |
| 2981 | + pos++; |
| 2982 | +end_outer_loop: ; |
2870 | 2983 | }
|
| 2984 | + } |
| 2985 | + |
| 2986 | + if (pos < L(text)) { |
| 2987 | + smart_str_appendl(&result, &S(text)[pos], (int)(L(text) - pos)); |
| 2988 | + } |
| 2989 | + |
| 2990 | + if (result.c != NULL) { |
| 2991 | + smart_str_0(&result); |
| 2992 | + RETVAL_STRINGL(result.c, result.len, 0); |
| 2993 | + } else { |
| 2994 | + RETURN_EMPTY_STRING(); |
| 2995 | + } |
| 2996 | +} |
| 2997 | +/* }}} */ |
| 2998 | + |
| 2999 | +/* {{{ php_strtr_array */ |
| 3000 | +static void php_strtr_array(zval *return_value, char *str, int slen, HashTable *pats) |
| 3001 | +{ |
| 3002 | + PPRES *data; |
| 3003 | + STR text; |
| 3004 | + PATNREPL *patterns; |
| 3005 | + HashPosition hpos; |
| 3006 | + zval **entry; |
| 3007 | + int num_pats = zend_hash_num_elements(pats), |
| 3008 | + i; |
| 3009 | + |
| 3010 | + S(&text) = str; |
| 3011 | + L(&text) = slen; |
| 3012 | + patterns = safe_emalloc(num_pats, sizeof(*patterns), 0); |
| 3013 | + |
| 3014 | + for (i = 0, zend_hash_internal_pointer_reset_ex(pats, &hpos); |
| 3015 | + zend_hash_get_current_data_ex(pats, (void **)&entry, &hpos) == SUCCESS; |
| 3016 | + i++, zend_hash_move_forward_ex(pats, &hpos)) { |
| 3017 | + char *string_key; |
| 3018 | + uint string_key_len; |
| 3019 | + ulong num_key; |
| 3020 | + int free_str = 0, |
| 3021 | + free_repl = 0; |
| 3022 | + zval *tzv; |
| 3023 | + |
| 3024 | + switch (zend_hash_get_current_key_ex(pats, &string_key, &string_key_len, &num_key, 0, &hpos)) { |
| 3025 | + case HASH_KEY_IS_LONG: |
| 3026 | + string_key_len = 1 + zend_spprintf(&string_key, 0, "%ld", (long)num_key); |
| 3027 | + free_str = 1; |
| 3028 | + /* break missing intentionally */ |
| 3029 | + |
| 3030 | + case HASH_KEY_IS_STRING: |
| 3031 | + string_key_len--; /* exclude final '\0' */ |
| 3032 | + if (string_key_len == 0) { /* empty string given as pattern */ |
| 3033 | + efree(patterns); |
| 3034 | + RETURN_FALSE; |
| 3035 | + } |
| 3036 | + if (string_key_len > slen) { /* this pattern can never match */ |
| 3037 | + continue; |
| 3038 | + } |
| 3039 | + |
| 3040 | + if (Z_TYPE_PP(entry) != IS_STRING) { |
| 3041 | + tzv = *entry; |
| 3042 | + zval_addref_p(tzv); |
| 3043 | + SEPARATE_ZVAL(&tzv); |
| 3044 | + convert_to_string(tzv); |
| 3045 | + entry = &tzv; |
| 3046 | + free_repl = 1; |
| 3047 | + } |
2871 | 3048 |
|
2872 |
| - if (! found) { |
2873 |
| - smart_str_appendc(&result, str[pos++]); |
| 3049 | + S(&patterns[i].pat) = string_key; |
| 3050 | + L(&patterns[i].pat) = string_key_len; |
| 3051 | + S(&patterns[i].repl) = Z_STRVAL_PP(entry); |
| 3052 | + L(&patterns[i].repl) = Z_STRLEN_PP(entry); |
2874 | 3053 | }
|
2875 | 3054 | }
|
2876 | 3055 |
|
2877 |
| - efree(key); |
2878 |
| - zend_hash_destroy(&tmp_hash); |
2879 |
| - smart_str_0(&result); |
2880 |
| - RETVAL_STRINGL(result.c, result.len, 0); |
| 3056 | + data = php_strtr_array_prepare(&text, patterns, i, 2, 2); |
| 3057 | + efree(patterns); |
| 3058 | + php_strtr_array_do_repl(&text, data, return_value); |
| 3059 | + php_strtr_array_destroy_ppres(data); |
2881 | 3060 | }
|
2882 | 3061 | /* }}} */
|
2883 | 3062 |
|
|
0 commit comments