1
var preg_match_all = (function () {
  //  discuss at:
  // original by: Camille Hodoul (http://webdev-snippets.net)
  //        note: The pattern must be a RegExp object.
  //   returns 1: ['X-MyHeader','X-AZE','USERAGENT'],
  //   returns 1: ['MyValue','adqdsdfff','Chrome123123 é']]

  // UNFINISHED this is a work in progress.
  // It's using Javascript's regex engine, which is different from PHP's PCRE.

  // The helpers below are declared in the closure, to avoid polluting the global scope.

  // True when the engine can report capture positions through the "d" flag.
  var HAS_INDICES = 'hasIndices' in RegExp.prototype;

  // Returns the flag string of a RegExp, rebuilding it from the boolean
  // properties on engines that do not expose RegExp.prototype.flags.
  function _regexp_flags(re) {
    if (typeof re.flags === 'string') {
      return re.flags;
    }
    return (re.global ? 'g' : '') + (re.ignoreCase ? 'i' : '') + (re.multiline ? 'm' : '');
  }

  // Returns the number of capturing groups of a RegExp.
  // Appending an empty alternative makes the probe match the empty string,
  // and the exec() result always holds one slot per group plus the full match.
  // The engine does the counting, so the answer follows JavaScript syntax
  // (character classes, escapes, named groups, lookarounds) exactly.
  function _count_capturing_groups(re) {
    var probe_flags = _regexp_flags(re).replace(/[gyd]/g, '');
    var probe = new RegExp(re.source + '|', probe_flags);
    return probe.exec('').length - 1;
  }

  // Builds a private global copy of the caller's RegExp so that every match is
  // found and the caller's lastIndex is never touched. When with_indices is
  // set and the engine supports it, the "d" flag is added as well.
  function _build_scanner(re, with_indices) {
    var flags = _regexp_flags(re).replace(/[gd]/g, '') + 'g';
    if (with_indices && HAS_INDICES) {
      flags += 'd';
    }
    return new RegExp(re.source, flags);
  }

  // Returns the index following `index`, stepping over a whole surrogate pair
  // when the RegExp works on code points. Used after an empty match so that
  // the scan always moves forward.
  function _advance_index(str, index, full_unicode) {
    if (full_unicode && index + 1 < str.length) {
      var lead = str.charCodeAt(index);
      var trail = str.charCodeAt(index + 1);
      if (lead >= 0xD800 && lead <= 0xDBFF && trail >= 0xDC00 && trail <= 0xDFFF) {
        return index + 2;
      }
    }
    return index + 1;
  }

  // Returns the position in the subject of group `index` of an exec() result,
  // or -1 when that group did not take part in the match.
  function _group_offset(match, index) {
    if (index === 0) {
      return match.index;
    }
    if (typeof match[index] === 'undefined') {
      return -1;
    }
    if (match.indices) {
      return match.indices[index][0];
    }
    // Best effort for engines without the "d" flag: first occurrence of the
    // captured text at or after the start of the match.
    return match.input.indexOf(match[index], match.index);
  }

  /**
   * Finds every match of `pattern` in `s`.
   *
   * @param {RegExp} pattern - the pattern; it is never modified and is always
   *   applied globally, whether or not it carries the "g" flag.
   * @param {string} s - the subject; converted with String().
   * @param {string} [flag='PREG_PATTERN_ORDER'] - one of:
   *   'PREG_PATTERN_ORDER'  -> result[g] lists what group g captured in each match
   *                            (result[0] holds the full matches);
   *   'PREG_SET_ORDER'      -> result[m] is [full, group1, group2, ...] for match m;
   *   'PREG_OFFSET_CAPTURE' -> same layout as PREG_PATTERN_ORDER, but each entry
   *                            is [text, position in the subject].
   *   Flag combinations are not supported; any other value yields [].
   * @param {number} [offset] - when greater than 0, position in the subject at
   *   which the search starts. Positions are counted in UTF-16 code units
   *   (PHP counts bytes).
   * @returns {Array} the matches, laid out according to `flag`. A group that
   *   did not participate in a match is reported as undefined (position -1).
   * @throws {TypeError} when `pattern` is not a RegExp object.
   */
  return function (pattern, s, flag, offset) {
    if (Object.prototype.toString.call(pattern) !== '[object RegExp]') {
      throw new TypeError('preg_match_all(): the pattern must be a RegExp object');
    }

    var order = flag || 'PREG_PATTERN_ORDER';
    // TODO support flag combination
    var subject = String(s);
    var matches = [];
    var with_offsets = order === 'PREG_OFFSET_CAPTURE';
    var i;

    if (order === 'PREG_PATTERN_ORDER' || with_offsets) {
      // One (possibly empty) list for the full match and one per group, so the
      // result keeps its shape even when nothing matches.
      var group_count = _count_capturing_groups(pattern);
      for (i = 0; i <= group_count; i++) {
        matches[i] = [];
      }
    } else if (order !== 'PREG_SET_ORDER') {
      // Unknown flag: nothing is collected.
      return matches;
    }

    var scanner = _build_scanner(pattern, with_offsets);
    if (typeof offset !== 'undefined' && offset > 0) {
      // Start scanning at the offset; reported positions stay relative to the
      // whole subject and anchors still see the complete string.
      scanner.lastIndex = Math.floor(offset);
    }
    var full_unicode = !!(scanner.unicode || scanner.unicodeSets);

    var match;
    while ((match = scanner.exec(subject)) !== null) {
      if (match[0] === '') {
        // An empty match does not move lastIndex; step forward manually.
        scanner.lastIndex = _advance_index(subject, scanner.lastIndex, full_unicode);
      }

      if (order === 'PREG_SET_ORDER') {
        // Copy into a plain array (drops index/input/groups).
        matches.push(Array.prototype.slice.call(match));
      } else if (with_offsets) {
        for (i = 0; i < match.length; i++) {
          matches[i].push([match[i], _group_offset(match, i)]);
        }
      } else {
        for (i = 0; i < match.length; i++) {
          matches[i].push(match[i]);
        }
      }
    }

    return matches;
  };
})();
0 commit comments