1 |
dilfridge 15/02/07 17:47:06 |
2 |
|
3 |
Added: icu-54.1-CVE-2014-9654.patch |
4 |
Log: |
5 |
Backport patch for CVE-2014-9654, bug 539108. Unfortunately this changes a public header file (unicode/utypes.h): it inserts a new error code, U_REGEX_PATTERN_TOO_BIG, ahead of U_REGEX_ERROR_LIMIT. Forcing a rebuild by changing the subslot. |
6 |
|
7 |
(Portage version: 2.2.15/cvs/Linux x86_64, signed Manifest commit with key 84AD142F) |
8 |
|
9 |
Revision Changes Path |
10 |
1.1 dev-libs/icu/files/icu-54.1-CVE-2014-9654.patch |
11 |
|
12 |
file : http://sources.gentoo.org/viewvc.cgi/gentoo-x86/dev-libs/icu/files/icu-54.1-CVE-2014-9654.patch?rev=1.1&view=markup |
13 |
plain: http://sources.gentoo.org/viewvc.cgi/gentoo-x86/dev-libs/icu/files/icu-54.1-CVE-2014-9654.patch?rev=1.1&content-type=text/plain |
14 |
|
15 |
Index: icu-54.1-CVE-2014-9654.patch |
16 |
=================================================================== |
17 |
Index: /icu/trunk/source/common/unicode/utypes.h |
18 |
=================================================================== |
19 |
--- /icu/trunk/source/common/unicode/utypes.h (revision 36800) |
20 |
+++ /icu/trunk/source/common/unicode/utypes.h (revision 36801) |
21 |
@@ -648,4 +648,5 @@ |
22 |
U_REGEX_TIME_OUT, /**< Maximum allowed match time exceeded */ |
23 |
U_REGEX_STOPPED_BY_CALLER, /**< Matching operation aborted by user callback fn. */ |
24 |
+ U_REGEX_PATTERN_TOO_BIG, /**< Pattern exceeds limits on size or complexity. @draft ICU 55 */ |
25 |
U_REGEX_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for regexp errors */ |
26 |
|
27 |
Index: /icu/trunk/source/common/utypes.c |
28 |
=================================================================== |
29 |
--- /icu/trunk/source/common/utypes.c (revision 36800) |
30 |
+++ /icu/trunk/source/common/utypes.c (revision 36801) |
31 |
@@ -2,5 +2,5 @@ |
32 |
****************************************************************************** |
33 |
* |
34 |
-* Copyright (C) 1997-2011, International Business Machines |
35 |
+* Copyright (C) 1997-2014, International Business Machines |
36 |
* Corporation and others. All Rights Reserved. |
37 |
* |
38 |
@@ -166,5 +166,6 @@ |
39 |
"U_REGEX_STACK_OVERFLOW", |
40 |
"U_REGEX_TIME_OUT", |
41 |
- "U_REGEX_STOPPED_BY_CALLER" |
42 |
+ "U_REGEX_STOPPED_BY_CALLER", |
43 |
+ "U_REGEX_PATTERN_TOO_BIG" |
44 |
}; |
45 |
|
46 |
Index: /icu/trunk/source/i18n/regexcmp.cpp |
47 |
=================================================================== |
48 |
--- /icu/trunk/source/i18n/regexcmp.cpp (revision 36800) |
49 |
+++ /icu/trunk/source/i18n/regexcmp.cpp (revision 36801) |
50 |
@@ -305,5 +305,5 @@ |
51 |
// the position in the compiled pattern. |
52 |
// |
53 |
- fRXPat->fFrameSize+=RESTACKFRAME_HDRCOUNT; |
54 |
+ allocateStackData(RESTACKFRAME_HDRCOUNT); |
55 |
|
56 |
// |
57 |
@@ -371,7 +371,7 @@ |
58 |
//4 NOP Resreved, will be replaced by a save if there are |
59 |
// OR | operators at the top level |
60 |
- fRXPat->fCompiledPat->addElement(URX_BUILD(URX_STATE_SAVE, 2), *fStatus); |
61 |
- fRXPat->fCompiledPat->addElement(URX_BUILD(URX_JMP, 3), *fStatus); |
62 |
- fRXPat->fCompiledPat->addElement(URX_BUILD(URX_FAIL, 0), *fStatus); |
63 |
+ appendOp(URX_STATE_SAVE, 2); |
64 |
+ appendOp(URX_JMP, 3); |
65 |
+ appendOp(URX_FAIL, 0); |
66 |
|
67 |
// Standard open nonCapture paren action emits the two NOPs and |
68 |
@@ -396,5 +396,5 @@ |
69 |
|
70 |
// add the END operation to the compiled pattern. |
71 |
- fRXPat->fCompiledPat->addElement(URX_BUILD(URX_END, 0), *fStatus); |
72 |
+ appendOp(URX_END, 0); |
73 |
|
74 |
// Terminate the pattern compilation state machine. |
75 |
@@ -418,5 +418,5 @@ |
76 |
int32_t op = (int32_t)fRXPat->fCompiledPat->elementAti(savePosition); |
77 |
U_ASSERT(URX_TYPE(op) == URX_NOP); // original contents of reserved location |
78 |
- op = URX_BUILD(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+1); |
79 |
+ op = buildOp(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+1); |
80 |
fRXPat->fCompiledPat->setElementAt(op, savePosition); |
81 |
|
82 |
@@ -424,6 +424,5 @@ |
83 |
// the JMP will eventually be the location following the ')' for the |
84 |
// group. This will be patched in later, when the ')' is encountered. |
85 |
- op = URX_BUILD(URX_JMP, 0); |
86 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
87 |
+ appendOp(URX_JMP, 0); |
88 |
|
89 |
// Push the position of the newly added JMP op onto the parentheses stack. |
90 |
@@ -434,5 +433,5 @@ |
91 |
// for a SAVE in the event that there is yet another '|' following |
92 |
// this one. |
93 |
- fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus); |
94 |
+ appendOp(URX_NOP, 0); |
95 |
fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); |
96 |
} |
97 |
@@ -460,10 +459,8 @@ |
98 |
{ |
99 |
fixLiterals(); |
100 |
- fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus); |
101 |
- int32_t varsLoc = fRXPat->fFrameSize; // Reserve three slots in match stack frame. |
102 |
- fRXPat->fFrameSize += 3; |
103 |
- int32_t cop = URX_BUILD(URX_START_CAPTURE, varsLoc); |
104 |
- fRXPat->fCompiledPat->addElement(cop, *fStatus); |
105 |
- fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus); |
106 |
+ appendOp(URX_NOP, 0); |
107 |
+ int32_t varsLoc = allocateStackData(3); // Reserve three slots in match stack frame. |
108 |
+ appendOp(URX_START_CAPTURE, varsLoc); |
109 |
+ appendOp(URX_NOP, 0); |
110 |
|
111 |
// On the Parentheses stack, start a new frame and add the postions |
112 |
@@ -490,6 +487,6 @@ |
113 |
{ |
114 |
fixLiterals(); |
115 |
- fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus); |
116 |
- fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus); |
117 |
+ appendOp(URX_NOP, 0); |
118 |
+ appendOp(URX_NOP, 0); |
119 |
|
120 |
// On the Parentheses stack, start a new frame and add the postions |
121 |
@@ -513,10 +510,8 @@ |
122 |
{ |
123 |
fixLiterals(); |
124 |
- fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus); |
125 |
- int32_t varLoc = fRXPat->fDataSize; // Reserve a data location for saving the |
126 |
- fRXPat->fDataSize += 1; // state stack ptr. |
127 |
- int32_t stoOp = URX_BUILD(URX_STO_SP, varLoc); |
128 |
- fRXPat->fCompiledPat->addElement(stoOp, *fStatus); |
129 |
- fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus); |
130 |
+ appendOp(URX_NOP, 0); |
131 |
+ int32_t varLoc = allocateData(1); // Reserve a data location for saving the state stack ptr. |
132 |
+ appendOp(URX_STO_SP, varLoc); |
133 |
+ appendOp(URX_NOP, 0); |
134 |
|
135 |
// On the Parentheses stack, start a new frame and add the postions |
136 |
@@ -561,24 +556,12 @@ |
137 |
{ |
138 |
fixLiterals(); |
139 |
- int32_t dataLoc = fRXPat->fDataSize; |
140 |
- fRXPat->fDataSize += 2; |
141 |
- int32_t op = URX_BUILD(URX_LA_START, dataLoc); |
142 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
143 |
- |
144 |
- op = URX_BUILD(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+ 2); |
145 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
146 |
- |
147 |
- op = URX_BUILD(URX_JMP, fRXPat->fCompiledPat->size()+ 3); |
148 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
149 |
- |
150 |
- op = URX_BUILD(URX_LA_END, dataLoc); |
151 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
152 |
- |
153 |
- op = URX_BUILD(URX_BACKTRACK, 0); |
154 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
155 |
- |
156 |
- op = URX_BUILD(URX_NOP, 0); |
157 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
158 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
159 |
+ int32_t dataLoc = allocateData(2); |
160 |
+ appendOp(URX_LA_START, dataLoc); |
161 |
+ appendOp(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+ 2); |
162 |
+ appendOp(URX_JMP, fRXPat->fCompiledPat->size()+ 3); |
163 |
+ appendOp(URX_LA_END, dataLoc); |
164 |
+ appendOp(URX_BACKTRACK, 0); |
165 |
+ appendOp(URX_NOP, 0); |
166 |
+ appendOp(URX_NOP, 0); |
167 |
|
168 |
// On the Parentheses stack, start a new frame and add the postions |
169 |
@@ -605,14 +588,8 @@ |
170 |
{ |
171 |
fixLiterals(); |
172 |
- int32_t dataLoc = fRXPat->fDataSize; |
173 |
- fRXPat->fDataSize += 2; |
174 |
- int32_t op = URX_BUILD(URX_LA_START, dataLoc); |
175 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
176 |
- |
177 |
- op = URX_BUILD(URX_STATE_SAVE, 0); // dest address will be patched later. |
178 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
179 |
- |
180 |
- op = URX_BUILD(URX_NOP, 0); |
181 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
182 |
+ int32_t dataLoc = allocateData(2); |
183 |
+ appendOp(URX_LA_START, dataLoc); |
184 |
+ appendOp(URX_STATE_SAVE, 0); // dest address will be patched later. |
185 |
+ appendOp(URX_NOP, 0); |
186 |
|
187 |
// On the Parentheses stack, start a new frame and add the postions |
188 |
@@ -652,21 +629,17 @@ |
189 |
|
190 |
// Allocate data space |
191 |
- int32_t dataLoc = fRXPat->fDataSize; |
192 |
- fRXPat->fDataSize += 4; |
193 |
+ int32_t dataLoc = allocateData(4); |
194 |
|
195 |
// Emit URX_LB_START |
196 |
- int32_t op = URX_BUILD(URX_LB_START, dataLoc); |
197 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
198 |
+ appendOp(URX_LB_START, dataLoc); |
199 |
|
200 |
// Emit URX_LB_CONT |
201 |
- op = URX_BUILD(URX_LB_CONT, dataLoc); |
202 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
203 |
- fRXPat->fCompiledPat->addElement(0, *fStatus); // MinMatchLength. To be filled later. |
204 |
- fRXPat->fCompiledPat->addElement(0, *fStatus); // MaxMatchLength. To be filled later. |
205 |
- |
206 |
- // Emit the NOP |
207 |
- op = URX_BUILD(URX_NOP, 0); |
208 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
209 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
210 |
+ appendOp(URX_LB_CONT, dataLoc); |
211 |
+ appendOp(URX_RESERVED_OP, 0); // MinMatchLength. To be filled later. |
212 |
+ appendOp(URX_RESERVED_OP, 0); // MaxMatchLength. To be filled later. |
213 |
+ |
214 |
+ // Emit the NOPs |
215 |
+ appendOp(URX_NOP, 0); |
216 |
+ appendOp(URX_NOP, 0); |
217 |
|
218 |
// On the Parentheses stack, start a new frame and add the postions |
219 |
@@ -708,22 +681,18 @@ |
220 |
|
221 |
// Allocate data space |
222 |
- int32_t dataLoc = fRXPat->fDataSize; |
223 |
- fRXPat->fDataSize += 4; |
224 |
+ int32_t dataLoc = allocateData(4); |
225 |
|
226 |
// Emit URX_LB_START |
227 |
- int32_t op = URX_BUILD(URX_LB_START, dataLoc); |
228 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
229 |
+ appendOp(URX_LB_START, dataLoc); |
230 |
|
231 |
// Emit URX_LBN_CONT |
232 |
- op = URX_BUILD(URX_LBN_CONT, dataLoc); |
233 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
234 |
- fRXPat->fCompiledPat->addElement(0, *fStatus); // MinMatchLength. To be filled later. |
235 |
- fRXPat->fCompiledPat->addElement(0, *fStatus); // MaxMatchLength. To be filled later. |
236 |
- fRXPat->fCompiledPat->addElement(0, *fStatus); // Continue Loc. To be filled later. |
237 |
- |
238 |
- // Emit the NOP |
239 |
- op = URX_BUILD(URX_NOP, 0); |
240 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
241 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
242 |
+ appendOp(URX_LBN_CONT, dataLoc); |
243 |
+ appendOp(URX_RESERVED_OP, 0); // MinMatchLength. To be filled later. |
244 |
+ appendOp(URX_RESERVED_OP, 0); // MaxMatchLength. To be filled later. |
245 |
+ appendOp(URX_RESERVED_OP, 0); // Continue Loc. To be filled later. |
246 |
+ |
247 |
+ // Emit the NOPs |
248 |
+ appendOp(URX_NOP, 0); |
249 |
+ appendOp(URX_NOP, 0); |
250 |
|
251 |
// On the Parentheses stack, start a new frame and add the postions |
252 |
@@ -795,10 +764,7 @@ |
253 |
if (URX_TYPE(repeatedOp) == URX_SETREF) { |
254 |
// Emit optimized code for [char set]+ |
255 |
- int32_t loopOpI = URX_BUILD(URX_LOOP_SR_I, URX_VAL(repeatedOp)); |
256 |
- fRXPat->fCompiledPat->addElement(loopOpI, *fStatus); |
257 |
- frameLoc = fRXPat->fFrameSize; |
258 |
- fRXPat->fFrameSize++; |
259 |
- int32_t loopOpC = URX_BUILD(URX_LOOP_C, frameLoc); |
260 |
- fRXPat->fCompiledPat->addElement(loopOpC, *fStatus); |
261 |
+ appendOp(URX_LOOP_SR_I, URX_VAL(repeatedOp)); |
262 |
+ frameLoc = allocateStackData(1); |
263 |
+ appendOp(URX_LOOP_C, frameLoc); |
264 |
break; |
265 |
} |
266 |
@@ -808,5 +774,5 @@ |
267 |
URX_TYPE(repeatedOp) == URX_DOTANY_UNIX) { |
268 |
// Emit Optimized code for .+ operations. |
269 |
- int32_t loopOpI = URX_BUILD(URX_LOOP_DOT_I, 0); |
270 |
+ int32_t loopOpI = buildOp(URX_LOOP_DOT_I, 0); |
271 |
if (URX_TYPE(repeatedOp) == URX_DOTANY_ALL) { |
272 |
// URX_LOOP_DOT_I operand is a flag indicating ". matches any" mode. |
273 |
@@ -816,9 +782,7 @@ |
274 |
loopOpI |= 2; |
275 |
} |
276 |
- fRXPat->fCompiledPat->addElement(loopOpI, *fStatus); |
277 |
- frameLoc = fRXPat->fFrameSize; |
278 |
- fRXPat->fFrameSize++; |
279 |
- int32_t loopOpC = URX_BUILD(URX_LOOP_C, frameLoc); |
280 |
- fRXPat->fCompiledPat->addElement(loopOpC, *fStatus); |
281 |
+ appendOp(loopOpI); |
282 |
+ frameLoc = allocateStackData(1); |
283 |
+ appendOp(URX_LOOP_C, frameLoc); |
284 |
break; |
285 |
} |
286 |
@@ -834,16 +798,13 @@ |
287 |
// Emit the code sequence that can handle it. |
288 |
insertOp(topLoc); |
289 |
- frameLoc = fRXPat->fFrameSize; |
290 |
- fRXPat->fFrameSize++; |
291 |
- |
292 |
- int32_t op = URX_BUILD(URX_STO_INP_LOC, frameLoc); |
293 |
+ frameLoc = allocateStackData(1); |
294 |
+ |
295 |
+ int32_t op = buildOp(URX_STO_INP_LOC, frameLoc); |
296 |
fRXPat->fCompiledPat->setElementAt(op, topLoc); |
297 |
|
298 |
- op = URX_BUILD(URX_JMP_SAV_X, topLoc+1); |
299 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
300 |
+ appendOp(URX_JMP_SAV_X, topLoc+1); |
301 |
} else { |
302 |
// Simpler code when the repeated body must match something non-empty |
303 |
- int32_t jmpOp = URX_BUILD(URX_JMP_SAV, topLoc); |
304 |
- fRXPat->fCompiledPat->addElement(jmpOp, *fStatus); |
305 |
+ appendOp(URX_JMP_SAV, topLoc); |
306 |
} |
307 |
} |
308 |
@@ -857,6 +818,5 @@ |
309 |
{ |
310 |
int32_t topLoc = blockTopLoc(FALSE); |
311 |
- int32_t saveStateOp = URX_BUILD(URX_STATE_SAVE, topLoc); |
312 |
- fRXPat->fCompiledPat->addElement(saveStateOp, *fStatus); |
313 |
+ appendOp(URX_STATE_SAVE, topLoc); |
314 |
} |
315 |
break; |
316 |
@@ -872,5 +832,5 @@ |
317 |
{ |
318 |
int32_t saveStateLoc = blockTopLoc(TRUE); |
319 |
- int32_t saveStateOp = URX_BUILD(URX_STATE_SAVE, fRXPat->fCompiledPat->size()); |
320 |
+ int32_t saveStateOp = buildOp(URX_STATE_SAVE, fRXPat->fCompiledPat->size()); |
321 |
fRXPat->fCompiledPat->setElementAt(saveStateOp, saveStateLoc); |
322 |
} |
323 |
@@ -891,12 +851,10 @@ |
324 |
int32_t jmp2_loc = fRXPat->fCompiledPat->size(); |
325 |
|
326 |
- int32_t jmp1_op = URX_BUILD(URX_JMP, jmp2_loc+1); |
327 |
+ int32_t jmp1_op = buildOp(URX_JMP, jmp2_loc+1); |
328 |
fRXPat->fCompiledPat->setElementAt(jmp1_op, jmp1_loc); |
329 |
|
330 |
- int32_t jmp2_op = URX_BUILD(URX_JMP, jmp2_loc+2); |
331 |
- fRXPat->fCompiledPat->addElement(jmp2_op, *fStatus); |
332 |
- |
333 |
- int32_t save_op = URX_BUILD(URX_STATE_SAVE, jmp1_loc+1); |
334 |
- fRXPat->fCompiledPat->addElement(save_op, *fStatus); |
335 |
+ appendOp(URX_JMP, jmp2_loc+2); |
336 |
+ |
337 |
+ appendOp(URX_STATE_SAVE, jmp1_loc+1); |
338 |
} |
339 |
break; |
340 |
@@ -938,10 +896,8 @@ |
341 |
if (URX_TYPE(repeatedOp) == URX_SETREF) { |
342 |
// Emit optimized code for a [char set]* |
343 |
- int32_t loopOpI = URX_BUILD(URX_LOOP_SR_I, URX_VAL(repeatedOp)); |
344 |
+ int32_t loopOpI = buildOp(URX_LOOP_SR_I, URX_VAL(repeatedOp)); |
345 |
fRXPat->fCompiledPat->setElementAt(loopOpI, topLoc); |
346 |
- dataLoc = fRXPat->fFrameSize; |
347 |
- fRXPat->fFrameSize++; |
348 |
- int32_t loopOpC = URX_BUILD(URX_LOOP_C, dataLoc); |
349 |
- fRXPat->fCompiledPat->addElement(loopOpC, *fStatus); |
350 |
+ dataLoc = allocateStackData(1); |
351 |
+ appendOp(URX_LOOP_C, dataLoc); |
352 |
break; |
353 |
} |
354 |
@@ -951,5 +907,5 @@ |
355 |
URX_TYPE(repeatedOp) == URX_DOTANY_UNIX) { |
356 |
// Emit Optimized code for .* operations. |
357 |
- int32_t loopOpI = URX_BUILD(URX_LOOP_DOT_I, 0); |
358 |
+ int32_t loopOpI = buildOp(URX_LOOP_DOT_I, 0); |
359 |
if (URX_TYPE(repeatedOp) == URX_DOTANY_ALL) { |
360 |
// URX_LOOP_DOT_I operand is a flag indicating . matches any mode. |
361 |
@@ -960,8 +916,6 @@ |
362 |
} |
363 |
fRXPat->fCompiledPat->setElementAt(loopOpI, topLoc); |
364 |
- dataLoc = fRXPat->fFrameSize; |
365 |
- fRXPat->fFrameSize++; |
366 |
- int32_t loopOpC = URX_BUILD(URX_LOOP_C, dataLoc); |
367 |
- fRXPat->fCompiledPat->addElement(loopOpC, *fStatus); |
368 |
+ dataLoc = allocateStackData(1); |
369 |
+ appendOp(URX_LOOP_C, dataLoc); |
370 |
break; |
371 |
} |
372 |
@@ -972,5 +926,5 @@ |
373 |
|
374 |
int32_t saveStateLoc = blockTopLoc(TRUE); |
375 |
- int32_t jmpOp = URX_BUILD(URX_JMP_SAV, saveStateLoc+1); |
376 |
+ int32_t jmpOp = buildOp(URX_JMP_SAV, saveStateLoc+1); |
377 |
|
378 |
// Check for minimum match length of zero, which requires |
379 |
@@ -978,10 +932,9 @@ |
380 |
if (minMatchLength(saveStateLoc, fRXPat->fCompiledPat->size()-1) == 0) { |
381 |
insertOp(saveStateLoc); |
382 |
- dataLoc = fRXPat->fFrameSize; |
383 |
- fRXPat->fFrameSize++; |
384 |
- |
385 |
- int32_t op = URX_BUILD(URX_STO_INP_LOC, dataLoc); |
386 |
+ dataLoc = allocateStackData(1); |
387 |
+ |
388 |
+ int32_t op = buildOp(URX_STO_INP_LOC, dataLoc); |
389 |
fRXPat->fCompiledPat->setElementAt(op, saveStateLoc+1); |
390 |
- jmpOp = URX_BUILD(URX_JMP_SAV_X, saveStateLoc+2); |
391 |
+ jmpOp = buildOp(URX_JMP_SAV_X, saveStateLoc+2); |
392 |
} |
393 |
|
394 |
@@ -990,10 +943,10 @@ |
395 |
int32_t continueLoc = fRXPat->fCompiledPat->size()+1; |
396 |
|
397 |
- // Put together the save state op store it into the compiled code. |
398 |
- int32_t saveStateOp = URX_BUILD(URX_STATE_SAVE, continueLoc); |
399 |
+ // Put together the save state op and store it into the compiled code. |
400 |
+ int32_t saveStateOp = buildOp(URX_STATE_SAVE, continueLoc); |
401 |
fRXPat->fCompiledPat->setElementAt(saveStateOp, saveStateLoc); |
402 |
|
403 |
// Append the URX_JMP_SAV or URX_JMPX operation to the compiled pattern. |
404 |
- fRXPat->fCompiledPat->addElement(jmpOp, *fStatus); |
405 |
+ appendOp(jmpOp); |
406 |
} |
407 |
break; |
408 |
@@ -1009,8 +962,7 @@ |
409 |
int32_t jmpLoc = blockTopLoc(TRUE); // loc 1. |
410 |
int32_t saveLoc = fRXPat->fCompiledPat->size(); // loc 3. |
411 |
- int32_t jmpOp = URX_BUILD(URX_JMP, saveLoc); |
412 |
- int32_t stateSaveOp = URX_BUILD(URX_STATE_SAVE, jmpLoc+1); |
413 |
+ int32_t jmpOp = buildOp(URX_JMP, saveLoc); |
414 |
fRXPat->fCompiledPat->setElementAt(jmpOp, jmpLoc); |
415 |
- fRXPat->fCompiledPat->addElement(stateSaveOp, *fStatus); |
416 |
+ appendOp(URX_STATE_SAVE, jmpLoc+1); |
417 |
} |
418 |
break; |
419 |
@@ -1085,7 +1037,7 @@ |
420 |
// First the STO_SP before the start of the loop |
421 |
insertOp(topLoc); |
422 |
- int32_t varLoc = fRXPat->fDataSize; // Reserve a data location for saving the |
423 |
- fRXPat->fDataSize += 1; // state stack ptr. |
424 |
- int32_t op = URX_BUILD(URX_STO_SP, varLoc); |
425 |
+ |
426 |
+ int32_t varLoc = allocateData(1); // Reserve a data location for saving the |
427 |
+ int32_t op = buildOp(URX_STO_SP, varLoc); |
428 |
fRXPat->fCompiledPat->setElementAt(op, topLoc); |
429 |
|
430 |
@@ -1096,6 +1048,5 @@ |
431 |
|
432 |
// Then the LD_SP after the end of the loop |
433 |
- op = URX_BUILD(URX_LD_SP, varLoc); |
434 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
435 |
+ appendOp(URX_LD_SP, varLoc); |
436 |
} |
437 |
|
438 |
@@ -1133,13 +1084,11 @@ |
439 |
{ |
440 |
fixLiterals(FALSE); |
441 |
- int32_t op; |
442 |
if (fModeFlags & UREGEX_DOTALL) { |
443 |
- op = URX_BUILD(URX_DOTANY_ALL, 0); |
444 |
+ appendOp(URX_DOTANY_ALL, 0); |
445 |
} else if (fModeFlags & UREGEX_UNIX_LINES) { |
446 |
- op = URX_BUILD(URX_DOTANY_UNIX, 0); |
447 |
+ appendOp(URX_DOTANY_UNIX, 0); |
448 |
} else { |
449 |
- op = URX_BUILD(URX_DOTANY, 0); |
450 |
- } |
451 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
452 |
+ appendOp(URX_DOTANY, 0); |
453 |
+ } |
454 |
} |
455 |
break; |
456 |
@@ -1148,15 +1097,13 @@ |
457 |
{ |
458 |
fixLiterals(FALSE); |
459 |
- int32_t op = 0; |
460 |
if ( (fModeFlags & UREGEX_MULTILINE) == 0 && (fModeFlags & UREGEX_UNIX_LINES) == 0) { |
461 |
- op = URX_CARET; |
462 |
+ appendOp(URX_CARET, 0); |
463 |
} else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & UREGEX_UNIX_LINES) == 0) { |
464 |
- op = URX_CARET_M; |
465 |
+ appendOp(URX_CARET_M, 0); |
466 |
} else if ((fModeFlags & UREGEX_MULTILINE) == 0 && (fModeFlags & UREGEX_UNIX_LINES) != 0) { |
467 |
- op = URX_CARET; // Only testing true start of input. |
468 |
+ appendOp(URX_CARET, 0); // Only testing true start of input. |
469 |
} else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & UREGEX_UNIX_LINES) != 0) { |
470 |
- op = URX_CARET_M_UNIX; |
471 |
- } |
472 |
- fRXPat->fCompiledPat->addElement(URX_BUILD(op, 0), *fStatus); |
473 |
+ appendOp(URX_CARET_M_UNIX, 0); |
474 |
+ } |
475 |
} |
476 |
break; |
477 |
@@ -1165,15 +1112,13 @@ |
478 |
{ |
479 |
fixLiterals(FALSE); |
480 |
- int32_t op = 0; |
481 |
if ( (fModeFlags & UREGEX_MULTILINE) == 0 && (fModeFlags & UREGEX_UNIX_LINES) == 0) { |
482 |
- op = URX_DOLLAR; |
483 |
+ appendOp(URX_DOLLAR, 0); |
484 |
} else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & UREGEX_UNIX_LINES) == 0) { |
485 |
- op = URX_DOLLAR_M; |
486 |
+ appendOp(URX_DOLLAR_M, 0); |
487 |
} else if ((fModeFlags & UREGEX_MULTILINE) == 0 && (fModeFlags & UREGEX_UNIX_LINES) != 0) { |
488 |
- op = URX_DOLLAR_D; |
489 |
+ appendOp(URX_DOLLAR_D, 0); |
490 |
} else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & UREGEX_UNIX_LINES) != 0) { |
491 |
- op = URX_DOLLAR_MD; |
492 |
- } |
493 |
- fRXPat->fCompiledPat->addElement(URX_BUILD(op, 0), *fStatus); |
494 |
+ appendOp(URX_DOLLAR_MD, 0); |
495 |
+ } |
496 |
} |
497 |
break; |
498 |
@@ -1181,5 +1126,5 @@ |
499 |
case doBackslashA: |
500 |
fixLiterals(FALSE); |
501 |
- fRXPat->fCompiledPat->addElement(URX_BUILD(URX_CARET, 0), *fStatus); |
502 |
+ appendOp(URX_CARET, 0); |
503 |
break; |
504 |
|
505 |
@@ -1193,5 +1138,5 @@ |
506 |
fixLiterals(FALSE); |
507 |
int32_t op = (fModeFlags & UREGEX_UWORD)? URX_BACKSLASH_BU : URX_BACKSLASH_B; |
508 |
- fRXPat->fCompiledPat->addElement(URX_BUILD(op, 1), *fStatus); |
509 |
+ appendOp(op, 1); |
510 |
} |
511 |
break; |
512 |
@@ -1206,5 +1151,5 @@ |
513 |
fixLiterals(FALSE); |
514 |
int32_t op = (fModeFlags & UREGEX_UWORD)? URX_BACKSLASH_BU : URX_BACKSLASH_B; |
515 |
- fRXPat->fCompiledPat->addElement(URX_BUILD(op, 0), *fStatus); |
516 |
+ appendOp(op, 0); |
517 |
} |
518 |
break; |
519 |
@@ -1212,44 +1157,40 @@ |
520 |
case doBackslashD: |
521 |
fixLiterals(FALSE); |
522 |
- fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKSLASH_D, 1), *fStatus); |
523 |
+ appendOp(URX_BACKSLASH_D, 1); |
524 |
break; |
525 |
|
526 |
case doBackslashd: |
527 |
fixLiterals(FALSE); |
528 |
- fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKSLASH_D, 0), *fStatus); |
529 |
+ appendOp(URX_BACKSLASH_D, 0); |
530 |
break; |
531 |
|
532 |
case doBackslashG: |
533 |
fixLiterals(FALSE); |
534 |
- fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKSLASH_G, 0), *fStatus); |
535 |
+ appendOp(URX_BACKSLASH_G, 0); |
536 |
break; |
537 |
|
538 |
case doBackslashS: |
539 |
fixLiterals(FALSE); |
540 |
- fRXPat->fCompiledPat->addElement( |
541 |
- URX_BUILD(URX_STAT_SETREF_N, URX_ISSPACE_SET), *fStatus); |
542 |
+ appendOp(URX_STAT_SETREF_N, URX_ISSPACE_SET); |
543 |
break; |
544 |
|
545 |
case doBackslashs: |
546 |
fixLiterals(FALSE); |
547 |
- fRXPat->fCompiledPat->addElement( |
548 |
- URX_BUILD(URX_STATIC_SETREF, URX_ISSPACE_SET), *fStatus); |
549 |
+ appendOp(URX_STATIC_SETREF, URX_ISSPACE_SET); |
550 |
break; |
551 |
|
552 |
case doBackslashW: |
553 |
fixLiterals(FALSE); |
554 |
- fRXPat->fCompiledPat->addElement( |
555 |
- URX_BUILD(URX_STAT_SETREF_N, URX_ISWORD_SET), *fStatus); |
556 |
+ appendOp(URX_STAT_SETREF_N, URX_ISWORD_SET); |
557 |
break; |
558 |
|
559 |
case doBackslashw: |
560 |
fixLiterals(FALSE); |
561 |
- fRXPat->fCompiledPat->addElement( |
562 |
- URX_BUILD(URX_STATIC_SETREF, URX_ISWORD_SET), *fStatus); |
563 |
+ appendOp(URX_STATIC_SETREF, URX_ISWORD_SET); |
564 |
break; |
565 |
|
566 |
case doBackslashX: |
567 |
fixLiterals(FALSE); |
568 |
- fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKSLASH_X, 0), *fStatus); |
569 |
+ appendOp(URX_BACKSLASH_X, 0); |
570 |
break; |
571 |
|
572 |
@@ -1257,10 +1198,10 @@ |
573 |
case doBackslashZ: |
574 |
fixLiterals(FALSE); |
575 |
- fRXPat->fCompiledPat->addElement(URX_BUILD(URX_DOLLAR, 0), *fStatus); |
576 |
+ appendOp(URX_DOLLAR, 0); |
577 |
break; |
578 |
|
579 |
case doBackslashz: |
580 |
fixLiterals(FALSE); |
581 |
- fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKSLASH_Z, 0), *fStatus); |
582 |
+ appendOp(URX_BACKSLASH_Z, 0); |
583 |
break; |
584 |
|
585 |
@@ -1322,11 +1263,9 @@ |
586 |
// and shouldn't enter this code path at all. |
587 |
fixLiterals(FALSE); |
588 |
- int32_t op; |
589 |
if (fModeFlags & UREGEX_CASE_INSENSITIVE) { |
590 |
- op = URX_BUILD(URX_BACKREF_I, groupNum); |
591 |
+ appendOp(URX_BACKREF_I, groupNum); |
592 |
} else { |
593 |
- op = URX_BUILD(URX_BACKREF, groupNum); |
594 |
- } |
595 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
596 |
+ appendOp(URX_BACKREF, groupNum); |
597 |
+ } |
598 |
} |
599 |
break; |
600 |
@@ -1349,20 +1288,16 @@ |
601 |
// Emit the STO_SP |
602 |
int32_t topLoc = blockTopLoc(TRUE); |
603 |
- int32_t stoLoc = fRXPat->fDataSize; |
604 |
- fRXPat->fDataSize++; // Reserve the data location for storing save stack ptr. |
605 |
- int32_t op = URX_BUILD(URX_STO_SP, stoLoc); |
606 |
+ int32_t stoLoc = allocateData(1); // Reserve the data location for storing save stack ptr. |
607 |
+ int32_t op = buildOp(URX_STO_SP, stoLoc); |
608 |
fRXPat->fCompiledPat->setElementAt(op, topLoc); |
609 |
|
610 |
// Emit the STATE_SAVE |
611 |
- op = URX_BUILD(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+2); |
612 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
613 |
+ appendOp(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+2); |
614 |
|
615 |
// Emit the JMP |
616 |
- op = URX_BUILD(URX_JMP, topLoc+1); |
617 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
618 |
+ appendOp(URX_JMP, topLoc+1); |
619 |
|
620 |
// Emit the LD_SP |
621 |
- op = URX_BUILD(URX_LD_SP, stoLoc); |
622 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
623 |
+ appendOp(URX_LD_SP, stoLoc); |
624 |
} |
625 |
break; |
626 |
@@ -1384,21 +1319,18 @@ |
627 |
|
628 |
// emit STO_SP loc |
629 |
- int32_t stoLoc = fRXPat->fDataSize; |
630 |
- fRXPat->fDataSize++; // Reserve the data location for storing save stack ptr. |
631 |
- int32_t op = URX_BUILD(URX_STO_SP, stoLoc); |
632 |
+ int32_t stoLoc = allocateData(1); // Reserve the data location for storing save stack ptr. |
633 |
+ int32_t op = buildOp(URX_STO_SP, stoLoc); |
634 |
fRXPat->fCompiledPat->setElementAt(op, topLoc); |
635 |
|
636 |
// Emit the SAVE_STATE 5 |
637 |
int32_t L7 = fRXPat->fCompiledPat->size()+1; |
638 |
- op = URX_BUILD(URX_STATE_SAVE, L7); |
639 |
+ op = buildOp(URX_STATE_SAVE, L7); |
640 |
fRXPat->fCompiledPat->setElementAt(op, topLoc+1); |
641 |
|
642 |
// Append the JMP operation. |
643 |
- op = URX_BUILD(URX_JMP, topLoc+1); |
644 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
645 |
+ appendOp(URX_JMP, topLoc+1); |
646 |
|
647 |
// Emit the LD_SP loc |
648 |
- op = URX_BUILD(URX_LD_SP, stoLoc); |
649 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
650 |
+ appendOp(URX_LD_SP, stoLoc); |
651 |
} |
652 |
break; |
653 |
@@ -1419,17 +1351,15 @@ |
654 |
|
655 |
// Emit the STO_SP |
656 |
- int32_t stoLoc = fRXPat->fDataSize; |
657 |
- fRXPat->fDataSize++; // Reserve the data location for storing save stack ptr. |
658 |
- int32_t op = URX_BUILD(URX_STO_SP, stoLoc); |
659 |
+ int32_t stoLoc = allocateData(1); // Reserve the data location for storing save stack ptr. |
660 |
+ int32_t op = buildOp(URX_STO_SP, stoLoc); |
661 |
fRXPat->fCompiledPat->setElementAt(op, topLoc); |
662 |
|
663 |
// Emit the SAVE_STATE |
664 |
int32_t continueLoc = fRXPat->fCompiledPat->size()+1; |
665 |
- op = URX_BUILD(URX_STATE_SAVE, continueLoc); |
666 |
+ op = buildOp(URX_STATE_SAVE, continueLoc); |
667 |
fRXPat->fCompiledPat->setElementAt(op, topLoc+1); |
668 |
|
669 |
// Emit the LD_SP |
670 |
- op = URX_BUILD(URX_LD_SP, stoLoc); |
671 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
672 |
+ appendOp(URX_LD_SP, stoLoc); |
673 |
} |
674 |
break; |
675 |
@@ -1488,6 +1418,6 @@ |
676 |
{ |
677 |
fixLiterals(FALSE); |
678 |
- fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus); |
679 |
- fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus); |
680 |
+ appendOp(URX_NOP, 0); |
681 |
+ appendOp(URX_NOP, 0); |
682 |
|
683 |
// On the Parentheses stack, start a new frame and add the postions |
684 |
@@ -1826,5 +1756,4 @@ |
685 |
//------------------------------------------------------------------------------ |
686 |
void RegexCompile::fixLiterals(UBool split) { |
687 |
- int32_t op = 0; // An op from/for the compiled pattern. |
688 |
|
689 |
// If no literal characters have been scanned but not yet had code generated |
690 |
@@ -1865,21 +1794,21 @@ |
691 |
if ((fModeFlags & UREGEX_CASE_INSENSITIVE) && |
692 |
u_hasBinaryProperty(lastCodePoint, UCHAR_CASE_SENSITIVE)) { |
693 |
- op = URX_BUILD(URX_ONECHAR_I, lastCodePoint); |
694 |
+ appendOp(URX_ONECHAR_I, lastCodePoint); |
695 |
} else { |
696 |
- op = URX_BUILD(URX_ONECHAR, lastCodePoint); |
697 |
- } |
698 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
699 |
+ appendOp(URX_ONECHAR, lastCodePoint); |
700 |
+ } |
701 |
} else { |
702 |
// Two or more chars, emit a URX_STRING to match them. |
703 |
+ if (fLiteralChars.length() > 0x00ffffff || fRXPat->fLiteralText.length() > 0x00ffffff) { |
704 |
+ error(U_REGEX_PATTERN_TOO_BIG); |
705 |
+ } |
706 |
if (fModeFlags & UREGEX_CASE_INSENSITIVE) { |
707 |
- op = URX_BUILD(URX_STRING_I, fRXPat->fLiteralText.length()); |
708 |
+ appendOp(URX_STRING_I, fRXPat->fLiteralText.length()); |
709 |
} else { |
710 |
// TODO here: add optimization to split case sensitive strings of length two |
711 |
// into two single char ops, for efficiency. |
712 |
- op = URX_BUILD(URX_STRING, fRXPat->fLiteralText.length()); |
713 |
- } |
714 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
715 |
- op = URX_BUILD(URX_STRING_LEN, fLiteralChars.length()); |
716 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
717 |
+ appendOp(URX_STRING, fRXPat->fLiteralText.length()); |
718 |
+ } |
719 |
+ appendOp(URX_STRING_LEN, fLiteralChars.length()); |
720 |
|
721 |
// Add this string into the accumulated strings of the compiled pattern. |
722 |
@@ -1891,6 +1820,56 @@ |
723 |
|
724 |
|
725 |
- |
726 |
- |
727 |
+int32_t RegexCompile::buildOp(int32_t type, int32_t val) { |
728 |
+ if (U_FAILURE(*fStatus)) { |
729 |
+ return 0; |
730 |
+ } |
731 |
+ if (type < 0 || type > 255) { |
732 |
+ U_ASSERT(FALSE); |
733 |
+ error(U_REGEX_INTERNAL_ERROR); |
734 |
+ type = URX_RESERVED_OP; |
735 |
+ } |
736 |
+ if (val > 0x00ffffff) { |
737 |
+ U_ASSERT(FALSE); |
738 |
+ error(U_REGEX_INTERNAL_ERROR); |
739 |
+ val = 0; |
740 |
+ } |
741 |
+ if (val < 0) { |
742 |
+ if (!(type == URX_RESERVED_OP_N || type == URX_RESERVED_OP)) { |
743 |
+ U_ASSERT(FALSE); |
744 |
+ error(U_REGEX_INTERNAL_ERROR); |
745 |
+ return -1; |
746 |
+ } |
747 |
+ if (URX_TYPE(val) != 0xff) { |
748 |
+ U_ASSERT(FALSE); |
749 |
+ error(U_REGEX_INTERNAL_ERROR); |
750 |
+ return -1; |
751 |
+ } |
752 |
+ type = URX_RESERVED_OP_N; |
753 |
+ } |
754 |
+ return (type << 24) | val; |
755 |
+} |
756 |
+ |
757 |
+ |
758 |
+//------------------------------------------------------------------------------ |
759 |
+// |
760 |
+// appendOp() Append a new instruction onto the compiled pattern |
761 |
+// Includes error checking, limiting the size of the |
762 |
+// pattern to lengths that can be represented in the |
763 |
+// 24 bit operand field of an instruction. |
764 |
+// |
765 |
+//------------------------------------------------------------------------------ |
766 |
+void RegexCompile::appendOp(int32_t op) { |
767 |
+ if (U_FAILURE(*fStatus)) { |
768 |
+ return; |
769 |
+ } |
770 |
+ fRXPat->fCompiledPat->addElement(op, *fStatus); |
771 |
+ if ((fRXPat->fCompiledPat->size() > 0x00fffff0) && U_SUCCESS(*fStatus)) { |
772 |
+ error(U_REGEX_PATTERN_TOO_BIG); |
773 |
+ } |
774 |
+} |
775 |
+ |
776 |
+void RegexCompile::appendOp(int32_t type, int32_t val) { |
777 |
+ appendOp(buildOp(type, val)); |
778 |
+} |
779 |
|
780 |
|
781 |
@@ -1908,5 +1887,5 @@ |
782 |
U_ASSERT(where>0 && where < code->size()); |
783 |
|
784 |
- int32_t nop = URX_BUILD(URX_NOP, 0); |
785 |
+ int32_t nop = buildOp(URX_NOP, 0); |
786 |
code->insertElementAt(nop, where, *fStatus); |
787 |
|
788 |
@@ -1929,5 +1908,5 @@ |
789 |
// needs to be incremented to adjust for the insertion. |
790 |
opValue++; |
791 |
- op = URX_BUILD(opType, opValue); |
792 |
+ op = buildOp(opType, opValue); |
793 |
code->setElementAt(op, loc); |
794 |
} |
795 |
@@ -1953,4 +1932,56 @@ |
796 |
} |
797 |
|
798 |
+ |
799 |
+//------------------------------------------------------------------------------ |
800 |
+// |
801 |
+// allocateData() Allocate storage in the matcher's static data area. |
802 |
+// Return the index for the newly allocated data. |
803 |
+// The storage won't actually exist until we are running a match |
804 |
+// operation, but the storage indexes are inserted into various |
805 |
+// opcodes while compiling the pattern. |
806 |
+// |
807 |
+//------------------------------------------------------------------------------ |
808 |
+int32_t RegexCompile::allocateData(int32_t size) { |
809 |
+ if (U_FAILURE(*fStatus)) { |
810 |
+ return 0; |
811 |
+ } |
812 |
+ if (size <= 0 || size > 0x100 || fRXPat->fDataSize < 0) { |
813 |
+ error(U_REGEX_INTERNAL_ERROR); |
814 |
+ return 0; |
815 |
+ } |
816 |
+ int32_t dataIndex = fRXPat->fDataSize; |
817 |
+ fRXPat->fDataSize += size; |
818 |
+ if (fRXPat->fDataSize >= 0x00fffff0) { |
819 |
+ error(U_REGEX_INTERNAL_ERROR); |
820 |
+ } |
821 |
+ return dataIndex; |
822 |
+} |
823 |
+ |
824 |
+ |
825 |
+//------------------------------------------------------------------------------ |
826 |
+// |
827 |
+// allocateStackData() Allocate space in the back-tracking stack frame. |
828 |
+// Return the index for the newly allocated data. |
829 |
+// The frame indexes are inserted into various |
830 |
+// opcodes while compiling the pattern, meaning that frame |
831 |
+// size must be restricted to the size that will fit |
832 |
+// as an operand (24 bits). |
833 |
+// |
834 |
+//------------------------------------------------------------------------------ |
835 |
+int32_t RegexCompile::allocateStackData(int32_t size) { |
836 |
+ if (U_FAILURE(*fStatus)) { |
837 |
+ return 0; |
838 |
+ } |
839 |
+ if (size <= 0 || size > 0x100 || fRXPat->fFrameSize < 0) { |
840 |
+ error(U_REGEX_INTERNAL_ERROR); |
841 |
+ return 0; |
842 |
+ } |
843 |
+ int32_t dataIndex = fRXPat->fFrameSize; |
844 |
+ fRXPat->fFrameSize += size; |
845 |
+ if (fRXPat->fFrameSize >= 0x00fffff0) { |
846 |
+ error(U_REGEX_PATTERN_TOO_BIG); |
847 |
+ } |
848 |
+ return dataIndex; |
849 |
+} |
850 |
|
851 |
|
852 |
@@ -1996,5 +2027,5 @@ |
853 |
} |
854 |
if (reserveLoc) { |
855 |
- int32_t nop = URX_BUILD(URX_NOP, 0); |
856 |
+ int32_t nop = buildOp(URX_NOP, 0); |
857 |
fRXPat->fCompiledPat->insertElementAt(nop, theLoc, *fStatus); |
858 |
} |
859 |
@@ -2071,6 +2102,5 @@ |
860 |
|
861 |
int32_t frameVarLocation = URX_VAL(captureOp); |
862 |
- int32_t endCaptureOp = URX_BUILD(URX_END_CAPTURE, frameVarLocation); |
863 |
- fRXPat->fCompiledPat->addElement(endCaptureOp, *fStatus); |
864 |
+ appendOp(URX_END_CAPTURE, frameVarLocation); |
865 |
} |
866 |
break; |
867 |
@@ -2083,6 +2113,5 @@ |
868 |
U_ASSERT(URX_TYPE(stoOp) == URX_STO_SP); |
869 |
int32_t stoLoc = URX_VAL(stoOp); |
870 |
- int32_t ldOp = URX_BUILD(URX_LD_SP, stoLoc); |
871 |
- fRXPat->fCompiledPat->addElement(ldOp, *fStatus); |
872 |
+ appendOp(URX_LD_SP, stoLoc); |
873 |
} |
874 |
break; |
875 |
@@ -2093,6 +2122,5 @@ |
876 |
U_ASSERT(URX_TYPE(startOp) == URX_LA_START); |
877 |
int32_t dataLoc = URX_VAL(startOp); |
878 |
- int32_t op = URX_BUILD(URX_LA_END, dataLoc); |
879 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
880 |
+ appendOp(URX_LA_END, dataLoc); |
881 |
} |
882 |
break; |
883 |
@@ -2104,10 +2132,7 @@ |
884 |
U_ASSERT(URX_TYPE(startOp) == URX_LA_START); |
885 |
int32_t dataLoc = URX_VAL(startOp); |
886 |
- int32_t op = URX_BUILD(URX_LA_END, dataLoc); |
887 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
888 |
- op = URX_BUILD(URX_BACKTRACK, 0); |
889 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
890 |
- op = URX_BUILD(URX_LA_END, dataLoc); |
891 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
892 |
+ appendOp(URX_LA_END, dataLoc); |
893 |
+ appendOp(URX_BACKTRACK, 0); |
894 |
+ appendOp(URX_LA_END, dataLoc); |
895 |
|
896 |
// Patch the URX_SAVE near the top of the block. |
897 |
@@ -2116,5 +2141,5 @@ |
898 |
U_ASSERT(URX_TYPE(saveOp) == URX_STATE_SAVE); |
899 |
int32_t dest = fRXPat->fCompiledPat->size()-1; |
900 |
- saveOp = URX_BUILD(URX_STATE_SAVE, dest); |
901 |
+ saveOp = buildOp(URX_STATE_SAVE, dest); |
902 |
fRXPat->fCompiledPat->setElementAt(saveOp, fMatchOpenParen); |
903 |
} |
904 |
@@ -2129,8 +2154,6 @@ |
905 |
U_ASSERT(URX_TYPE(startOp) == URX_LB_START); |
906 |
int32_t dataLoc = URX_VAL(startOp); |
907 |
- int32_t op = URX_BUILD(URX_LB_END, dataLoc); |
908 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
909 |
- op = URX_BUILD(URX_LA_END, dataLoc); |
910 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
911 |
+ appendOp(URX_LB_END, dataLoc); |
912 |
+ appendOp(URX_LA_END, dataLoc); |
913 |
|
914 |
// Determine the min and max bounds for the length of the |
915 |
@@ -2168,6 +2191,5 @@ |
916 |
U_ASSERT(URX_TYPE(startOp) == URX_LB_START); |
917 |
int32_t dataLoc = URX_VAL(startOp); |
918 |
- int32_t op = URX_BUILD(URX_LBN_END, dataLoc); |
919 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
920 |
+ appendOp(URX_LBN_END, dataLoc); |
921 |
|
922 |
// Determine the min and max bounds for the length of the |
923 |
@@ -2194,5 +2216,5 @@ |
924 |
// Insert the pattern location to continue at after a successful match |
925 |
// as the last operand of the URX_LBN_CONT |
926 |
- op = URX_BUILD(URX_RELOC_OPRND, fRXPat->fCompiledPat->size()); |
927 |
+ int32_t op = buildOp(URX_RELOC_OPRND, fRXPat->fCompiledPat->size()); |
928 |
fRXPat->fCompiledPat->setElementAt(op, fMatchOpenParen-1); |
929 |
} |
930 |
@@ -2235,5 +2257,5 @@ |
931 |
{ |
932 |
// Set of no elements. Always fails to match. |
933 |
- fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKTRACK, 0), *fStatus); |
934 |
+ appendOp(URX_BACKTRACK, 0); |
935 |
delete theSet; |
936 |
} |
937 |
@@ -2256,6 +2278,5 @@ |
938 |
int32_t setNumber = fRXPat->fSets->size(); |
939 |
fRXPat->fSets->addElement(theSet, *fStatus); |
940 |
- int32_t setOp = URX_BUILD(URX_SETREF, setNumber); |
941 |
- fRXPat->fCompiledPat->addElement(setOp, *fStatus); |
942 |
+ appendOp(URX_SETREF, setNumber); |
943 |
} |
944 |
} |
945 |
@@ -2296,11 +2317,8 @@ |
946 |
// +1 --> Input index (for breaking non-progressing loops) |
947 |
// (Only present if unbounded upper limit on loop) |
948 |
- int32_t counterLoc = fRXPat->fFrameSize; |
949 |
- fRXPat->fFrameSize++; |
950 |
- if (fIntervalUpper < 0) { |
951 |
- fRXPat->fFrameSize++; |
952 |
- } |
953 |
- |
954 |
- int32_t op = URX_BUILD(InitOp, counterLoc); |
955 |
+ int32_t dataSize = fIntervalUpper < 0 ? 2 : 1; |
956 |
+ int32_t counterLoc = allocateStackData(dataSize); |
957 |
+ |
958 |
+ int32_t op = buildOp(InitOp, counterLoc); |
959 |
fRXPat->fCompiledPat->setElementAt(op, topOfBlock); |
960 |
|
961 |
@@ -2310,5 +2328,5 @@ |
962 |
// position to move. |
963 |
int32_t loopEnd = fRXPat->fCompiledPat->size(); |
964 |
- op = URX_BUILD(URX_RELOC_OPRND, loopEnd); |
965 |
+ op = buildOp(URX_RELOC_OPRND, loopEnd); |
966 |
fRXPat->fCompiledPat->setElementAt(op, topOfBlock+1); |
967 |
|
968 |
@@ -2319,6 +2337,5 @@ |
969 |
// Apend the CTR_LOOP op. The operand is the location of the CTR_INIT op. |
970 |
// Goes at end of the block being looped over, so just append to the code so far. |
971 |
- op = URX_BUILD(LoopOp, topOfBlock); |
972 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
973 |
+ appendOp(LoopOp, topOfBlock); |
974 |
|
975 |
if ((fIntervalLow & 0xff000000) != 0 || |
976 |
@@ -2373,5 +2390,5 @@ |
977 |
int32_t endOfSequenceLoc = fRXPat->fCompiledPat->size()-1 |
978 |
+ fIntervalUpper + (fIntervalUpper-fIntervalLow); |
979 |
- int32_t saveOp = URX_BUILD(URX_STATE_SAVE, endOfSequenceLoc); |
980 |
+ int32_t saveOp = buildOp(URX_STATE_SAVE, endOfSequenceLoc); |
981 |
if (fIntervalLow == 0) { |
982 |
insertOp(topOfBlock); |
983 |
@@ -2386,11 +2403,8 @@ |
984 |
int32_t i; |
985 |
for (i=1; i<fIntervalUpper; i++ ) { |
986 |
- if (i == fIntervalLow) { |
987 |
- fRXPat->fCompiledPat->addElement(saveOp, *fStatus); |
988 |
- } |
989 |
- if (i > fIntervalLow) { |
990 |
- fRXPat->fCompiledPat->addElement(saveOp, *fStatus); |
991 |
- } |
992 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
993 |
+ if (i >= fIntervalLow) { |
994 |
+ appendOp(saveOp); |
995 |
+ } |
996 |
+ appendOp(op); |
997 |
} |
998 |
return TRUE; |
999 |
@@ -3612,5 +3626,5 @@ |
1000 |
U_ASSERT(operandAddress>=0 && operandAddress<deltas.size()); |
1001 |
int32_t fixedOperandAddress = operandAddress - deltas.elementAti(operandAddress); |
1002 |
- op = URX_BUILD(opType, fixedOperandAddress); |
1003 |
+ op = buildOp(opType, fixedOperandAddress); |
1004 |
fRXPat->fCompiledPat->setElementAt(op, dst); |
1005 |
dst++; |
1006 |
@@ -3627,5 +3641,5 @@ |
1007 |
} |
1008 |
where = fRXPat->fGroupMap->elementAti(where-1); |
1009 |
- op = URX_BUILD(opType, where); |
1010 |
+ op = buildOp(opType, where); |
1011 |
fRXPat->fCompiledPat->setElementAt(op, dst); |
1012 |
dst++; |
1013 |
@@ -3979,5 +3993,5 @@ |
1014 |
// |
1015 |
// scanNamedChar |
1016 |
- // Get a UChar32 from a \N{UNICODE CHARACTER NAME} in the pattern. |
1017 |
+// Get a UChar32 from a \N{UNICODE CHARACTER NAME} in the pattern. |
1018 |
// |
1019 |
// The scan position will be at the 'N'. On return |
1020 |
Index: /icu/trunk/source/i18n/regexcmp.h |
1021 |
=================================================================== |
1022 |
--- /icu/trunk/source/i18n/regexcmp.h (revision 36800) |
1023 |
+++ /icu/trunk/source/i18n/regexcmp.h (revision 36801) |
1024 |
@@ -105,4 +105,11 @@ |
1025 |
void insertOp(int32_t where); // Open up a slot for a new op in the |
1026 |
// generated code at the specified location. |
1027 |
+ void appendOp(int32_t op); // Append a new op to the compiled pattern. |
1028 |
+ void appendOp(int32_t type, int32_t val); // Build & append a new op to the compiled pattern. |
1029 |
+ int32_t buildOp(int32_t type, int32_t val); // Construct a new pcode instruction. |
1030 |
+ int32_t allocateData(int32_t size); // Allocate space in the matcher data area. |
1031 |
+ // Return index of the newly allocated data. |
1032 |
+ int32_t allocateStackData(int32_t size); // Allocate space in the match back-track stack frame. |
1033 |
+ // Return offset index in the frame. |
1034 |
int32_t minMatchLength(int32_t start, |
1035 |
int32_t end); |
1036 |
Index: /icu/trunk/source/i18n/regeximp.h |
1037 |
=================================================================== |
1038 |
--- /icu/trunk/source/i18n/regeximp.h (revision 36800) |
1039 |
+++ /icu/trunk/source/i18n/regeximp.h (revision 36801) |
1040 |
@@ -1,4 +1,4 @@ |
1041 |
// |
1042 |
-// Copyright (C) 2002-2013 International Business Machines Corporation |
1043 |
+// Copyright (C) 2002-2014 International Business Machines Corporation |
1044 |
// and others. All rights reserved. |
1045 |
// |
1046 |
@@ -242,5 +242,4 @@ |
1047 |
// Convenience macros for assembling and disassembling a compiled operation. |
1048 |
// |
1049 |
-#define URX_BUILD(type, val) (int32_t)((type << 24) | (val)) |
1050 |
#define URX_TYPE(x) ((uint32_t)(x) >> 24) |
1051 |
#define URX_VAL(x) ((x) & 0xffffff) |
1052 |
Index: /icu/trunk/source/test/intltest/regextst.cpp |
1053 |
=================================================================== |
1054 |
--- /icu/trunk/source/test/intltest/regextst.cpp (revision 36800) |
1055 |
+++ /icu/trunk/source/test/intltest/regextst.cpp (revision 36801) |
1056 |
@@ -145,4 +145,7 @@ |
1057 |
if (exec) TestBug11049(); |
1058 |
break; |
1059 |
+ case 25: name = "TestBug11371"; |
1060 |
+ if (exec) TestBug11371(); |
1061 |
+ break; |
1062 |
default: name = ""; |
1063 |
break; //needed to end loop |
1064 |
@@ -5368,4 +5371,47 @@ |
1065 |
|
1066 |
|
1067 |
+void RegexTest::TestBug11371() { |
1068 |
+ if (quick) { |
1069 |
+ logln("Skipping test. Runs in exhuastive mode only."); |
1070 |
+ return; |
1071 |
+ } |
1072 |
+ UErrorCode status = U_ZERO_ERROR; |
1073 |
+ UnicodeString patternString; |
1074 |
+ |
1075 |
+ for (int i=0; i<8000000; i++) { |
1076 |
+ patternString.append(UnicodeString("()")); |
1077 |
+ } |
1078 |
+ LocalPointer<RegexPattern> compiledPat(RegexPattern::compile(patternString, 0, status)); |
1079 |
+ if (status != U_REGEX_PATTERN_TOO_BIG) { |
1080 |
+ errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s.", |
1081 |
+ __FILE__, __LINE__, u_errorName(status)); |
1082 |
+ } |
1083 |
+ |
1084 |
+ status = U_ZERO_ERROR; |
1085 |
+ patternString = "("; |
1086 |
+ for (int i=0; i<20000000; i++) { |
1087 |
+ patternString.append(UnicodeString("A++")); |
1088 |
+ } |
1089 |
+ patternString.append(UnicodeString("){0}B++")); |
1090 |
+ LocalPointer<RegexPattern> compiledPat2(RegexPattern::compile(patternString, 0, status)); |
1091 |
+ if (status != U_REGEX_PATTERN_TOO_BIG) { |
1092 |
+ errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s.", |
1093 |
+ __FILE__, __LINE__, u_errorName(status)); |
1094 |
+ } |
1095 |
+ |
1096 |
+ // Pattern with too much string data, such that string indexes overflow operand data field size |
1097 |
+ // in compiled instruction. |
1098 |
+ status = U_ZERO_ERROR; |
1099 |
+ patternString = ""; |
1100 |
+ while (patternString.length() < 0x00ffffff) { |
1101 |
+ patternString.append(UnicodeString("stuff and things dont you know, these are a few of my favorite strings\n")); |
1102 |
+ } |
1103 |
+ patternString.append(UnicodeString("X? trailing string")); |
1104 |
+ LocalPointer<RegexPattern> compiledPat3(RegexPattern::compile(patternString, 0, status)); |
1105 |
+ if (status != U_REGEX_PATTERN_TOO_BIG) { |
1106 |
+ errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s.", |
1107 |
+ __FILE__, __LINE__, u_errorName(status)); |
1108 |
+ } |
1109 |
+} |
1110 |
|
1111 |
#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ |
1112 |
Index: /icu/trunk/source/test/intltest/regextst.h |
1113 |
=================================================================== |
1114 |
--- /icu/trunk/source/test/intltest/regextst.h (revision 36800) |
1115 |
+++ /icu/trunk/source/test/intltest/regextst.h (revision 36801) |
1116 |
@@ -51,4 +51,5 @@ |
1117 |
virtual void TestCaseInsensitiveStarters(); |
1118 |
virtual void TestBug11049(); |
1119 |
+ virtual void TestBug11371(); |
1120 |
|
1121 |
// The following functions are internal to the regexp tests. |