Gentoo Archives: gentoo-commits

From: "Andreas Hüttel (dilfridge)" <dilfridge@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] gentoo-x86 commit in dev-libs/icu/files: icu-54.1-CVE-2014-9654.patch
Date: Sat, 07 Feb 2015 17:47:12
Message-Id: 20150207174706.70AAD1143D@oystercatcher.gentoo.org
1 dilfridge 15/02/07 17:47:06
2
3 Added: icu-54.1-CVE-2014-9654.patch
4 Log:
5 Backport patch for CVE-2014-9654, bug 539108. Unfortunately this does bad things to a header file, so forcing a rebuild by mangling subslots...
6
7 (Portage version: 2.2.15/cvs/Linux x86_64, signed Manifest commit with key 84AD142F)
8
9 Revision Changes Path
10 1.1 dev-libs/icu/files/icu-54.1-CVE-2014-9654.patch
11
12 file : http://sources.gentoo.org/viewvc.cgi/gentoo-x86/dev-libs/icu/files/icu-54.1-CVE-2014-9654.patch?rev=1.1&view=markup
13 plain: http://sources.gentoo.org/viewvc.cgi/gentoo-x86/dev-libs/icu/files/icu-54.1-CVE-2014-9654.patch?rev=1.1&content-type=text/plain
14
15 Index: icu-54.1-CVE-2014-9654.patch
16 ===================================================================
17 Index: /icu/trunk/source/common/unicode/utypes.h
18 ===================================================================
19 --- /icu/trunk/source/common/unicode/utypes.h (revision 36800)
20 +++ /icu/trunk/source/common/unicode/utypes.h (revision 36801)
21 @@ -648,4 +648,5 @@
22 U_REGEX_TIME_OUT, /**< Maximum allowed match time exceeded */
23 U_REGEX_STOPPED_BY_CALLER, /**< Matching operation aborted by user callback fn. */
24 + U_REGEX_PATTERN_TOO_BIG, /**< Pattern exceeds limits on size or complexity. @draft ICU 55 */
25 U_REGEX_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for regexp errors */
26
27 Index: /icu/trunk/source/common/utypes.c
28 ===================================================================
29 --- /icu/trunk/source/common/utypes.c (revision 36800)
30 +++ /icu/trunk/source/common/utypes.c (revision 36801)
31 @@ -2,5 +2,5 @@
32 ******************************************************************************
33 *
34 -* Copyright (C) 1997-2011, International Business Machines
35 +* Copyright (C) 1997-2014, International Business Machines
36 * Corporation and others. All Rights Reserved.
37 *
38 @@ -166,5 +166,6 @@
39 "U_REGEX_STACK_OVERFLOW",
40 "U_REGEX_TIME_OUT",
41 - "U_REGEX_STOPPED_BY_CALLER"
42 + "U_REGEX_STOPPED_BY_CALLER",
43 + "U_REGEX_PATTERN_TOO_BIG"
44 };
45
46 Index: /icu/trunk/source/i18n/regexcmp.cpp
47 ===================================================================
48 --- /icu/trunk/source/i18n/regexcmp.cpp (revision 36800)
49 +++ /icu/trunk/source/i18n/regexcmp.cpp (revision 36801)
50 @@ -305,5 +305,5 @@
51 // the position in the compiled pattern.
52 //
53 - fRXPat->fFrameSize+=RESTACKFRAME_HDRCOUNT;
54 + allocateStackData(RESTACKFRAME_HDRCOUNT);
55
56 //
57 @@ -371,7 +371,7 @@
58 //4 NOP Resreved, will be replaced by a save if there are
59 // OR | operators at the top level
60 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_STATE_SAVE, 2), *fStatus);
61 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_JMP, 3), *fStatus);
62 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_FAIL, 0), *fStatus);
63 + appendOp(URX_STATE_SAVE, 2);
64 + appendOp(URX_JMP, 3);
65 + appendOp(URX_FAIL, 0);
66
67 // Standard open nonCapture paren action emits the two NOPs and
68 @@ -396,5 +396,5 @@
69
70 // add the END operation to the compiled pattern.
71 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_END, 0), *fStatus);
72 + appendOp(URX_END, 0);
73
74 // Terminate the pattern compilation state machine.
75 @@ -418,5 +418,5 @@
76 int32_t op = (int32_t)fRXPat->fCompiledPat->elementAti(savePosition);
77 U_ASSERT(URX_TYPE(op) == URX_NOP); // original contents of reserved location
78 - op = URX_BUILD(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+1);
79 + op = buildOp(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+1);
80 fRXPat->fCompiledPat->setElementAt(op, savePosition);
81
82 @@ -424,6 +424,5 @@
83 // the JMP will eventually be the location following the ')' for the
84 // group. This will be patched in later, when the ')' is encountered.
85 - op = URX_BUILD(URX_JMP, 0);
86 - fRXPat->fCompiledPat->addElement(op, *fStatus);
87 + appendOp(URX_JMP, 0);
88
89 // Push the position of the newly added JMP op onto the parentheses stack.
90 @@ -434,5 +433,5 @@
91 // for a SAVE in the event that there is yet another '|' following
92 // this one.
93 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
94 + appendOp(URX_NOP, 0);
95 fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus);
96 }
97 @@ -460,10 +459,8 @@
98 {
99 fixLiterals();
100 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
101 - int32_t varsLoc = fRXPat->fFrameSize; // Reserve three slots in match stack frame.
102 - fRXPat->fFrameSize += 3;
103 - int32_t cop = URX_BUILD(URX_START_CAPTURE, varsLoc);
104 - fRXPat->fCompiledPat->addElement(cop, *fStatus);
105 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
106 + appendOp(URX_NOP, 0);
107 + int32_t varsLoc = allocateStackData(3); // Reserve three slots in match stack frame.
108 + appendOp(URX_START_CAPTURE, varsLoc);
109 + appendOp(URX_NOP, 0);
110
111 // On the Parentheses stack, start a new frame and add the postions
112 @@ -490,6 +487,6 @@
113 {
114 fixLiterals();
115 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
116 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
117 + appendOp(URX_NOP, 0);
118 + appendOp(URX_NOP, 0);
119
120 // On the Parentheses stack, start a new frame and add the postions
121 @@ -513,10 +510,8 @@
122 {
123 fixLiterals();
124 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
125 - int32_t varLoc = fRXPat->fDataSize; // Reserve a data location for saving the
126 - fRXPat->fDataSize += 1; // state stack ptr.
127 - int32_t stoOp = URX_BUILD(URX_STO_SP, varLoc);
128 - fRXPat->fCompiledPat->addElement(stoOp, *fStatus);
129 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
130 + appendOp(URX_NOP, 0);
131 + int32_t varLoc = allocateData(1); // Reserve a data location for saving the state stack ptr.
132 + appendOp(URX_STO_SP, varLoc);
133 + appendOp(URX_NOP, 0);
134
135 // On the Parentheses stack, start a new frame and add the postions
136 @@ -561,24 +556,12 @@
137 {
138 fixLiterals();
139 - int32_t dataLoc = fRXPat->fDataSize;
140 - fRXPat->fDataSize += 2;
141 - int32_t op = URX_BUILD(URX_LA_START, dataLoc);
142 - fRXPat->fCompiledPat->addElement(op, *fStatus);
143 -
144 - op = URX_BUILD(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+ 2);
145 - fRXPat->fCompiledPat->addElement(op, *fStatus);
146 -
147 - op = URX_BUILD(URX_JMP, fRXPat->fCompiledPat->size()+ 3);
148 - fRXPat->fCompiledPat->addElement(op, *fStatus);
149 -
150 - op = URX_BUILD(URX_LA_END, dataLoc);
151 - fRXPat->fCompiledPat->addElement(op, *fStatus);
152 -
153 - op = URX_BUILD(URX_BACKTRACK, 0);
154 - fRXPat->fCompiledPat->addElement(op, *fStatus);
155 -
156 - op = URX_BUILD(URX_NOP, 0);
157 - fRXPat->fCompiledPat->addElement(op, *fStatus);
158 - fRXPat->fCompiledPat->addElement(op, *fStatus);
159 + int32_t dataLoc = allocateData(2);
160 + appendOp(URX_LA_START, dataLoc);
161 + appendOp(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+ 2);
162 + appendOp(URX_JMP, fRXPat->fCompiledPat->size()+ 3);
163 + appendOp(URX_LA_END, dataLoc);
164 + appendOp(URX_BACKTRACK, 0);
165 + appendOp(URX_NOP, 0);
166 + appendOp(URX_NOP, 0);
167
168 // On the Parentheses stack, start a new frame and add the postions
169 @@ -605,14 +588,8 @@
170 {
171 fixLiterals();
172 - int32_t dataLoc = fRXPat->fDataSize;
173 - fRXPat->fDataSize += 2;
174 - int32_t op = URX_BUILD(URX_LA_START, dataLoc);
175 - fRXPat->fCompiledPat->addElement(op, *fStatus);
176 -
177 - op = URX_BUILD(URX_STATE_SAVE, 0); // dest address will be patched later.
178 - fRXPat->fCompiledPat->addElement(op, *fStatus);
179 -
180 - op = URX_BUILD(URX_NOP, 0);
181 - fRXPat->fCompiledPat->addElement(op, *fStatus);
182 + int32_t dataLoc = allocateData(2);
183 + appendOp(URX_LA_START, dataLoc);
184 + appendOp(URX_STATE_SAVE, 0); // dest address will be patched later.
185 + appendOp(URX_NOP, 0);
186
187 // On the Parentheses stack, start a new frame and add the postions
188 @@ -652,21 +629,17 @@
189
190 // Allocate data space
191 - int32_t dataLoc = fRXPat->fDataSize;
192 - fRXPat->fDataSize += 4;
193 + int32_t dataLoc = allocateData(4);
194
195 // Emit URX_LB_START
196 - int32_t op = URX_BUILD(URX_LB_START, dataLoc);
197 - fRXPat->fCompiledPat->addElement(op, *fStatus);
198 + appendOp(URX_LB_START, dataLoc);
199
200 // Emit URX_LB_CONT
201 - op = URX_BUILD(URX_LB_CONT, dataLoc);
202 - fRXPat->fCompiledPat->addElement(op, *fStatus);
203 - fRXPat->fCompiledPat->addElement(0, *fStatus); // MinMatchLength. To be filled later.
204 - fRXPat->fCompiledPat->addElement(0, *fStatus); // MaxMatchLength. To be filled later.
205 -
206 - // Emit the NOP
207 - op = URX_BUILD(URX_NOP, 0);
208 - fRXPat->fCompiledPat->addElement(op, *fStatus);
209 - fRXPat->fCompiledPat->addElement(op, *fStatus);
210 + appendOp(URX_LB_CONT, dataLoc);
211 + appendOp(URX_RESERVED_OP, 0); // MinMatchLength. To be filled later.
212 + appendOp(URX_RESERVED_OP, 0); // MaxMatchLength. To be filled later.
213 +
214 + // Emit the NOPs
215 + appendOp(URX_NOP, 0);
216 + appendOp(URX_NOP, 0);
217
218 // On the Parentheses stack, start a new frame and add the postions
219 @@ -708,22 +681,18 @@
220
221 // Allocate data space
222 - int32_t dataLoc = fRXPat->fDataSize;
223 - fRXPat->fDataSize += 4;
224 + int32_t dataLoc = allocateData(4);
225
226 // Emit URX_LB_START
227 - int32_t op = URX_BUILD(URX_LB_START, dataLoc);
228 - fRXPat->fCompiledPat->addElement(op, *fStatus);
229 + appendOp(URX_LB_START, dataLoc);
230
231 // Emit URX_LBN_CONT
232 - op = URX_BUILD(URX_LBN_CONT, dataLoc);
233 - fRXPat->fCompiledPat->addElement(op, *fStatus);
234 - fRXPat->fCompiledPat->addElement(0, *fStatus); // MinMatchLength. To be filled later.
235 - fRXPat->fCompiledPat->addElement(0, *fStatus); // MaxMatchLength. To be filled later.
236 - fRXPat->fCompiledPat->addElement(0, *fStatus); // Continue Loc. To be filled later.
237 -
238 - // Emit the NOP
239 - op = URX_BUILD(URX_NOP, 0);
240 - fRXPat->fCompiledPat->addElement(op, *fStatus);
241 - fRXPat->fCompiledPat->addElement(op, *fStatus);
242 + appendOp(URX_LBN_CONT, dataLoc);
243 + appendOp(URX_RESERVED_OP, 0); // MinMatchLength. To be filled later.
244 + appendOp(URX_RESERVED_OP, 0); // MaxMatchLength. To be filled later.
245 + appendOp(URX_RESERVED_OP, 0); // Continue Loc. To be filled later.
246 +
247 + // Emit the NOPs
248 + appendOp(URX_NOP, 0);
249 + appendOp(URX_NOP, 0);
250
251 // On the Parentheses stack, start a new frame and add the postions
252 @@ -795,10 +764,7 @@
253 if (URX_TYPE(repeatedOp) == URX_SETREF) {
254 // Emit optimized code for [char set]+
255 - int32_t loopOpI = URX_BUILD(URX_LOOP_SR_I, URX_VAL(repeatedOp));
256 - fRXPat->fCompiledPat->addElement(loopOpI, *fStatus);
257 - frameLoc = fRXPat->fFrameSize;
258 - fRXPat->fFrameSize++;
259 - int32_t loopOpC = URX_BUILD(URX_LOOP_C, frameLoc);
260 - fRXPat->fCompiledPat->addElement(loopOpC, *fStatus);
261 + appendOp(URX_LOOP_SR_I, URX_VAL(repeatedOp));
262 + frameLoc = allocateStackData(1);
263 + appendOp(URX_LOOP_C, frameLoc);
264 break;
265 }
266 @@ -808,5 +774,5 @@
267 URX_TYPE(repeatedOp) == URX_DOTANY_UNIX) {
268 // Emit Optimized code for .+ operations.
269 - int32_t loopOpI = URX_BUILD(URX_LOOP_DOT_I, 0);
270 + int32_t loopOpI = buildOp(URX_LOOP_DOT_I, 0);
271 if (URX_TYPE(repeatedOp) == URX_DOTANY_ALL) {
272 // URX_LOOP_DOT_I operand is a flag indicating ". matches any" mode.
273 @@ -816,9 +782,7 @@
274 loopOpI |= 2;
275 }
276 - fRXPat->fCompiledPat->addElement(loopOpI, *fStatus);
277 - frameLoc = fRXPat->fFrameSize;
278 - fRXPat->fFrameSize++;
279 - int32_t loopOpC = URX_BUILD(URX_LOOP_C, frameLoc);
280 - fRXPat->fCompiledPat->addElement(loopOpC, *fStatus);
281 + appendOp(loopOpI);
282 + frameLoc = allocateStackData(1);
283 + appendOp(URX_LOOP_C, frameLoc);
284 break;
285 }
286 @@ -834,16 +798,13 @@
287 // Emit the code sequence that can handle it.
288 insertOp(topLoc);
289 - frameLoc = fRXPat->fFrameSize;
290 - fRXPat->fFrameSize++;
291 -
292 - int32_t op = URX_BUILD(URX_STO_INP_LOC, frameLoc);
293 + frameLoc = allocateStackData(1);
294 +
295 + int32_t op = buildOp(URX_STO_INP_LOC, frameLoc);
296 fRXPat->fCompiledPat->setElementAt(op, topLoc);
297
298 - op = URX_BUILD(URX_JMP_SAV_X, topLoc+1);
299 - fRXPat->fCompiledPat->addElement(op, *fStatus);
300 + appendOp(URX_JMP_SAV_X, topLoc+1);
301 } else {
302 // Simpler code when the repeated body must match something non-empty
303 - int32_t jmpOp = URX_BUILD(URX_JMP_SAV, topLoc);
304 - fRXPat->fCompiledPat->addElement(jmpOp, *fStatus);
305 + appendOp(URX_JMP_SAV, topLoc);
306 }
307 }
308 @@ -857,6 +818,5 @@
309 {
310 int32_t topLoc = blockTopLoc(FALSE);
311 - int32_t saveStateOp = URX_BUILD(URX_STATE_SAVE, topLoc);
312 - fRXPat->fCompiledPat->addElement(saveStateOp, *fStatus);
313 + appendOp(URX_STATE_SAVE, topLoc);
314 }
315 break;
316 @@ -872,5 +832,5 @@
317 {
318 int32_t saveStateLoc = blockTopLoc(TRUE);
319 - int32_t saveStateOp = URX_BUILD(URX_STATE_SAVE, fRXPat->fCompiledPat->size());
320 + int32_t saveStateOp = buildOp(URX_STATE_SAVE, fRXPat->fCompiledPat->size());
321 fRXPat->fCompiledPat->setElementAt(saveStateOp, saveStateLoc);
322 }
323 @@ -891,12 +851,10 @@
324 int32_t jmp2_loc = fRXPat->fCompiledPat->size();
325
326 - int32_t jmp1_op = URX_BUILD(URX_JMP, jmp2_loc+1);
327 + int32_t jmp1_op = buildOp(URX_JMP, jmp2_loc+1);
328 fRXPat->fCompiledPat->setElementAt(jmp1_op, jmp1_loc);
329
330 - int32_t jmp2_op = URX_BUILD(URX_JMP, jmp2_loc+2);
331 - fRXPat->fCompiledPat->addElement(jmp2_op, *fStatus);
332 -
333 - int32_t save_op = URX_BUILD(URX_STATE_SAVE, jmp1_loc+1);
334 - fRXPat->fCompiledPat->addElement(save_op, *fStatus);
335 + appendOp(URX_JMP, jmp2_loc+2);
336 +
337 + appendOp(URX_STATE_SAVE, jmp1_loc+1);
338 }
339 break;
340 @@ -938,10 +896,8 @@
341 if (URX_TYPE(repeatedOp) == URX_SETREF) {
342 // Emit optimized code for a [char set]*
343 - int32_t loopOpI = URX_BUILD(URX_LOOP_SR_I, URX_VAL(repeatedOp));
344 + int32_t loopOpI = buildOp(URX_LOOP_SR_I, URX_VAL(repeatedOp));
345 fRXPat->fCompiledPat->setElementAt(loopOpI, topLoc);
346 - dataLoc = fRXPat->fFrameSize;
347 - fRXPat->fFrameSize++;
348 - int32_t loopOpC = URX_BUILD(URX_LOOP_C, dataLoc);
349 - fRXPat->fCompiledPat->addElement(loopOpC, *fStatus);
350 + dataLoc = allocateStackData(1);
351 + appendOp(URX_LOOP_C, dataLoc);
352 break;
353 }
354 @@ -951,5 +907,5 @@
355 URX_TYPE(repeatedOp) == URX_DOTANY_UNIX) {
356 // Emit Optimized code for .* operations.
357 - int32_t loopOpI = URX_BUILD(URX_LOOP_DOT_I, 0);
358 + int32_t loopOpI = buildOp(URX_LOOP_DOT_I, 0);
359 if (URX_TYPE(repeatedOp) == URX_DOTANY_ALL) {
360 // URX_LOOP_DOT_I operand is a flag indicating . matches any mode.
361 @@ -960,8 +916,6 @@
362 }
363 fRXPat->fCompiledPat->setElementAt(loopOpI, topLoc);
364 - dataLoc = fRXPat->fFrameSize;
365 - fRXPat->fFrameSize++;
366 - int32_t loopOpC = URX_BUILD(URX_LOOP_C, dataLoc);
367 - fRXPat->fCompiledPat->addElement(loopOpC, *fStatus);
368 + dataLoc = allocateStackData(1);
369 + appendOp(URX_LOOP_C, dataLoc);
370 break;
371 }
372 @@ -972,5 +926,5 @@
373
374 int32_t saveStateLoc = blockTopLoc(TRUE);
375 - int32_t jmpOp = URX_BUILD(URX_JMP_SAV, saveStateLoc+1);
376 + int32_t jmpOp = buildOp(URX_JMP_SAV, saveStateLoc+1);
377
378 // Check for minimum match length of zero, which requires
379 @@ -978,10 +932,9 @@
380 if (minMatchLength(saveStateLoc, fRXPat->fCompiledPat->size()-1) == 0) {
381 insertOp(saveStateLoc);
382 - dataLoc = fRXPat->fFrameSize;
383 - fRXPat->fFrameSize++;
384 -
385 - int32_t op = URX_BUILD(URX_STO_INP_LOC, dataLoc);
386 + dataLoc = allocateStackData(1);
387 +
388 + int32_t op = buildOp(URX_STO_INP_LOC, dataLoc);
389 fRXPat->fCompiledPat->setElementAt(op, saveStateLoc+1);
390 - jmpOp = URX_BUILD(URX_JMP_SAV_X, saveStateLoc+2);
391 + jmpOp = buildOp(URX_JMP_SAV_X, saveStateLoc+2);
392 }
393
394 @@ -990,10 +943,10 @@
395 int32_t continueLoc = fRXPat->fCompiledPat->size()+1;
396
397 - // Put together the save state op store it into the compiled code.
398 - int32_t saveStateOp = URX_BUILD(URX_STATE_SAVE, continueLoc);
399 + // Put together the save state op and store it into the compiled code.
400 + int32_t saveStateOp = buildOp(URX_STATE_SAVE, continueLoc);
401 fRXPat->fCompiledPat->setElementAt(saveStateOp, saveStateLoc);
402
403 // Append the URX_JMP_SAV or URX_JMPX operation to the compiled pattern.
404 - fRXPat->fCompiledPat->addElement(jmpOp, *fStatus);
405 + appendOp(jmpOp);
406 }
407 break;
408 @@ -1009,8 +962,7 @@
409 int32_t jmpLoc = blockTopLoc(TRUE); // loc 1.
410 int32_t saveLoc = fRXPat->fCompiledPat->size(); // loc 3.
411 - int32_t jmpOp = URX_BUILD(URX_JMP, saveLoc);
412 - int32_t stateSaveOp = URX_BUILD(URX_STATE_SAVE, jmpLoc+1);
413 + int32_t jmpOp = buildOp(URX_JMP, saveLoc);
414 fRXPat->fCompiledPat->setElementAt(jmpOp, jmpLoc);
415 - fRXPat->fCompiledPat->addElement(stateSaveOp, *fStatus);
416 + appendOp(URX_STATE_SAVE, jmpLoc+1);
417 }
418 break;
419 @@ -1085,7 +1037,7 @@
420 // First the STO_SP before the start of the loop
421 insertOp(topLoc);
422 - int32_t varLoc = fRXPat->fDataSize; // Reserve a data location for saving the
423 - fRXPat->fDataSize += 1; // state stack ptr.
424 - int32_t op = URX_BUILD(URX_STO_SP, varLoc);
425 +
426 + int32_t varLoc = allocateData(1); // Reserve a data location for saving the state stack ptr.
427 + int32_t op = buildOp(URX_STO_SP, varLoc);
428 fRXPat->fCompiledPat->setElementAt(op, topLoc);
429
430 @@ -1096,6 +1048,5 @@
431
432 // Then the LD_SP after the end of the loop
433 - op = URX_BUILD(URX_LD_SP, varLoc);
434 - fRXPat->fCompiledPat->addElement(op, *fStatus);
435 + appendOp(URX_LD_SP, varLoc);
436 }
437
438 @@ -1133,13 +1084,11 @@
439 {
440 fixLiterals(FALSE);
441 - int32_t op;
442 if (fModeFlags & UREGEX_DOTALL) {
443 - op = URX_BUILD(URX_DOTANY_ALL, 0);
444 + appendOp(URX_DOTANY_ALL, 0);
445 } else if (fModeFlags & UREGEX_UNIX_LINES) {
446 - op = URX_BUILD(URX_DOTANY_UNIX, 0);
447 + appendOp(URX_DOTANY_UNIX, 0);
448 } else {
449 - op = URX_BUILD(URX_DOTANY, 0);
450 - }
451 - fRXPat->fCompiledPat->addElement(op, *fStatus);
452 + appendOp(URX_DOTANY, 0);
453 + }
454 }
455 break;
456 @@ -1148,15 +1097,13 @@
457 {
458 fixLiterals(FALSE);
459 - int32_t op = 0;
460 if ( (fModeFlags & UREGEX_MULTILINE) == 0 && (fModeFlags & UREGEX_UNIX_LINES) == 0) {
461 - op = URX_CARET;
462 + appendOp(URX_CARET, 0);
463 } else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & UREGEX_UNIX_LINES) == 0) {
464 - op = URX_CARET_M;
465 + appendOp(URX_CARET_M, 0);
466 } else if ((fModeFlags & UREGEX_MULTILINE) == 0 && (fModeFlags & UREGEX_UNIX_LINES) != 0) {
467 - op = URX_CARET; // Only testing true start of input.
468 + appendOp(URX_CARET, 0); // Only testing true start of input.
469 } else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & UREGEX_UNIX_LINES) != 0) {
470 - op = URX_CARET_M_UNIX;
471 - }
472 - fRXPat->fCompiledPat->addElement(URX_BUILD(op, 0), *fStatus);
473 + appendOp(URX_CARET_M_UNIX, 0);
474 + }
475 }
476 break;
477 @@ -1165,15 +1112,13 @@
478 {
479 fixLiterals(FALSE);
480 - int32_t op = 0;
481 if ( (fModeFlags & UREGEX_MULTILINE) == 0 && (fModeFlags & UREGEX_UNIX_LINES) == 0) {
482 - op = URX_DOLLAR;
483 + appendOp(URX_DOLLAR, 0);
484 } else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & UREGEX_UNIX_LINES) == 0) {
485 - op = URX_DOLLAR_M;
486 + appendOp(URX_DOLLAR_M, 0);
487 } else if ((fModeFlags & UREGEX_MULTILINE) == 0 && (fModeFlags & UREGEX_UNIX_LINES) != 0) {
488 - op = URX_DOLLAR_D;
489 + appendOp(URX_DOLLAR_D, 0);
490 } else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & UREGEX_UNIX_LINES) != 0) {
491 - op = URX_DOLLAR_MD;
492 - }
493 - fRXPat->fCompiledPat->addElement(URX_BUILD(op, 0), *fStatus);
494 + appendOp(URX_DOLLAR_MD, 0);
495 + }
496 }
497 break;
498 @@ -1181,5 +1126,5 @@
499 case doBackslashA:
500 fixLiterals(FALSE);
501 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_CARET, 0), *fStatus);
502 + appendOp(URX_CARET, 0);
503 break;
504
505 @@ -1193,5 +1138,5 @@
506 fixLiterals(FALSE);
507 int32_t op = (fModeFlags & UREGEX_UWORD)? URX_BACKSLASH_BU : URX_BACKSLASH_B;
508 - fRXPat->fCompiledPat->addElement(URX_BUILD(op, 1), *fStatus);
509 + appendOp(op, 1);
510 }
511 break;
512 @@ -1206,5 +1151,5 @@
513 fixLiterals(FALSE);
514 int32_t op = (fModeFlags & UREGEX_UWORD)? URX_BACKSLASH_BU : URX_BACKSLASH_B;
515 - fRXPat->fCompiledPat->addElement(URX_BUILD(op, 0), *fStatus);
516 + appendOp(op, 0);
517 }
518 break;
519 @@ -1212,44 +1157,40 @@
520 case doBackslashD:
521 fixLiterals(FALSE);
522 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKSLASH_D, 1), *fStatus);
523 + appendOp(URX_BACKSLASH_D, 1);
524 break;
525
526 case doBackslashd:
527 fixLiterals(FALSE);
528 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKSLASH_D, 0), *fStatus);
529 + appendOp(URX_BACKSLASH_D, 0);
530 break;
531
532 case doBackslashG:
533 fixLiterals(FALSE);
534 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKSLASH_G, 0), *fStatus);
535 + appendOp(URX_BACKSLASH_G, 0);
536 break;
537
538 case doBackslashS:
539 fixLiterals(FALSE);
540 - fRXPat->fCompiledPat->addElement(
541 - URX_BUILD(URX_STAT_SETREF_N, URX_ISSPACE_SET), *fStatus);
542 + appendOp(URX_STAT_SETREF_N, URX_ISSPACE_SET);
543 break;
544
545 case doBackslashs:
546 fixLiterals(FALSE);
547 - fRXPat->fCompiledPat->addElement(
548 - URX_BUILD(URX_STATIC_SETREF, URX_ISSPACE_SET), *fStatus);
549 + appendOp(URX_STATIC_SETREF, URX_ISSPACE_SET);
550 break;
551
552 case doBackslashW:
553 fixLiterals(FALSE);
554 - fRXPat->fCompiledPat->addElement(
555 - URX_BUILD(URX_STAT_SETREF_N, URX_ISWORD_SET), *fStatus);
556 + appendOp(URX_STAT_SETREF_N, URX_ISWORD_SET);
557 break;
558
559 case doBackslashw:
560 fixLiterals(FALSE);
561 - fRXPat->fCompiledPat->addElement(
562 - URX_BUILD(URX_STATIC_SETREF, URX_ISWORD_SET), *fStatus);
563 + appendOp(URX_STATIC_SETREF, URX_ISWORD_SET);
564 break;
565
566 case doBackslashX:
567 fixLiterals(FALSE);
568 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKSLASH_X, 0), *fStatus);
569 + appendOp(URX_BACKSLASH_X, 0);
570 break;
571
572 @@ -1257,10 +1198,10 @@
573 case doBackslashZ:
574 fixLiterals(FALSE);
575 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_DOLLAR, 0), *fStatus);
576 + appendOp(URX_DOLLAR, 0);
577 break;
578
579 case doBackslashz:
580 fixLiterals(FALSE);
581 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKSLASH_Z, 0), *fStatus);
582 + appendOp(URX_BACKSLASH_Z, 0);
583 break;
584
585 @@ -1322,11 +1263,9 @@
586 // and shouldn't enter this code path at all.
587 fixLiterals(FALSE);
588 - int32_t op;
589 if (fModeFlags & UREGEX_CASE_INSENSITIVE) {
590 - op = URX_BUILD(URX_BACKREF_I, groupNum);
591 + appendOp(URX_BACKREF_I, groupNum);
592 } else {
593 - op = URX_BUILD(URX_BACKREF, groupNum);
594 - }
595 - fRXPat->fCompiledPat->addElement(op, *fStatus);
596 + appendOp(URX_BACKREF, groupNum);
597 + }
598 }
599 break;
600 @@ -1349,20 +1288,16 @@
601 // Emit the STO_SP
602 int32_t topLoc = blockTopLoc(TRUE);
603 - int32_t stoLoc = fRXPat->fDataSize;
604 - fRXPat->fDataSize++; // Reserve the data location for storing save stack ptr.
605 - int32_t op = URX_BUILD(URX_STO_SP, stoLoc);
606 + int32_t stoLoc = allocateData(1); // Reserve the data location for storing save stack ptr.
607 + int32_t op = buildOp(URX_STO_SP, stoLoc);
608 fRXPat->fCompiledPat->setElementAt(op, topLoc);
609
610 // Emit the STATE_SAVE
611 - op = URX_BUILD(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+2);
612 - fRXPat->fCompiledPat->addElement(op, *fStatus);
613 + appendOp(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+2);
614
615 // Emit the JMP
616 - op = URX_BUILD(URX_JMP, topLoc+1);
617 - fRXPat->fCompiledPat->addElement(op, *fStatus);
618 + appendOp(URX_JMP, topLoc+1);
619
620 // Emit the LD_SP
621 - op = URX_BUILD(URX_LD_SP, stoLoc);
622 - fRXPat->fCompiledPat->addElement(op, *fStatus);
623 + appendOp(URX_LD_SP, stoLoc);
624 }
625 break;
626 @@ -1384,21 +1319,18 @@
627
628 // emit STO_SP loc
629 - int32_t stoLoc = fRXPat->fDataSize;
630 - fRXPat->fDataSize++; // Reserve the data location for storing save stack ptr.
631 - int32_t op = URX_BUILD(URX_STO_SP, stoLoc);
632 + int32_t stoLoc = allocateData(1); // Reserve the data location for storing save stack ptr.
633 + int32_t op = buildOp(URX_STO_SP, stoLoc);
634 fRXPat->fCompiledPat->setElementAt(op, topLoc);
635
636 // Emit the SAVE_STATE 5
637 int32_t L7 = fRXPat->fCompiledPat->size()+1;
638 - op = URX_BUILD(URX_STATE_SAVE, L7);
639 + op = buildOp(URX_STATE_SAVE, L7);
640 fRXPat->fCompiledPat->setElementAt(op, topLoc+1);
641
642 // Append the JMP operation.
643 - op = URX_BUILD(URX_JMP, topLoc+1);
644 - fRXPat->fCompiledPat->addElement(op, *fStatus);
645 + appendOp(URX_JMP, topLoc+1);
646
647 // Emit the LD_SP loc
648 - op = URX_BUILD(URX_LD_SP, stoLoc);
649 - fRXPat->fCompiledPat->addElement(op, *fStatus);
650 + appendOp(URX_LD_SP, stoLoc);
651 }
652 break;
653 @@ -1419,17 +1351,15 @@
654
655 // Emit the STO_SP
656 - int32_t stoLoc = fRXPat->fDataSize;
657 - fRXPat->fDataSize++; // Reserve the data location for storing save stack ptr.
658 - int32_t op = URX_BUILD(URX_STO_SP, stoLoc);
659 + int32_t stoLoc = allocateData(1); // Reserve the data location for storing save stack ptr.
660 + int32_t op = buildOp(URX_STO_SP, stoLoc);
661 fRXPat->fCompiledPat->setElementAt(op, topLoc);
662
663 // Emit the SAVE_STATE
664 int32_t continueLoc = fRXPat->fCompiledPat->size()+1;
665 - op = URX_BUILD(URX_STATE_SAVE, continueLoc);
666 + op = buildOp(URX_STATE_SAVE, continueLoc);
667 fRXPat->fCompiledPat->setElementAt(op, topLoc+1);
668
669 // Emit the LD_SP
670 - op = URX_BUILD(URX_LD_SP, stoLoc);
671 - fRXPat->fCompiledPat->addElement(op, *fStatus);
672 + appendOp(URX_LD_SP, stoLoc);
673 }
674 break;
675 @@ -1488,6 +1418,6 @@
676 {
677 fixLiterals(FALSE);
678 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
679 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
680 + appendOp(URX_NOP, 0);
681 + appendOp(URX_NOP, 0);
682
683 // On the Parentheses stack, start a new frame and add the postions
684 @@ -1826,5 +1756,4 @@
685 //------------------------------------------------------------------------------
686 void RegexCompile::fixLiterals(UBool split) {
687 - int32_t op = 0; // An op from/for the compiled pattern.
688
689 // If no literal characters have been scanned but not yet had code generated
690 @@ -1865,21 +1794,21 @@
691 if ((fModeFlags & UREGEX_CASE_INSENSITIVE) &&
692 u_hasBinaryProperty(lastCodePoint, UCHAR_CASE_SENSITIVE)) {
693 - op = URX_BUILD(URX_ONECHAR_I, lastCodePoint);
694 + appendOp(URX_ONECHAR_I, lastCodePoint);
695 } else {
696 - op = URX_BUILD(URX_ONECHAR, lastCodePoint);
697 - }
698 - fRXPat->fCompiledPat->addElement(op, *fStatus);
699 + appendOp(URX_ONECHAR, lastCodePoint);
700 + }
701 } else {
702 // Two or more chars, emit a URX_STRING to match them.
703 + if (fLiteralChars.length() > 0x00ffffff || fRXPat->fLiteralText.length() > 0x00ffffff) {
704 + error(U_REGEX_PATTERN_TOO_BIG);
705 + }
706 if (fModeFlags & UREGEX_CASE_INSENSITIVE) {
707 - op = URX_BUILD(URX_STRING_I, fRXPat->fLiteralText.length());
708 + appendOp(URX_STRING_I, fRXPat->fLiteralText.length());
709 } else {
710 // TODO here: add optimization to split case sensitive strings of length two
711 // into two single char ops, for efficiency.
712 - op = URX_BUILD(URX_STRING, fRXPat->fLiteralText.length());
713 - }
714 - fRXPat->fCompiledPat->addElement(op, *fStatus);
715 - op = URX_BUILD(URX_STRING_LEN, fLiteralChars.length());
716 - fRXPat->fCompiledPat->addElement(op, *fStatus);
717 + appendOp(URX_STRING, fRXPat->fLiteralText.length());
718 + }
719 + appendOp(URX_STRING_LEN, fLiteralChars.length());
720
721 // Add this string into the accumulated strings of the compiled pattern.
722 @@ -1891,6 +1820,56 @@
723
724
725 -
726 -
727 +int32_t RegexCompile::buildOp(int32_t type, int32_t val) {
728 + if (U_FAILURE(*fStatus)) {
729 + return 0;
730 + }
731 + if (type < 0 || type > 255) {
732 + U_ASSERT(FALSE);
733 + error(U_REGEX_INTERNAL_ERROR);
734 + type = URX_RESERVED_OP;
735 + }
736 + if (val > 0x00ffffff) {
737 + U_ASSERT(FALSE);
738 + error(U_REGEX_INTERNAL_ERROR);
739 + val = 0;
740 + }
741 + if (val < 0) {
742 + if (!(type == URX_RESERVED_OP_N || type == URX_RESERVED_OP)) {
743 + U_ASSERT(FALSE);
744 + error(U_REGEX_INTERNAL_ERROR);
745 + return -1;
746 + }
747 + if (URX_TYPE(val) != 0xff) {
748 + U_ASSERT(FALSE);
749 + error(U_REGEX_INTERNAL_ERROR);
750 + return -1;
751 + }
752 + type = URX_RESERVED_OP_N;
753 + }
754 + return (type << 24) | val;
755 +}
756 +
757 +
758 +//------------------------------------------------------------------------------
759 +//
760 +// appendOp() Append a new instruction onto the compiled pattern
761 +// Includes error checking, limiting the size of the
762 +// pattern to lengths that can be represented in the
763 +// 24 bit operand field of an instruction.
764 +//
765 +//------------------------------------------------------------------------------
766 +void RegexCompile::appendOp(int32_t op) {
767 + if (U_FAILURE(*fStatus)) {
768 + return;
769 + }
770 + fRXPat->fCompiledPat->addElement(op, *fStatus);
771 + if ((fRXPat->fCompiledPat->size() > 0x00fffff0) && U_SUCCESS(*fStatus)) {
772 + error(U_REGEX_PATTERN_TOO_BIG);
773 + }
774 +}
775 +
776 +void RegexCompile::appendOp(int32_t type, int32_t val) {
777 + appendOp(buildOp(type, val));
778 +}
779
780
781 @@ -1908,5 +1887,5 @@
782 U_ASSERT(where>0 && where < code->size());
783
784 - int32_t nop = URX_BUILD(URX_NOP, 0);
785 + int32_t nop = buildOp(URX_NOP, 0);
786 code->insertElementAt(nop, where, *fStatus);
787
788 @@ -1929,5 +1908,5 @@
789 // needs to be incremented to adjust for the insertion.
790 opValue++;
791 - op = URX_BUILD(opType, opValue);
792 + op = buildOp(opType, opValue);
793 code->setElementAt(op, loc);
794 }
795 @@ -1953,4 +1932,56 @@
796 }
797
798 +
799 +//------------------------------------------------------------------------------
800 +//
801 +// allocateData() Allocate storage in the matcher's static data area.
802 +// Return the index for the newly allocated data.
803 +// The storage won't actually exist until we are running a match
804 +// operation, but the storage indexes are inserted into various
805 +// opcodes while compiling the pattern.
806 +//
807 +//------------------------------------------------------------------------------
808 +int32_t RegexCompile::allocateData(int32_t size) {
809 + if (U_FAILURE(*fStatus)) {
810 + return 0;
811 + }
812 + if (size <= 0 || size > 0x100 || fRXPat->fDataSize < 0) {
813 + error(U_REGEX_INTERNAL_ERROR);
814 + return 0;
815 + }
816 + int32_t dataIndex = fRXPat->fDataSize;
817 + fRXPat->fDataSize += size;
818 + if (fRXPat->fDataSize >= 0x00fffff0) {
819 + error(U_REGEX_INTERNAL_ERROR);
820 + }
821 + return dataIndex;
822 +}
823 +
824 +
825 +//------------------------------------------------------------------------------
826 +//
827 +// allocateStackData() Reserve `size` slots in the back-tracking stack frame
828 +// and return the frame index of the first slot.
829 +// Returns 0 without allocating if *fStatus is already a failure, or
830 +// (reporting U_REGEX_INTERNAL_ERROR) if size is outside 1..0x100 or
831 +// the frame size is negative. Frame indexes are placed in opcodes
832 +// as operands (24 bits), so the frame size must stay below that.
833 +//
834 +//------------------------------------------------------------------------------
835 +int32_t RegexCompile::allocateStackData(int32_t size) {
836 + if (U_FAILURE(*fStatus)) {
837 + return 0;
838 + }
839 + if (size <= 0 || size > 0x100 || fRXPat->fFrameSize < 0) { // bad request size or negative counter
840 + error(U_REGEX_INTERNAL_ERROR);
841 + return 0;
842 + }
843 + int32_t dataIndex = fRXPat->fFrameSize; // index of the first newly reserved slot
844 + fRXPat->fFrameSize += size;
845 + if (fRXPat->fFrameSize >= 0x00fffff0) { // frame has grown to the operand-size limit
846 + error(U_REGEX_PATTERN_TOO_BIG);
847 + }
848 + return dataIndex; // still returned after a size error has been reported
849 +}
850
851
852 @@ -1996,5 +2027,5 @@
853 }
854 if (reserveLoc) {
855 - int32_t nop = URX_BUILD(URX_NOP, 0);
856 + int32_t nop = buildOp(URX_NOP, 0);
857 fRXPat->fCompiledPat->insertElementAt(nop, theLoc, *fStatus);
858 }
859 @@ -2071,6 +2102,5 @@
860
861 int32_t frameVarLocation = URX_VAL(captureOp);
862 - int32_t endCaptureOp = URX_BUILD(URX_END_CAPTURE, frameVarLocation);
863 - fRXPat->fCompiledPat->addElement(endCaptureOp, *fStatus);
864 + appendOp(URX_END_CAPTURE, frameVarLocation);
865 }
866 break;
867 @@ -2083,6 +2113,5 @@
868 U_ASSERT(URX_TYPE(stoOp) == URX_STO_SP);
869 int32_t stoLoc = URX_VAL(stoOp);
870 - int32_t ldOp = URX_BUILD(URX_LD_SP, stoLoc);
871 - fRXPat->fCompiledPat->addElement(ldOp, *fStatus);
872 + appendOp(URX_LD_SP, stoLoc);
873 }
874 break;
875 @@ -2093,6 +2122,5 @@
876 U_ASSERT(URX_TYPE(startOp) == URX_LA_START);
877 int32_t dataLoc = URX_VAL(startOp);
878 - int32_t op = URX_BUILD(URX_LA_END, dataLoc);
879 - fRXPat->fCompiledPat->addElement(op, *fStatus);
880 + appendOp(URX_LA_END, dataLoc);
881 }
882 break;
883 @@ -2104,10 +2132,7 @@
884 U_ASSERT(URX_TYPE(startOp) == URX_LA_START);
885 int32_t dataLoc = URX_VAL(startOp);
886 - int32_t op = URX_BUILD(URX_LA_END, dataLoc);
887 - fRXPat->fCompiledPat->addElement(op, *fStatus);
888 - op = URX_BUILD(URX_BACKTRACK, 0);
889 - fRXPat->fCompiledPat->addElement(op, *fStatus);
890 - op = URX_BUILD(URX_LA_END, dataLoc);
891 - fRXPat->fCompiledPat->addElement(op, *fStatus);
892 + appendOp(URX_LA_END, dataLoc);
893 + appendOp(URX_BACKTRACK, 0);
894 + appendOp(URX_LA_END, dataLoc);
895
896 // Patch the URX_SAVE near the top of the block.
897 @@ -2116,5 +2141,5 @@
898 U_ASSERT(URX_TYPE(saveOp) == URX_STATE_SAVE);
899 int32_t dest = fRXPat->fCompiledPat->size()-1;
900 - saveOp = URX_BUILD(URX_STATE_SAVE, dest);
901 + saveOp = buildOp(URX_STATE_SAVE, dest);
902 fRXPat->fCompiledPat->setElementAt(saveOp, fMatchOpenParen);
903 }
904 @@ -2129,8 +2154,6 @@
905 U_ASSERT(URX_TYPE(startOp) == URX_LB_START);
906 int32_t dataLoc = URX_VAL(startOp);
907 - int32_t op = URX_BUILD(URX_LB_END, dataLoc);
908 - fRXPat->fCompiledPat->addElement(op, *fStatus);
909 - op = URX_BUILD(URX_LA_END, dataLoc);
910 - fRXPat->fCompiledPat->addElement(op, *fStatus);
911 + appendOp(URX_LB_END, dataLoc);
912 + appendOp(URX_LA_END, dataLoc);
913
914 // Determine the min and max bounds for the length of the
915 @@ -2168,6 +2191,5 @@
916 U_ASSERT(URX_TYPE(startOp) == URX_LB_START);
917 int32_t dataLoc = URX_VAL(startOp);
918 - int32_t op = URX_BUILD(URX_LBN_END, dataLoc);
919 - fRXPat->fCompiledPat->addElement(op, *fStatus);
920 + appendOp(URX_LBN_END, dataLoc);
921
922 // Determine the min and max bounds for the length of the
923 @@ -2194,5 +2216,5 @@
924 // Insert the pattern location to continue at after a successful match
925 // as the last operand of the URX_LBN_CONT
926 - op = URX_BUILD(URX_RELOC_OPRND, fRXPat->fCompiledPat->size());
927 + int32_t op = buildOp(URX_RELOC_OPRND, fRXPat->fCompiledPat->size());
928 fRXPat->fCompiledPat->setElementAt(op, fMatchOpenParen-1);
929 }
930 @@ -2235,5 +2257,5 @@
931 {
932 // Set of no elements. Always fails to match.
933 - fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKTRACK, 0), *fStatus);
934 + appendOp(URX_BACKTRACK, 0);
935 delete theSet;
936 }
937 @@ -2256,6 +2278,5 @@
938 int32_t setNumber = fRXPat->fSets->size();
939 fRXPat->fSets->addElement(theSet, *fStatus);
940 - int32_t setOp = URX_BUILD(URX_SETREF, setNumber);
941 - fRXPat->fCompiledPat->addElement(setOp, *fStatus);
942 + appendOp(URX_SETREF, setNumber);
943 }
944 }
945 @@ -2296,11 +2317,8 @@
946 // +1 --> Input index (for breaking non-progressing loops)
947 // (Only present if unbounded upper limit on loop)
948 - int32_t counterLoc = fRXPat->fFrameSize;
949 - fRXPat->fFrameSize++;
950 - if (fIntervalUpper < 0) {
951 - fRXPat->fFrameSize++;
952 - }
953 -
954 - int32_t op = URX_BUILD(InitOp, counterLoc);
955 + int32_t dataSize = fIntervalUpper < 0 ? 2 : 1;
956 + int32_t counterLoc = allocateStackData(dataSize);
957 +
958 + int32_t op = buildOp(InitOp, counterLoc);
959 fRXPat->fCompiledPat->setElementAt(op, topOfBlock);
960
961 @@ -2310,5 +2328,5 @@
962 // position to move.
963 int32_t loopEnd = fRXPat->fCompiledPat->size();
964 - op = URX_BUILD(URX_RELOC_OPRND, loopEnd);
965 + op = buildOp(URX_RELOC_OPRND, loopEnd);
966 fRXPat->fCompiledPat->setElementAt(op, topOfBlock+1);
967
968 @@ -2319,6 +2337,5 @@
969 // Apend the CTR_LOOP op. The operand is the location of the CTR_INIT op.
970 // Goes at end of the block being looped over, so just append to the code so far.
971 - op = URX_BUILD(LoopOp, topOfBlock);
972 - fRXPat->fCompiledPat->addElement(op, *fStatus);
973 + appendOp(LoopOp, topOfBlock);
974
975 if ((fIntervalLow & 0xff000000) != 0 ||
976 @@ -2373,5 +2390,5 @@
977 int32_t endOfSequenceLoc = fRXPat->fCompiledPat->size()-1
978 + fIntervalUpper + (fIntervalUpper-fIntervalLow);
979 - int32_t saveOp = URX_BUILD(URX_STATE_SAVE, endOfSequenceLoc);
980 + int32_t saveOp = buildOp(URX_STATE_SAVE, endOfSequenceLoc);
981 if (fIntervalLow == 0) {
982 insertOp(topOfBlock);
983 @@ -2386,11 +2403,8 @@
984 int32_t i;
985 for (i=1; i<fIntervalUpper; i++ ) {
986 - if (i == fIntervalLow) {
987 - fRXPat->fCompiledPat->addElement(saveOp, *fStatus);
988 - }
989 - if (i > fIntervalLow) {
990 - fRXPat->fCompiledPat->addElement(saveOp, *fStatus);
991 - }
992 - fRXPat->fCompiledPat->addElement(op, *fStatus);
993 + if (i >= fIntervalLow) {
994 + appendOp(saveOp);
995 + }
996 + appendOp(op);
997 }
998 return TRUE;
999 @@ -3612,5 +3626,5 @@
1000 U_ASSERT(operandAddress>=0 && operandAddress<deltas.size());
1001 int32_t fixedOperandAddress = operandAddress - deltas.elementAti(operandAddress);
1002 - op = URX_BUILD(opType, fixedOperandAddress);
1003 + op = buildOp(opType, fixedOperandAddress);
1004 fRXPat->fCompiledPat->setElementAt(op, dst);
1005 dst++;
1006 @@ -3627,5 +3641,5 @@
1007 }
1008 where = fRXPat->fGroupMap->elementAti(where-1);
1009 - op = URX_BUILD(opType, where);
1010 + op = buildOp(opType, where);
1011 fRXPat->fCompiledPat->setElementAt(op, dst);
1012 dst++;
1013 @@ -3979,5 +3993,5 @@
1014 //
1015 // scanNamedChar
1016 - // Get a UChar32 from a \N{UNICODE CHARACTER NAME} in the pattern.
1017 +// Get a UChar32 from a \N{UNICODE CHARACTER NAME} in the pattern.
1018 //
1019 // The scan position will be at the 'N'. On return
1020 Index: /icu/trunk/source/i18n/regexcmp.h
1021 ===================================================================
1022 --- /icu/trunk/source/i18n/regexcmp.h (revision 36800)
1023 +++ /icu/trunk/source/i18n/regexcmp.h (revision 36801)
1024 @@ -105,4 +105,11 @@
1025 void insertOp(int32_t where); // Open up a slot for a new op in the
1026 // generated code at the specified location.
1027 + void appendOp(int32_t op); // Append a new op to the compiled pattern.
1028 + void appendOp(int32_t type, int32_t val); // Build & append a new op to the compiled pattern.
1029 + int32_t buildOp(int32_t type, int32_t val); // Construct a new pcode instruction.
1030 + int32_t allocateData(int32_t size); // Allocate space in the matcher data area.
1031 + // Return index of the newly allocated data.
1032 + int32_t allocateStackData(int32_t size); // Allocate space in the match back-track stack frame.
1033 + // Return offset index in the frame.
1034 int32_t minMatchLength(int32_t start,
1035 int32_t end);
1036 Index: /icu/trunk/source/i18n/regeximp.h
1037 ===================================================================
1038 --- /icu/trunk/source/i18n/regeximp.h (revision 36800)
1039 +++ /icu/trunk/source/i18n/regeximp.h (revision 36801)
1040 @@ -1,4 +1,4 @@
1041 //
1042 -// Copyright (C) 2002-2013 International Business Machines Corporation
1043 +// Copyright (C) 2002-2014 International Business Machines Corporation
1044 // and others. All rights reserved.
1045 //
1046 @@ -242,5 +242,4 @@
1047 // Convenience macros for assembling and disassembling a compiled operation.
1048 //
1049 -#define URX_BUILD(type, val) (int32_t)((type << 24) | (val))
1050 #define URX_TYPE(x) ((uint32_t)(x) >> 24)
1051 #define URX_VAL(x) ((x) & 0xffffff)
1052 Index: /icu/trunk/source/test/intltest/regextst.cpp
1053 ===================================================================
1054 --- /icu/trunk/source/test/intltest/regextst.cpp (revision 36800)
1055 +++ /icu/trunk/source/test/intltest/regextst.cpp (revision 36801)
1056 @@ -145,4 +145,7 @@
1057 if (exec) TestBug11049();
1058 break;
1059 + case 25: name = "TestBug11371";
1060 + if (exec) TestBug11371();
1061 + break;
1062 default: name = "";
1063 break; //needed to end loop
1064 @@ -5368,4 +5371,47 @@
1065
1066
1067 +void RegexTest::TestBug11371() { // Oversized patterns must fail to compile with U_REGEX_PATTERN_TOO_BIG.
1068 + if (quick) { // the patterns built below are very large, so quick mode skips this test
1069 + logln("Skipping test. Runs in exhuastive mode only.");
1070 + return;
1071 + }
1072 + UErrorCode status = U_ZERO_ERROR;
1073 + UnicodeString patternString;
1074 + // Case 1: a pattern made of 8,000,000 empty groups "()".
1075 + for (int i=0; i<8000000; i++) {
1076 + patternString.append(UnicodeString("()"));
1077 + }
1078 + LocalPointer<RegexPattern> compiledPat(RegexPattern::compile(patternString, 0, status));
1079 + if (status != U_REGEX_PATTERN_TOO_BIG) {
1080 + errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s.",
1081 + __FILE__, __LINE__, u_errorName(status));
1082 + }
1083 + // Case 2: one group holding 20,000,000 copies of "A++", followed by "{0}B++".
1084 + status = U_ZERO_ERROR;
1085 + patternString = "(";
1086 + for (int i=0; i<20000000; i++) {
1087 + patternString.append(UnicodeString("A++"));
1088 + }
1089 + patternString.append(UnicodeString("){0}B++"));
1090 + LocalPointer<RegexPattern> compiledPat2(RegexPattern::compile(patternString, 0, status));
1091 + if (status != U_REGEX_PATTERN_TOO_BIG) {
1092 + errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s.",
1093 + __FILE__, __LINE__, u_errorName(status));
1094 + }
1095 +
1096 + // Case 3: a pattern with so much literal string data that string indexes overflow
1097 + // the operand data field of a compiled instruction.
1098 + status = U_ZERO_ERROR;
1099 + patternString = "";
1100 + while (patternString.length() < 0x00ffffff) { // grow the literal text to at least 0x00ffffff code units
1101 + patternString.append(UnicodeString("stuff and things dont you know, these are a few of my favorite strings\n"));
1102 + }
1103 + patternString.append(UnicodeString("X? trailing string"));
1104 + LocalPointer<RegexPattern> compiledPat3(RegexPattern::compile(patternString, 0, status));
1105 + if (status != U_REGEX_PATTERN_TOO_BIG) {
1106 + errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s.",
1107 + __FILE__, __LINE__, u_errorName(status));
1108 + }
1109 +}
1110
1111 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
1112 Index: /icu/trunk/source/test/intltest/regextst.h
1113 ===================================================================
1114 --- /icu/trunk/source/test/intltest/regextst.h (revision 36800)
1115 +++ /icu/trunk/source/test/intltest/regextst.h (revision 36801)
1116 @@ -51,4 +51,5 @@
1117 virtual void TestCaseInsensitiveStarters();
1118 virtual void TestBug11049();
1119 + virtual void TestBug11371();
1120
1121 // The following functions are internal to the regexp tests.