1 |
tove 08/05/09 11:04:55 |
2 |
|
3 |
Added: libperl-5.8.8-utf8-boundary.patch |
4 |
libperl-5.8.8-CVE-2008-1927.patch |
5 |
Log: |
6 |
#219203 - Version bump. Added patch from bug #198196. Apply lib64.patch for ppc64 too like dev-lang/perl-5.8.8 does |
7 |
(Portage version: 2.1.5_rc7) |
8 |
|
9 |
Revision Changes Path |
10 |
1.1 sys-devel/libperl/files/libperl-5.8.8-utf8-boundary.patch |
11 |
|
12 |
file : http://sources.gentoo.org/viewcvs.py/gentoo-x86/sys-devel/libperl/files/libperl-5.8.8-utf8-boundary.patch?rev=1.1&view=markup |
13 |
plain: http://sources.gentoo.org/viewcvs.py/gentoo-x86/sys-devel/libperl/files/libperl-5.8.8-utf8-boundary.patch?rev=1.1&content-type=text/plain |
14 |
|
15 |
Index: libperl-5.8.8-utf8-boundary.patch |
16 |
=================================================================== |
17 |
--- regcomp.c 2006-01-08 12:59:27.000000000 -0800 |
18 |
+++ regcomp.c 2007-10-05 12:07:55.000000000 -0700 |
19 |
@@ -135,7 +135,8 @@ |
20 |
I32 extralen; |
21 |
I32 seen_zerolen; |
22 |
I32 seen_evals; |
23 |
- I32 utf8; |
24 |
+ I32 utf8; /* pattern is utf8 or not */ |
25 |
+ I32 orig_utf8; /* pattern was originally utf8 */ |
26 |
#if ADD_TO_REGEXEC |
27 |
char *starttry; /* -Dr: where regtry was called. */ |
28 |
#define RExC_starttry (pRExC_state->starttry) |
29 |
@@ -161,6 +162,7 @@ |
30 |
#define RExC_seen_zerolen (pRExC_state->seen_zerolen) |
31 |
#define RExC_seen_evals (pRExC_state->seen_evals) |
32 |
#define RExC_utf8 (pRExC_state->utf8) |
33 |
+#define RExC_orig_utf8 (pRExC_state->orig_utf8) |
34 |
|
35 |
#define ISMULT1(c) ((c) == '*' || (c) == '+' || (c) == '?') |
36 |
#define ISMULT2(s) ((*s) == '*' || (*s) == '+' || (*s) == '?' || \ |
37 |
@@ -1749,15 +1751,17 @@ |
38 |
if (exp == NULL) |
39 |
FAIL("NULL regexp argument"); |
40 |
|
41 |
- RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8; |
42 |
+ RExC_orig_utf8 = RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8; |
43 |
|
44 |
- RExC_precomp = exp; |
45 |
DEBUG_r({ |
46 |
if (!PL_colorset) reginitcolors(); |
47 |
PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n", |
48 |
PL_colors[4],PL_colors[5],PL_colors[0], |
49 |
- (int)(xend - exp), RExC_precomp, PL_colors[1]); |
50 |
+ (int)(xend - exp), exp, PL_colors[1]); |
51 |
}); |
52 |
+ |
53 |
+redo_first_pass: |
54 |
+ RExC_precomp = exp; |
55 |
RExC_flags = pm->op_pmflags; |
56 |
RExC_sawback = 0; |
57 |
|
58 |
@@ -1783,6 +1787,17 @@ |
59 |
RExC_precomp = Nullch; |
60 |
return(NULL); |
61 |
} |
62 |
+ if (RExC_utf8 && !RExC_orig_utf8) { |
63 |
+ STRLEN len = xend-exp; |
64 |
+ DEBUG_r(PerlIO_printf(Perl_debug_log, |
65 |
+ "UTF8 mismatch! Converting to utf8 for resizing and compile\n")); |
66 |
+ exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)exp, &len); |
67 |
+ xend = exp + len; |
68 |
+ RExC_orig_utf8 = RExC_utf8; |
69 |
+ SAVEFREEPV(exp); |
70 |
+ goto redo_first_pass; |
71 |
+ } |
72 |
+ |
73 |
|
74 |
|
75 |
|
76 |
|
77 |
1.1 sys-devel/libperl/files/libperl-5.8.8-CVE-2008-1927.patch |
78 |
|
79 |
file : http://sources.gentoo.org/viewcvs.py/gentoo-x86/sys-devel/libperl/files/libperl-5.8.8-CVE-2008-1927.patch?rev=1.1&view=markup |
80 |
plain: http://sources.gentoo.org/viewcvs.py/gentoo-x86/sys-devel/libperl/files/libperl-5.8.8-CVE-2008-1927.patch?rev=1.1&content-type=text/plain |
81 |
|
82 |
Index: libperl-5.8.8-CVE-2008-1927.patch |
83 |
=================================================================== |
84 |
Fix a double free / segfault with utf8 regexps |
85 |
Debian #454792 |
86 |
[rt.cpan.org #48156] |
87 |
[rt.cpan.org #40641] |
88 |
upstream change 29204 |
89 |
|
90 |
UTF8_ALLOW_DEFAULT definition in utf8.h picked from upstream change 27688 |
91 |
|
92 |
diff --git a/embed.fnc b/embed.fnc |
93 |
index edfbc0e..26524c7 100644 |
94 |
--- a/embed.fnc |
95 |
+++ b/embed.fnc |
96 |
@@ -1168,6 +1168,7 @@ Es |void |reguni |NN const struct RExC_state_t *state|UV uv|NN char *s|NN STRLE |
97 |
Es |regnode*|regclass |NN struct RExC_state_t *state |
98 |
ERs |I32 |regcurly |NN const char * |
99 |
Es |regnode*|reg_node |NN struct RExC_state_t *state|U8 op |
100 |
+Es |UV |reg_recode |const char value|NULLOK SV **encp |
101 |
Es |regnode*|regpiece |NN struct RExC_state_t *state|NN I32 *flagp |
102 |
Es |void |reginsert |NN struct RExC_state_t *state|U8 op|NN regnode *opnd |
103 |
Es |void |regoptail |NN struct RExC_state_t *state|NN regnode *p|NN regnode *val |
104 |
diff --git a/embed.h b/embed.h |
105 |
index 2b38fd5..372b04f 100644 |
106 |
--- a/embed.h |
107 |
+++ b/embed.h |
108 |
@@ -1234,6 +1234,7 @@ |
109 |
#define regclass S_regclass |
110 |
#define regcurly S_regcurly |
111 |
#define reg_node S_reg_node |
112 |
+#define reg_recode S_reg_recode |
113 |
#define regpiece S_regpiece |
114 |
#define reginsert S_reginsert |
115 |
#define regoptail S_regoptail |
116 |
@@ -3277,6 +3278,7 @@ |
117 |
#define regclass(a) S_regclass(aTHX_ a) |
118 |
#define regcurly(a) S_regcurly(aTHX_ a) |
119 |
#define reg_node(a,b) S_reg_node(aTHX_ a,b) |
120 |
+#define reg_recode(a,b) S_reg_recode(aTHX_ a,b) |
121 |
#define regpiece(a,b) S_regpiece(aTHX_ a,b) |
122 |
#define reginsert(a,b,c) S_reginsert(aTHX_ a,b,c) |
123 |
#define regoptail(a,b,c) S_regoptail(aTHX_ a,b,c) |
124 |
diff --git a/pod/perldiag.pod b/pod/perldiag.pod |
125 |
index 9b3134c..7d95216 100644 |
126 |
--- a/pod/perldiag.pod |
127 |
+++ b/pod/perldiag.pod |
128 |
@@ -1900,6 +1900,15 @@ recognized by Perl or by a user-supplied handler. See L<attributes>. |
129 |
(W printf) Perl does not understand the given format conversion. See |
130 |
L<perlfunc/sprintf>. |
131 |
|
132 |
+=item Invalid escape in the specified encoding in regex; marked by <-- HERE in m/%s/ |
133 |
+ |
134 |
+(W regexp) The numeric escape (for example C<\xHH>) of value < 256 |
135 |
+didn't correspond to a single character through the conversion |
136 |
+from the encoding specified by the encoding pragma. |
137 |
+The escape was replaced with REPLACEMENT CHARACTER (U+FFFD) instead. |
138 |
+The <-- HERE shows in the regular expression about where the |
139 |
+escape was discovered. |
140 |
+ |
141 |
=item Invalid [] range "%s" in regex; marked by <-- HERE in m/%s/ |
142 |
|
143 |
(F) The range specified in a character class had a minimum character |
144 |
diff --git a/proto.h b/proto.h |
145 |
index 6d185dd..ef6c0cf 100644 |
146 |
--- a/proto.h |
147 |
+++ b/proto.h |
148 |
@@ -1748,6 +1748,7 @@ STATIC I32 S_regcurly(pTHX_ const char *) |
149 |
__attribute__warn_unused_result__; |
150 |
|
151 |
STATIC regnode* S_reg_node(pTHX_ struct RExC_state_t *state, U8 op); |
152 |
+STATIC UV S_reg_recode(pTHX_ const char value, SV **encp); |
153 |
STATIC regnode* S_regpiece(pTHX_ struct RExC_state_t *state, I32 *flagp); |
154 |
STATIC void S_reginsert(pTHX_ struct RExC_state_t *state, U8 op, regnode *opnd); |
155 |
STATIC void S_regoptail(pTHX_ struct RExC_state_t *state, regnode *p, regnode *val); |
156 |
diff --git a/regcomp.c b/regcomp.c |
157 |
index 928cf39..98d48dd 100644 |
158 |
--- a/regcomp.c |
159 |
+++ b/regcomp.c |
160 |
@@ -2791,6 +2791,39 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp) |
161 |
} |
162 |
|
163 |
/* |
164 |
+ * reg_recode |
165 |
+ * |
166 |
+ * It returns the code point in utf8 for the value in *encp. |
167 |
+ * value: a code value in the source encoding |
168 |
+ * encp: a pointer to an Encode object |
169 |
+ * |
170 |
+ * If the result from Encode is not a single character, |
171 |
+ * it returns U+FFFD (Replacement character) and sets *encp to NULL. |
172 |
+ */ |
173 |
+STATIC UV |
174 |
+S_reg_recode(pTHX_ const char value, SV **encp) |
175 |
+{ |
176 |
+ STRLEN numlen = 1; |
177 |
+ SV * const sv = sv_2mortal(newSVpvn(&value, numlen)); |
178 |
+ const char * const s = encp && *encp ? sv_recode_to_utf8(sv, *encp) |
179 |
+ : SvPVX(sv); |
180 |
+ const STRLEN newlen = SvCUR(sv); |
181 |
+ UV uv = UNICODE_REPLACEMENT; |
182 |
+ |
183 |
+ if (newlen) |
184 |
+ uv = SvUTF8(sv) |
185 |
+ ? utf8n_to_uvchr((U8*)s, newlen, &numlen, UTF8_ALLOW_DEFAULT) |
186 |
+ : *(U8*)s; |
187 |
+ |
188 |
+ if (!newlen || numlen != newlen) { |
189 |
+ uv = UNICODE_REPLACEMENT; |
190 |
+ if (encp) |
191 |
+ *encp = NULL; |
192 |
+ } |
193 |
+ return uv; |
194 |
+} |
195 |
+ |
196 |
+/* |
197 |
- regatom - the lowest level |
198 |
* |
199 |
* Optimization: gobbles an entire sequence of ordinary characters so that |
200 |
@@ -3182,6 +3215,8 @@ tryagain: |
201 |
ender = grok_hex(p, &numlen, &flags, NULL); |
202 |
p += numlen; |
203 |
} |
204 |
+ if (PL_encoding && ender < 0x100) |
205 |
+ goto recode_encoding; |
206 |
break; |
207 |
case 'c': |
208 |
p++; |
209 |
@@ -3201,6 +3236,17 @@ tryagain: |
210 |
--p; |
211 |
goto loopdone; |
212 |
} |
213 |
+ if (PL_encoding && ender < 0x100) |
214 |
+ goto recode_encoding; |
215 |
+ break; |
216 |
+ recode_encoding: |
217 |
+ { |
218 |
+ SV* enc = PL_encoding; |
219 |
+ ender = reg_recode((const char)(U8)ender, &enc); |
220 |
+ if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP)) |
221 |
+ vWARN(p, "Invalid escape in the specified encoding"); |
222 |
+ RExC_utf8 = 1; |
223 |
+ } |
224 |
break; |
225 |
case '\0': |
226 |
if (p >= RExC_end) |
227 |
@@ -3331,32 +3377,6 @@ tryagain: |
228 |
break; |
229 |
} |
230 |
|
231 |
- /* If the encoding pragma is in effect recode the text of |
232 |
- * any EXACT-kind nodes. */ |
233 |
- if (PL_encoding && PL_regkind[(U8)OP(ret)] == EXACT) { |
234 |
- STRLEN oldlen = STR_LEN(ret); |
235 |
- SV *sv = sv_2mortal(newSVpvn(STRING(ret), oldlen)); |
236 |
- |
237 |
- if (RExC_utf8) |
238 |
- SvUTF8_on(sv); |
239 |
- if (sv_utf8_downgrade(sv, TRUE)) { |
240 |
- const char * const s = sv_recode_to_utf8(sv, PL_encoding); |
241 |
- const STRLEN newlen = SvCUR(sv); |
242 |
- |
243 |
- if (SvUTF8(sv)) |
244 |
- RExC_utf8 = 1; |
245 |
- if (!SIZE_ONLY) { |
246 |
- DEBUG_r(PerlIO_printf(Perl_debug_log, "recode %*s to %*s\n", |
247 |
- (int)oldlen, STRING(ret), |
248 |
- (int)newlen, s)); |
249 |
- Copy(s, STRING(ret), newlen, char); |
250 |
- STR_LEN(ret) += newlen - oldlen; |
251 |
- RExC_emit += STR_SZ(newlen) - STR_SZ(oldlen); |
252 |
- } else |
253 |
- RExC_size += STR_SZ(newlen) - STR_SZ(oldlen); |
254 |
- } |
255 |
- } |
256 |
- |
257 |
return(ret); |
258 |
} |
259 |
|
260 |
@@ -3734,6 +3754,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) |
261 |
value = grok_hex(RExC_parse, &numlen, &flags, NULL); |
262 |
RExC_parse += numlen; |
263 |
} |
264 |
+ if (PL_encoding && value < 0x100) |
265 |
+ goto recode_encoding; |
266 |
break; |
267 |
case 'c': |
268 |
value = UCHARAT(RExC_parse++); |
269 |
@@ -3741,13 +3763,24 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) |
270 |
break; |
271 |
case '0': case '1': case '2': case '3': case '4': |
272 |
case '5': case '6': case '7': case '8': case '9': |
273 |
- { |
274 |
- I32 flags = 0; |
275 |
- numlen = 3; |
276 |
- value = grok_oct(--RExC_parse, &numlen, &flags, NULL); |
277 |
- RExC_parse += numlen; |
278 |
- break; |
279 |
- } |
280 |
+ { |
281 |
+ I32 flags = 0; |
282 |
+ numlen = 3; |
283 |
+ value = grok_oct(--RExC_parse, &numlen, &flags, NULL); |
284 |
+ RExC_parse += numlen; |
285 |
+ if (PL_encoding && value < 0x100) |
286 |
+ goto recode_encoding; |
287 |
+ break; |
288 |
+ } |
289 |
+ recode_encoding: |
290 |
+ { |
291 |
+ SV* enc = PL_encoding; |
292 |
+ value = reg_recode((const char)(U8)value, &enc); |
293 |
+ if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP)) |
294 |
+ vWARN(RExC_parse, |
295 |
+ "Invalid escape in the specified encoding"); |
296 |
+ break; |
297 |
+ } |
298 |
default: |
299 |
if (!SIZE_ONLY && isALPHA(value) && ckWARN(WARN_REGEXP)) |
300 |
vWARN2(RExC_parse, |
301 |
diff --git a/t/uni/tr_utf8.t b/t/uni/tr_utf8.t |
302 |
index 606a84a..354156a 100755 |
303 |
--- a/t/uni/tr_utf8.t |
304 |
+++ b/t/uni/tr_utf8.t |
305 |
@@ -31,7 +31,7 @@ BEGIN { |
306 |
} |
307 |
|
308 |
use strict; |
309 |
-use Test::More tests => 7; |
310 |
+use Test::More tests => 8; |
311 |
|
312 |
use encoding 'utf8'; |
313 |
|
314 |
@@ -67,4 +67,12 @@ is($str, $hiragana, "s/// # hiragana -> katakana"); |
315 |
$line =~ tr/bcdeghijklmnprstvwxyz$02578/בצדעגהיײקלמנפּרסטװשכיזשױתײחא/; |
316 |
is($line, "aבצדעfגהיײקלמנoפqּרסuטװשכיזש1ױ34ת6ײח9", "[perl #16843]"); |
317 |
} |
318 |
+ |
319 |
+{ |
320 |
+ # [perl #40641] |
321 |
+ my $str = qq/Gebääääääääääääääääääääude/; |
322 |
+ my $reg = qr/Gebääääääääääääääääääääude/; |
323 |
+ ok($str =~ /$reg/, "[perl #40641]"); |
324 |
+} |
325 |
+ |
326 |
__END__ |
327 |
diff --git a/utf8.h b/utf8.h |
328 |
index 6d63897..3800866 100644 |
329 |
--- a/utf8.h |
330 |
+++ b/utf8.h |
331 |
@@ -198,6 +198,8 @@ encoded character. |
332 |
UTF8_ALLOW_SURROGATE|UTF8_ALLOW_FFFF) |
333 |
#define UTF8_ALLOW_ANY 0x00FF |
334 |
#define UTF8_CHECK_ONLY 0x0200 |
335 |
+#define UTF8_ALLOW_DEFAULT (ckWARN(WARN_UTF8) ? 0 : \ |
336 |
+ UTF8_ALLOW_ANYUV) |
337 |
|
338 |
#define UNICODE_SURROGATE_FIRST 0xD800 |
339 |
#define UNICODE_SURROGATE_LAST 0xDFFF |
340 |
|
341 |
|
342 |
|
343 |
-- |
344 |
gentoo-commits@l.g.o mailing list |