Gentoo Archives: gentoo-commits

From: "Torsten Veller (tove)" <tove@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] gentoo-x86 commit in sys-devel/libperl/files: libperl-5.8.8-utf8-boundary.patch libperl-5.8.8-CVE-2008-1927.patch
Date: Fri, 09 May 2008 11:04:58
Message-Id: E1JuQPH-0000n7-GZ@stork.gentoo.org
1 tove 08/05/09 11:04:55
2
3 Added: libperl-5.8.8-utf8-boundary.patch
4 libperl-5.8.8-CVE-2008-1927.patch
5 Log:
6 #219203 - Version bump. Added patch from bug #198196. Apply lib64.patch for ppc64 too like dev-lang/perl-5.8.8 does
7 (Portage version: 2.1.5_rc7)
8
9 Revision Changes Path
10 1.1 sys-devel/libperl/files/libperl-5.8.8-utf8-boundary.patch
11
12 file : http://sources.gentoo.org/viewcvs.py/gentoo-x86/sys-devel/libperl/files/libperl-5.8.8-utf8-boundary.patch?rev=1.1&view=markup
13 plain: http://sources.gentoo.org/viewcvs.py/gentoo-x86/sys-devel/libperl/files/libperl-5.8.8-utf8-boundary.patch?rev=1.1&content-type=text/plain
14
15 Index: libperl-5.8.8-utf8-boundary.patch
16 ===================================================================
17 --- regcomp.c 2006-01-08 12:59:27.000000000 -0800
18 +++ regcomp.c 2007-10-05 12:07:55.000000000 -0700
19 @@ -135,7 +135,8 @@
20 I32 extralen;
21 I32 seen_zerolen;
22 I32 seen_evals;
23 - I32 utf8;
24 + I32 utf8; /* pattern is utf8 or not */
25 + I32 orig_utf8; /* pattern was originally utf8 */
26 #if ADD_TO_REGEXEC
27 char *starttry; /* -Dr: where regtry was called. */
28 #define RExC_starttry (pRExC_state->starttry)
29 @@ -161,6 +162,7 @@
30 #define RExC_seen_zerolen (pRExC_state->seen_zerolen)
31 #define RExC_seen_evals (pRExC_state->seen_evals)
32 #define RExC_utf8 (pRExC_state->utf8)
33 +#define RExC_orig_utf8 (pRExC_state->orig_utf8)
34
35 #define ISMULT1(c) ((c) == '*' || (c) == '+' || (c) == '?')
36 #define ISMULT2(s) ((*s) == '*' || (*s) == '+' || (*s) == '?' || \
37 @@ -1749,15 +1751,17 @@
38 if (exp == NULL)
39 FAIL("NULL regexp argument");
40
41 - RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
42 + RExC_orig_utf8 = RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
43
44 - RExC_precomp = exp;
45 DEBUG_r({
46 if (!PL_colorset) reginitcolors();
47 PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",
48 PL_colors[4],PL_colors[5],PL_colors[0],
49 - (int)(xend - exp), RExC_precomp, PL_colors[1]);
50 + (int)(xend - exp), exp, PL_colors[1]);
51 });
52 +
53 +redo_first_pass:
54 + RExC_precomp = exp;
55 RExC_flags = pm->op_pmflags;
56 RExC_sawback = 0;
57
58 @@ -1783,6 +1787,17 @@
59 RExC_precomp = Nullch;
60 return(NULL);
61 }
62 + if (RExC_utf8 && !RExC_orig_utf8) {
63 + STRLEN len = xend-exp;
64 + DEBUG_r(PerlIO_printf(Perl_debug_log,
65 + "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
66 + exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)exp, &len);
67 + xend = exp + len;
68 + RExC_orig_utf8 = RExC_utf8;
69 + SAVEFREEPV(exp);
70 + goto redo_first_pass;
71 + }
72 +
73
74
75
76
77 1.1 sys-devel/libperl/files/libperl-5.8.8-CVE-2008-1927.patch
78
79 file : http://sources.gentoo.org/viewcvs.py/gentoo-x86/sys-devel/libperl/files/libperl-5.8.8-CVE-2008-1927.patch?rev=1.1&view=markup
80 plain: http://sources.gentoo.org/viewcvs.py/gentoo-x86/sys-devel/libperl/files/libperl-5.8.8-CVE-2008-1927.patch?rev=1.1&content-type=text/plain
81
82 Index: libperl-5.8.8-CVE-2008-1927.patch
83 ===================================================================
84 Fix a double free / segfault with utf8 regexps
85 Debian #454792
86 [rt.cpan.org #48156]
87 [rt.cpan.org #40641]
88 upstream change 29204
89
90 UTF8_ALLOW_DEFAULT definition in utf8.h picked from upstream change 27688
91
92 diff --git a/embed.fnc b/embed.fnc
93 index edfbc0e..26524c7 100644
94 --- a/embed.fnc
95 +++ b/embed.fnc
96 @@ -1168,6 +1168,7 @@ Es |void |reguni |NN const struct RExC_state_t *state|UV uv|NN char *s|NN STRLE
97 Es |regnode*|regclass |NN struct RExC_state_t *state
98 ERs |I32 |regcurly |NN const char *
99 Es |regnode*|reg_node |NN struct RExC_state_t *state|U8 op
100 +Es |UV |reg_recode |const char value|NULLOK SV **encp
101 Es |regnode*|regpiece |NN struct RExC_state_t *state|NN I32 *flagp
102 Es |void |reginsert |NN struct RExC_state_t *state|U8 op|NN regnode *opnd
103 Es |void |regoptail |NN struct RExC_state_t *state|NN regnode *p|NN regnode *val
104 diff --git a/embed.h b/embed.h
105 index 2b38fd5..372b04f 100644
106 --- a/embed.h
107 +++ b/embed.h
108 @@ -1234,6 +1234,7 @@
109 #define regclass S_regclass
110 #define regcurly S_regcurly
111 #define reg_node S_reg_node
112 +#define reg_recode S_reg_recode
113 #define regpiece S_regpiece
114 #define reginsert S_reginsert
115 #define regoptail S_regoptail
116 @@ -3277,6 +3278,7 @@
117 #define regclass(a) S_regclass(aTHX_ a)
118 #define regcurly(a) S_regcurly(aTHX_ a)
119 #define reg_node(a,b) S_reg_node(aTHX_ a,b)
120 +#define reg_recode(a,b) S_reg_recode(aTHX_ a,b)
121 #define regpiece(a,b) S_regpiece(aTHX_ a,b)
122 #define reginsert(a,b,c) S_reginsert(aTHX_ a,b,c)
123 #define regoptail(a,b,c) S_regoptail(aTHX_ a,b,c)
124 diff --git a/pod/perldiag.pod b/pod/perldiag.pod
125 index 9b3134c..7d95216 100644
126 --- a/pod/perldiag.pod
127 +++ b/pod/perldiag.pod
128 @@ -1900,6 +1900,15 @@ recognized by Perl or by a user-supplied handler. See L<attributes>.
129 (W printf) Perl does not understand the given format conversion. See
130 L<perlfunc/sprintf>.
131
132 +=item Invalid escape in the specified encoding in regex; marked by <-- HERE in m/%s/
133 +
134 +(W regexp) The numeric escape (for example C<\xHH>) of value < 256
135 +didn't correspond to a single character through the conversion
136 +from the encoding specified by the encoding pragma.
137 +The escape was replaced with REPLACEMENT CHARACTER (U+FFFD) instead.
138 +The <-- HERE shows in the regular expression about where the
139 +escape was discovered.
140 +
141 =item Invalid [] range "%s" in regex; marked by <-- HERE in m/%s/
142
143 (F) The range specified in a character class had a minimum character
144 diff --git a/proto.h b/proto.h
145 index 6d185dd..ef6c0cf 100644
146 --- a/proto.h
147 +++ b/proto.h
148 @@ -1748,6 +1748,7 @@ STATIC I32 S_regcurly(pTHX_ const char *)
149 __attribute__warn_unused_result__;
150
151 STATIC regnode* S_reg_node(pTHX_ struct RExC_state_t *state, U8 op);
152 +STATIC UV S_reg_recode(pTHX_ const char value, SV **encp);
153 STATIC regnode* S_regpiece(pTHX_ struct RExC_state_t *state, I32 *flagp);
154 STATIC void S_reginsert(pTHX_ struct RExC_state_t *state, U8 op, regnode *opnd);
155 STATIC void S_regoptail(pTHX_ struct RExC_state_t *state, regnode *p, regnode *val);
156 diff --git a/regcomp.c b/regcomp.c
157 index 928cf39..98d48dd 100644
158 --- a/regcomp.c
159 +++ b/regcomp.c
160 @@ -2791,6 +2791,39 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp)
161 }
162
163 /*
164 + * reg_recode
165 + *
166 + * It returns the code point in utf8 for the value in *encp.
167 + * value: a code value in the source encoding
168 + * encp: a pointer to an Encode object
169 + *
170 + * If the result from Encode is not a single character,
171 + * it returns U+FFFD (Replacement character) and sets *encp to NULL.
172 + */
173 +STATIC UV
174 +S_reg_recode(pTHX_ const char value, SV **encp)
175 +{
176 + STRLEN numlen = 1;
177 + SV * const sv = sv_2mortal(newSVpvn(&value, numlen));
178 + const char * const s = encp && *encp ? sv_recode_to_utf8(sv, *encp)
179 + : SvPVX(sv);
180 + const STRLEN newlen = SvCUR(sv);
181 + UV uv = UNICODE_REPLACEMENT;
182 +
183 + if (newlen)
184 + uv = SvUTF8(sv)
185 + ? utf8n_to_uvchr((U8*)s, newlen, &numlen, UTF8_ALLOW_DEFAULT)
186 + : *(U8*)s;
187 +
188 + if (!newlen || numlen != newlen) {
189 + uv = UNICODE_REPLACEMENT;
190 + if (encp)
191 + *encp = NULL;
192 + }
193 + return uv;
194 +}
195 +
196 +/*
197 - regatom - the lowest level
198 *
199 * Optimization: gobbles an entire sequence of ordinary characters so that
200 @@ -3182,6 +3215,8 @@ tryagain:
201 ender = grok_hex(p, &numlen, &flags, NULL);
202 p += numlen;
203 }
204 + if (PL_encoding && ender < 0x100)
205 + goto recode_encoding;
206 break;
207 case 'c':
208 p++;
209 @@ -3201,6 +3236,17 @@ tryagain:
210 --p;
211 goto loopdone;
212 }
213 + if (PL_encoding && ender < 0x100)
214 + goto recode_encoding;
215 + break;
216 + recode_encoding:
217 + {
218 + SV* enc = PL_encoding;
219 + ender = reg_recode((const char)(U8)ender, &enc);
220 + if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP))
221 + vWARN(p, "Invalid escape in the specified encoding");
222 + RExC_utf8 = 1;
223 + }
224 break;
225 case '\0':
226 if (p >= RExC_end)
227 @@ -3331,32 +3377,6 @@ tryagain:
228 break;
229 }
230
231 - /* If the encoding pragma is in effect recode the text of
232 - * any EXACT-kind nodes. */
233 - if (PL_encoding && PL_regkind[(U8)OP(ret)] == EXACT) {
234 - STRLEN oldlen = STR_LEN(ret);
235 - SV *sv = sv_2mortal(newSVpvn(STRING(ret), oldlen));
236 -
237 - if (RExC_utf8)
238 - SvUTF8_on(sv);
239 - if (sv_utf8_downgrade(sv, TRUE)) {
240 - const char * const s = sv_recode_to_utf8(sv, PL_encoding);
241 - const STRLEN newlen = SvCUR(sv);
242 -
243 - if (SvUTF8(sv))
244 - RExC_utf8 = 1;
245 - if (!SIZE_ONLY) {
246 - DEBUG_r(PerlIO_printf(Perl_debug_log, "recode %*s to %*s\n",
247 - (int)oldlen, STRING(ret),
248 - (int)newlen, s));
249 - Copy(s, STRING(ret), newlen, char);
250 - STR_LEN(ret) += newlen - oldlen;
251 - RExC_emit += STR_SZ(newlen) - STR_SZ(oldlen);
252 - } else
253 - RExC_size += STR_SZ(newlen) - STR_SZ(oldlen);
254 - }
255 - }
256 -
257 return(ret);
258 }
259
260 @@ -3734,6 +3754,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
261 value = grok_hex(RExC_parse, &numlen, &flags, NULL);
262 RExC_parse += numlen;
263 }
264 + if (PL_encoding && value < 0x100)
265 + goto recode_encoding;
266 break;
267 case 'c':
268 value = UCHARAT(RExC_parse++);
269 @@ -3741,13 +3763,24 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
270 break;
271 case '0': case '1': case '2': case '3': case '4':
272 case '5': case '6': case '7': case '8': case '9':
273 - {
274 - I32 flags = 0;
275 - numlen = 3;
276 - value = grok_oct(--RExC_parse, &numlen, &flags, NULL);
277 - RExC_parse += numlen;
278 - break;
279 - }
280 + {
281 + I32 flags = 0;
282 + numlen = 3;
283 + value = grok_oct(--RExC_parse, &numlen, &flags, NULL);
284 + RExC_parse += numlen;
285 + if (PL_encoding && value < 0x100)
286 + goto recode_encoding;
287 + break;
288 + }
289 + recode_encoding:
290 + {
291 + SV* enc = PL_encoding;
292 + value = reg_recode((const char)(U8)value, &enc);
293 + if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP))
294 + vWARN(RExC_parse,
295 + "Invalid escape in the specified encoding");
296 + break;
297 + }
298 default:
299 if (!SIZE_ONLY && isALPHA(value) && ckWARN(WARN_REGEXP))
300 vWARN2(RExC_parse,
301 diff --git a/t/uni/tr_utf8.t b/t/uni/tr_utf8.t
302 index 606a84a..354156a 100755
303 --- a/t/uni/tr_utf8.t
304 +++ b/t/uni/tr_utf8.t
305 @@ -31,7 +31,7 @@ BEGIN {
306 }
307
308 use strict;
309 -use Test::More tests => 7;
310 +use Test::More tests => 8;
311
312 use encoding 'utf8';
313
314 @@ -67,4 +67,12 @@ is($str, $hiragana, "s/// # hiragana -> katakana");
315 $line =~ tr/bcdeghijklmnprstvwxyz$02578/בצדעגהיײקלמנפּרסטװשכיזשױתײחא/;
316 is($line, "aבצדעfגהיײקלמנoפqּרסuטװשכיזש1ױ34ת6ײח9", "[perl #16843]");
317 }
318 +
319 +{
320 + # [perl #40641]
321 + my $str = qq/Gebääääääääääääääääääääude/;
322 + my $reg = qr/Gebääääääääääääääääääääude/;
323 + ok($str =~ /$reg/, "[perl #40641]");
324 +}
325 +
326 __END__
327 diff --git a/utf8.h b/utf8.h
328 index 6d63897..3800866 100644
329 --- a/utf8.h
330 +++ b/utf8.h
331 @@ -198,6 +198,8 @@ encoded character.
332 UTF8_ALLOW_SURROGATE|UTF8_ALLOW_FFFF)
333 #define UTF8_ALLOW_ANY 0x00FF
334 #define UTF8_CHECK_ONLY 0x0200
335 +#define UTF8_ALLOW_DEFAULT (ckWARN(WARN_UTF8) ? 0 : \
336 + UTF8_ALLOW_ANYUV)
337
338 #define UNICODE_SURROGATE_FIRST 0xD800
339 #define UNICODE_SURROGATE_LAST 0xDFFF
340
341
342
343 --
344 gentoo-commits@l.g.o mailing list