Gentoo Archives: gentoo-commits

From: "Torsten Veller (tove)" <tove@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] gentoo-x86 commit in dev-lang/perl/files: perl-5.8.8-CVE-2008-1927.patch
Date: Fri, 09 May 2008 11:12:03
Message-Id: E1JuQW8-0000r8-QJ@stork.gentoo.org
1 tove 08/05/09 11:12:00
2
3 Added: perl-5.8.8-CVE-2008-1927.patch
4 Log:
5 #219203 - Fix for CVE-2008-1927
6 (Portage version: 2.1.5_rc7)
7
8 Revision Changes Path
9 1.1 dev-lang/perl/files/perl-5.8.8-CVE-2008-1927.patch
10
11 file : http://sources.gentoo.org/viewcvs.py/gentoo-x86/dev-lang/perl/files/perl-5.8.8-CVE-2008-1927.patch?rev=1.1&view=markup
12 plain: http://sources.gentoo.org/viewcvs.py/gentoo-x86/dev-lang/perl/files/perl-5.8.8-CVE-2008-1927.patch?rev=1.1&content-type=text/plain
13
14 Index: perl-5.8.8-CVE-2008-1927.patch
15 ===================================================================
16 Fix a double free / segfault with utf8 regexps
17 Debian #454792
18 [rt.cpan.org #48156]
19 [rt.cpan.org #40641]
20 upstream change 29204
21
22 UTF8_ALLOW_DEFAULT definition in utf8.h picked from upstream change 27688
23
24 diff --git a/embed.fnc b/embed.fnc
25 index edfbc0e..26524c7 100644
26 --- a/embed.fnc
27 +++ b/embed.fnc
28 @@ -1168,6 +1168,7 @@ Es |void |reguni |NN const struct RExC_state_t *state|UV uv|NN char *s|NN STRLE
29 Es |regnode*|regclass |NN struct RExC_state_t *state
30 ERs |I32 |regcurly |NN const char *
31 Es |regnode*|reg_node |NN struct RExC_state_t *state|U8 op
32 +Es |UV |reg_recode |const char value|NULLOK SV **encp
33 Es |regnode*|regpiece |NN struct RExC_state_t *state|NN I32 *flagp
34 Es |void |reginsert |NN struct RExC_state_t *state|U8 op|NN regnode *opnd
35 Es |void |regoptail |NN struct RExC_state_t *state|NN regnode *p|NN regnode *val
36 diff --git a/embed.h b/embed.h
37 index 2b38fd5..372b04f 100644
38 --- a/embed.h
39 +++ b/embed.h
40 @@ -1234,6 +1234,7 @@
41 #define regclass S_regclass
42 #define regcurly S_regcurly
43 #define reg_node S_reg_node
44 +#define reg_recode S_reg_recode
45 #define regpiece S_regpiece
46 #define reginsert S_reginsert
47 #define regoptail S_regoptail
48 @@ -3277,6 +3278,7 @@
49 #define regclass(a) S_regclass(aTHX_ a)
50 #define regcurly(a) S_regcurly(aTHX_ a)
51 #define reg_node(a,b) S_reg_node(aTHX_ a,b)
52 +#define reg_recode(a,b) S_reg_recode(aTHX_ a,b)
53 #define regpiece(a,b) S_regpiece(aTHX_ a,b)
54 #define reginsert(a,b,c) S_reginsert(aTHX_ a,b,c)
55 #define regoptail(a,b,c) S_regoptail(aTHX_ a,b,c)
56 diff --git a/pod/perldiag.pod b/pod/perldiag.pod
57 index 9b3134c..7d95216 100644
58 --- a/pod/perldiag.pod
59 +++ b/pod/perldiag.pod
60 @@ -1900,6 +1900,15 @@ recognized by Perl or by a user-supplied handler. See L<attributes>.
61 (W printf) Perl does not understand the given format conversion. See
62 L<perlfunc/sprintf>.
63
64 +=item Invalid escape in the specified encoding in regex; marked by <-- HERE in m/%s/
65 +
66 +(W regexp) The numeric escape (for example C<\xHH>) of value < 256
67 +didn't correspond to a single character through the conversion
68 +from the encoding specified by the encoding pragma.
69 +The escape was replaced with REPLACEMENT CHARACTER (U+FFFD).
70 +The <-- HERE shows in the regular expression about where the
71 +escape was discovered.
72 +
73 =item Invalid [] range "%s" in regex; marked by <-- HERE in m/%s/
74
75 (F) The range specified in a character class had a minimum character
76 diff --git a/proto.h b/proto.h
77 index 6d185dd..ef6c0cf 100644
78 --- a/proto.h
79 +++ b/proto.h
80 @@ -1748,6 +1748,7 @@ STATIC I32 S_regcurly(pTHX_ const char *)
81 __attribute__warn_unused_result__;
82
83 STATIC regnode* S_reg_node(pTHX_ struct RExC_state_t *state, U8 op);
84 +STATIC UV S_reg_recode(pTHX_ const char value, SV **encp);
85 STATIC regnode* S_regpiece(pTHX_ struct RExC_state_t *state, I32 *flagp);
86 STATIC void S_reginsert(pTHX_ struct RExC_state_t *state, U8 op, regnode *opnd);
87 STATIC void S_regoptail(pTHX_ struct RExC_state_t *state, regnode *p, regnode *val);
88 diff --git a/regcomp.c b/regcomp.c
89 index 928cf39..98d48dd 100644
90 --- a/regcomp.c
91 +++ b/regcomp.c
92 @@ -2791,6 +2791,39 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp)
93 }
94
95 /*
96 + * reg_recode
97 + *
98 + * It returns the code point for value after recoding it to utf8 via *encp.
99 + * value: a code value in the source encoding
100 + * encp: a pointer to an Encode object
101 + *
102 + * If the result from Encode is not a single character,
103 + * it returns U+FFFD (Replacement character) and sets *encp to NULL.
104 + */
105 +STATIC UV
106 +S_reg_recode(pTHX_ const char value, SV **encp)
107 +{
108 + STRLEN numlen = 1;
109 + SV * const sv = sv_2mortal(newSVpvn(&value, numlen));
110 + const char * const s = encp && *encp ? sv_recode_to_utf8(sv, *encp)
111 + : SvPVX(sv);
112 + const STRLEN newlen = SvCUR(sv);
113 + UV uv = UNICODE_REPLACEMENT;
114 +
115 + if (newlen)
116 + uv = SvUTF8(sv)
117 + ? utf8n_to_uvchr((U8*)s, newlen, &numlen, UTF8_ALLOW_DEFAULT)
118 + : *(U8*)s;
119 +
120 + if (!newlen || numlen != newlen) {
121 + uv = UNICODE_REPLACEMENT;
122 + if (encp)
123 + *encp = NULL;
124 + }
125 + return uv;
126 +}
127 +
128 +/*
129 - regatom - the lowest level
130 *
131 * Optimization: gobbles an entire sequence of ordinary characters so that
132 @@ -3182,6 +3215,8 @@ tryagain:
133 ender = grok_hex(p, &numlen, &flags, NULL);
134 p += numlen;
135 }
136 + if (PL_encoding && ender < 0x100)
137 + goto recode_encoding;
138 break;
139 case 'c':
140 p++;
141 @@ -3201,6 +3236,17 @@ tryagain:
142 --p;
143 goto loopdone;
144 }
145 + if (PL_encoding && ender < 0x100)
146 + goto recode_encoding;
147 + break;
148 + recode_encoding:
149 + {
150 + SV* enc = PL_encoding;
151 + ender = reg_recode((const char)(U8)ender, &enc);
152 + if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP))
153 + vWARN(p, "Invalid escape in the specified encoding");
154 + RExC_utf8 = 1;
155 + }
156 break;
157 case '\0':
158 if (p >= RExC_end)
159 @@ -3331,32 +3377,6 @@ tryagain:
160 break;
161 }
162
163 - /* If the encoding pragma is in effect recode the text of
164 - * any EXACT-kind nodes. */
165 - if (PL_encoding && PL_regkind[(U8)OP(ret)] == EXACT) {
166 - STRLEN oldlen = STR_LEN(ret);
167 - SV *sv = sv_2mortal(newSVpvn(STRING(ret), oldlen));
168 -
169 - if (RExC_utf8)
170 - SvUTF8_on(sv);
171 - if (sv_utf8_downgrade(sv, TRUE)) {
172 - const char * const s = sv_recode_to_utf8(sv, PL_encoding);
173 - const STRLEN newlen = SvCUR(sv);
174 -
175 - if (SvUTF8(sv))
176 - RExC_utf8 = 1;
177 - if (!SIZE_ONLY) {
178 - DEBUG_r(PerlIO_printf(Perl_debug_log, "recode %*s to %*s\n",
179 - (int)oldlen, STRING(ret),
180 - (int)newlen, s));
181 - Copy(s, STRING(ret), newlen, char);
182 - STR_LEN(ret) += newlen - oldlen;
183 - RExC_emit += STR_SZ(newlen) - STR_SZ(oldlen);
184 - } else
185 - RExC_size += STR_SZ(newlen) - STR_SZ(oldlen);
186 - }
187 - }
188 -
189 return(ret);
190 }
191
192 @@ -3734,6 +3754,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
193 value = grok_hex(RExC_parse, &numlen, &flags, NULL);
194 RExC_parse += numlen;
195 }
196 + if (PL_encoding && value < 0x100)
197 + goto recode_encoding;
198 break;
199 case 'c':
200 value = UCHARAT(RExC_parse++);
201 @@ -3741,13 +3763,24 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
202 break;
203 case '0': case '1': case '2': case '3': case '4':
204 case '5': case '6': case '7': case '8': case '9':
205 - {
206 - I32 flags = 0;
207 - numlen = 3;
208 - value = grok_oct(--RExC_parse, &numlen, &flags, NULL);
209 - RExC_parse += numlen;
210 - break;
211 - }
212 + {
213 + I32 flags = 0;
214 + numlen = 3;
215 + value = grok_oct(--RExC_parse, &numlen, &flags, NULL);
216 + RExC_parse += numlen;
217 + if (PL_encoding && value < 0x100)
218 + goto recode_encoding;
219 + break;
220 + }
221 + recode_encoding:
222 + {
223 + SV* enc = PL_encoding;
224 + value = reg_recode((const char)(U8)value, &enc);
225 + if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP))
226 + vWARN(RExC_parse,
227 + "Invalid escape in the specified encoding");
228 + break;
229 + }
230 default:
231 if (!SIZE_ONLY && isALPHA(value) && ckWARN(WARN_REGEXP))
232 vWARN2(RExC_parse,
233 diff --git a/t/uni/tr_utf8.t b/t/uni/tr_utf8.t
234 index 606a84a..354156a 100755
235 --- a/t/uni/tr_utf8.t
236 +++ b/t/uni/tr_utf8.t
237 @@ -31,7 +31,7 @@ BEGIN {
238 }
239
240 use strict;
241 -use Test::More tests => 7;
242 +use Test::More tests => 8;
243
244 use encoding 'utf8';
245
246 @@ -67,4 +67,12 @@ is($str, $hiragana, "s/// # hiragana -> katakana");
247 $line =~ tr/bcdeghijklmnprstvwxyz$02578/בצדעגהיײקלמנפּרסטװשכיזשױתײחא/;
248 is($line, "aבצדעfגהיײקלמנoפqּרסuטװשכיזש1ױ34ת6ײח9", "[perl #16843]");
249 }
250 +
251 +{
252 + # [perl #40641]
253 + my $str = qq/Gebääääääääääääääääääääude/;
254 + my $reg = qr/Gebääääääääääääääääääääude/;
255 + ok($str =~ /$reg/, "[perl #40641]");
256 +}
257 +
258 __END__
259 diff --git a/utf8.h b/utf8.h
260 index 6d63897..3800866 100644
261 --- a/utf8.h
262 +++ b/utf8.h
263 @@ -198,6 +198,8 @@ encoded character.
264 UTF8_ALLOW_SURROGATE|UTF8_ALLOW_FFFF)
265 #define UTF8_ALLOW_ANY 0x00FF
266 #define UTF8_CHECK_ONLY 0x0200
267 +#define UTF8_ALLOW_DEFAULT (ckWARN(WARN_UTF8) ? 0 : \
268 + UTF8_ALLOW_ANYUV)
269
270 #define UNICODE_SURROGATE_FIRST 0xD800
271 #define UNICODE_SURROGATE_LAST 0xDFFF
272
273
274
275 --
276 gentoo-commits@l.g.o mailing list