1 |
tove 08/05/09 11:12:00 |
2 |
|
3 |
Added: perl-5.8.8-CVE-2008-1927.patch |
4 |
Log: |
5 |
#219203 - Fix for CVE-2008-1927 |
6 |
(Portage version: 2.1.5_rc7) |
7 |
|
8 |
Revision Changes Path |
9 |
1.1 dev-lang/perl/files/perl-5.8.8-CVE-2008-1927.patch |
10 |
|
11 |
file : http://sources.gentoo.org/viewcvs.py/gentoo-x86/dev-lang/perl/files/perl-5.8.8-CVE-2008-1927.patch?rev=1.1&view=markup |
12 |
plain: http://sources.gentoo.org/viewcvs.py/gentoo-x86/dev-lang/perl/files/perl-5.8.8-CVE-2008-1927.patch?rev=1.1&content-type=text/plain |
13 |
|
14 |
Index: perl-5.8.8-CVE-2008-1927.patch |
15 |
=================================================================== |
16 |
Fix a double free / segfault with utf8 regexps |
17 |
Debian #454792 |
18 |
[rt.cpan.org #48156] |
19 |
[rt.cpan.org #40641] |
20 |
upstream change 29204 |
21 |
|
22 |
UTF8_ALLOW_DEFAULT definition in utf8.h picked from upstream change 27688 |
23 |
|
24 |
diff --git a/embed.fnc b/embed.fnc |
25 |
index edfbc0e..26524c7 100644 |
26 |
--- a/embed.fnc |
27 |
+++ b/embed.fnc |
28 |
@@ -1168,6 +1168,7 @@ Es |void |reguni |NN const struct RExC_state_t *state|UV uv|NN char *s|NN STRLE |
29 |
Es |regnode*|regclass |NN struct RExC_state_t *state |
30 |
ERs |I32 |regcurly |NN const char * |
31 |
Es |regnode*|reg_node |NN struct RExC_state_t *state|U8 op |
32 |
+Es |UV |reg_recode |const char value|NULLOK SV **encp |
33 |
Es |regnode*|regpiece |NN struct RExC_state_t *state|NN I32 *flagp |
34 |
Es |void |reginsert |NN struct RExC_state_t *state|U8 op|NN regnode *opnd |
35 |
Es |void |regoptail |NN struct RExC_state_t *state|NN regnode *p|NN regnode *val |
36 |
diff --git a/embed.h b/embed.h |
37 |
index 2b38fd5..372b04f 100644 |
38 |
--- a/embed.h |
39 |
+++ b/embed.h |
40 |
@@ -1234,6 +1234,7 @@ |
41 |
#define regclass S_regclass |
42 |
#define regcurly S_regcurly |
43 |
#define reg_node S_reg_node |
44 |
+#define reg_recode S_reg_recode |
45 |
#define regpiece S_regpiece |
46 |
#define reginsert S_reginsert |
47 |
#define regoptail S_regoptail |
48 |
@@ -3277,6 +3278,7 @@ |
49 |
#define regclass(a) S_regclass(aTHX_ a) |
50 |
#define regcurly(a) S_regcurly(aTHX_ a) |
51 |
#define reg_node(a,b) S_reg_node(aTHX_ a,b) |
52 |
+#define reg_recode(a,b) S_reg_recode(aTHX_ a,b) |
53 |
#define regpiece(a,b) S_regpiece(aTHX_ a,b) |
54 |
#define reginsert(a,b,c) S_reginsert(aTHX_ a,b,c) |
55 |
#define regoptail(a,b,c) S_regoptail(aTHX_ a,b,c) |
56 |
diff --git a/pod/perldiag.pod b/pod/perldiag.pod |
57 |
index 9b3134c..7d95216 100644 |
58 |
--- a/pod/perldiag.pod |
59 |
+++ b/pod/perldiag.pod |
60 |
@@ -1900,6 +1900,15 @@ recognized by Perl or by a user-supplied handler. See L<attributes>. |
61 |
(W printf) Perl does not understand the given format conversion. See |
62 |
L<perlfunc/sprintf>. |
63 |
|
64 |
+=item Invalid escape in the specified encoding in regex; marked by <-- HERE in m/%s/ |
65 |
+ |
66 |
+(W regexp) The numeric escape (for example C<\xHH>) of value < 256 |
67 |
+didn't correspond to a single character through the conversion |
68 |
+from the encoding specified by the encoding pragma. |
69 |
+The escape was replaced with REPLACEMENT CHARACTER (U+FFFD) instead. |
70 |
+The <-- HERE shows in the regular expression about where the |
71 |
+escape was discovered. |
72 |
+ |
73 |
=item Invalid [] range "%s" in regex; marked by <-- HERE in m/%s/ |
74 |
|
75 |
(F) The range specified in a character class had a minimum character |
76 |
diff --git a/proto.h b/proto.h |
77 |
index 6d185dd..ef6c0cf 100644 |
78 |
--- a/proto.h |
79 |
+++ b/proto.h |
80 |
@@ -1748,6 +1748,7 @@ STATIC I32 S_regcurly(pTHX_ const char *) |
81 |
__attribute__warn_unused_result__; |
82 |
|
83 |
STATIC regnode* S_reg_node(pTHX_ struct RExC_state_t *state, U8 op); |
84 |
+STATIC UV S_reg_recode(pTHX_ const char value, SV **encp); |
85 |
STATIC regnode* S_regpiece(pTHX_ struct RExC_state_t *state, I32 *flagp); |
86 |
STATIC void S_reginsert(pTHX_ struct RExC_state_t *state, U8 op, regnode *opnd); |
87 |
STATIC void S_regoptail(pTHX_ struct RExC_state_t *state, regnode *p, regnode *val); |
88 |
diff --git a/regcomp.c b/regcomp.c |
89 |
index 928cf39..98d48dd 100644 |
90 |
--- a/regcomp.c |
91 |
+++ b/regcomp.c |
92 |
@@ -2791,6 +2791,39 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp) |
93 |
} |
94 |
|
95 |
/* |
96 |
+ * reg_recode |
97 |
+ * |
98 |
+ * It returns the code point for value, as decoded by the Encode object in *encp. |
99 |
+ * value: a code value in the source encoding |
100 |
+ * encp: a pointer to an Encode object |
101 |
+ * |
102 |
+ * If the result from Encode is not a single character, |
103 |
+ * it returns U+FFFD (Replacement character) and sets *encp to NULL. |
104 |
+ */ |
105 |
+STATIC UV |
106 |
+S_reg_recode(pTHX_ const char value, SV **encp) |
107 |
+{ |
108 |
+ STRLEN numlen = 1; |
109 |
+ SV * const sv = sv_2mortal(newSVpvn(&value, numlen)); |
110 |
+ const char * const s = encp && *encp ? sv_recode_to_utf8(sv, *encp) |
111 |
+ : SvPVX(sv); |
112 |
+ const STRLEN newlen = SvCUR(sv); |
113 |
+ UV uv = UNICODE_REPLACEMENT; |
114 |
+ |
115 |
+ if (newlen) |
116 |
+ uv = SvUTF8(sv) |
117 |
+ ? utf8n_to_uvchr((U8*)s, newlen, &numlen, UTF8_ALLOW_DEFAULT) |
118 |
+ : *(U8*)s; |
119 |
+ |
120 |
+ if (!newlen || numlen != newlen) { |
121 |
+ uv = UNICODE_REPLACEMENT; |
122 |
+ if (encp) |
123 |
+ *encp = NULL; |
124 |
+ } |
125 |
+ return uv; |
126 |
+} |
127 |
+ |
128 |
+/* |
129 |
- regatom - the lowest level |
130 |
* |
131 |
* Optimization: gobbles an entire sequence of ordinary characters so that |
132 |
@@ -3182,6 +3215,8 @@ tryagain: |
133 |
ender = grok_hex(p, &numlen, &flags, NULL); |
134 |
p += numlen; |
135 |
} |
136 |
+ if (PL_encoding && ender < 0x100) |
137 |
+ goto recode_encoding; |
138 |
break; |
139 |
case 'c': |
140 |
p++; |
141 |
@@ -3201,6 +3236,17 @@ tryagain: |
142 |
--p; |
143 |
goto loopdone; |
144 |
} |
145 |
+ if (PL_encoding && ender < 0x100) |
146 |
+ goto recode_encoding; |
147 |
+ break; |
148 |
+ recode_encoding: |
149 |
+ { |
150 |
+ SV* enc = PL_encoding; |
151 |
+ ender = reg_recode((const char)(U8)ender, &enc); |
152 |
+ if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP)) |
153 |
+ vWARN(p, "Invalid escape in the specified encoding"); |
154 |
+ RExC_utf8 = 1; |
155 |
+ } |
156 |
break; |
157 |
case '\0': |
158 |
if (p >= RExC_end) |
159 |
@@ -3331,32 +3377,6 @@ tryagain: |
160 |
break; |
161 |
} |
162 |
|
163 |
- /* If the encoding pragma is in effect recode the text of |
164 |
- * any EXACT-kind nodes. */ |
165 |
- if (PL_encoding && PL_regkind[(U8)OP(ret)] == EXACT) { |
166 |
- STRLEN oldlen = STR_LEN(ret); |
167 |
- SV *sv = sv_2mortal(newSVpvn(STRING(ret), oldlen)); |
168 |
- |
169 |
- if (RExC_utf8) |
170 |
- SvUTF8_on(sv); |
171 |
- if (sv_utf8_downgrade(sv, TRUE)) { |
172 |
- const char * const s = sv_recode_to_utf8(sv, PL_encoding); |
173 |
- const STRLEN newlen = SvCUR(sv); |
174 |
- |
175 |
- if (SvUTF8(sv)) |
176 |
- RExC_utf8 = 1; |
177 |
- if (!SIZE_ONLY) { |
178 |
- DEBUG_r(PerlIO_printf(Perl_debug_log, "recode %*s to %*s\n", |
179 |
- (int)oldlen, STRING(ret), |
180 |
- (int)newlen, s)); |
181 |
- Copy(s, STRING(ret), newlen, char); |
182 |
- STR_LEN(ret) += newlen - oldlen; |
183 |
- RExC_emit += STR_SZ(newlen) - STR_SZ(oldlen); |
184 |
- } else |
185 |
- RExC_size += STR_SZ(newlen) - STR_SZ(oldlen); |
186 |
- } |
187 |
- } |
188 |
- |
189 |
return(ret); |
190 |
} |
191 |
|
192 |
@@ -3734,6 +3754,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) |
193 |
value = grok_hex(RExC_parse, &numlen, &flags, NULL); |
194 |
RExC_parse += numlen; |
195 |
} |
196 |
+ if (PL_encoding && value < 0x100) |
197 |
+ goto recode_encoding; |
198 |
break; |
199 |
case 'c': |
200 |
value = UCHARAT(RExC_parse++); |
201 |
@@ -3741,13 +3763,24 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) |
202 |
break; |
203 |
case '0': case '1': case '2': case '3': case '4': |
204 |
case '5': case '6': case '7': case '8': case '9': |
205 |
- { |
206 |
- I32 flags = 0; |
207 |
- numlen = 3; |
208 |
- value = grok_oct(--RExC_parse, &numlen, &flags, NULL); |
209 |
- RExC_parse += numlen; |
210 |
- break; |
211 |
- } |
212 |
+ { |
213 |
+ I32 flags = 0; |
214 |
+ numlen = 3; |
215 |
+ value = grok_oct(--RExC_parse, &numlen, &flags, NULL); |
216 |
+ RExC_parse += numlen; |
217 |
+ if (PL_encoding && value < 0x100) |
218 |
+ goto recode_encoding; |
219 |
+ break; |
220 |
+ } |
221 |
+ recode_encoding: |
222 |
+ { |
223 |
+ SV* enc = PL_encoding; |
224 |
+ value = reg_recode((const char)(U8)value, &enc); |
225 |
+ if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP)) |
226 |
+ vWARN(RExC_parse, |
227 |
+ "Invalid escape in the specified encoding"); |
228 |
+ break; |
229 |
+ } |
230 |
default: |
231 |
if (!SIZE_ONLY && isALPHA(value) && ckWARN(WARN_REGEXP)) |
232 |
vWARN2(RExC_parse, |
233 |
diff --git a/t/uni/tr_utf8.t b/t/uni/tr_utf8.t |
234 |
index 606a84a..354156a 100755 |
235 |
--- a/t/uni/tr_utf8.t |
236 |
+++ b/t/uni/tr_utf8.t |
237 |
@@ -31,7 +31,7 @@ BEGIN { |
238 |
} |
239 |
|
240 |
use strict; |
241 |
-use Test::More tests => 7; |
242 |
+use Test::More tests => 8; |
243 |
|
244 |
use encoding 'utf8'; |
245 |
|
246 |
@@ -67,4 +67,12 @@ is($str, $hiragana, "s/// # hiragana -> katakana"); |
247 |
$line =~ tr/bcdeghijklmnprstvwxyz$02578/בצדעגהיײקלמנפּרסטװשכיזשױתײחא/; |
248 |
is($line, "aבצדעfגהיײקלמנoפqּרסuטװשכיזש1ױ34ת6ײח9", "[perl #16843]"); |
249 |
} |
250 |
+ |
251 |
+{ |
252 |
+ # [perl #40641] |
253 |
+ my $str = qq/Gebääääääääääääääääääääude/; |
254 |
+ my $reg = qr/Gebääääääääääääääääääääude/; |
255 |
+ ok($str =~ /$reg/, "[perl #40641]"); |
256 |
+} |
257 |
+ |
258 |
__END__ |
259 |
diff --git a/utf8.h b/utf8.h |
260 |
index 6d63897..3800866 100644 |
261 |
--- a/utf8.h |
262 |
+++ b/utf8.h |
263 |
@@ -198,6 +198,8 @@ encoded character. |
264 |
UTF8_ALLOW_SURROGATE|UTF8_ALLOW_FFFF) |
265 |
#define UTF8_ALLOW_ANY 0x00FF |
266 |
#define UTF8_CHECK_ONLY 0x0200 |
267 |
+#define UTF8_ALLOW_DEFAULT (ckWARN(WARN_UTF8) ? 0 : \ |
268 |
+ UTF8_ALLOW_ANYUV) |
269 |
|
270 |
#define UNICODE_SURROGATE_FIRST 0xD800 |
271 |
#define UNICODE_SURROGATE_LAST 0xDFFF |
272 |
|
273 |
|
274 |
|
275 |
-- |
276 |
gentoo-commits@l.g.o mailing list |