[gentoo-commits] gentoo-x86 commit in sys-devel/libperl/files: libperl-5.8.8-utf8-boundary.patch libperl-5.8.8-CVE-2008-1927.patch - gentoo-commits

From:	"Torsten Veller (tove)" <tove@g.o>
To:	gentoo-commits@l.g.o
Subject:	[gentoo-commits] gentoo-x86 commit in sys-devel/libperl/files: libperl-5.8.8-utf8-boundary.patch libperl-5.8.8-CVE-2008-1927.patch
Date:	Fri, 09 May 2008 11:04:58
Message-Id:	`E1JuQPH-0000n7-GZ@stork.gentoo.org`

1

tove        08/05/09 11:04:55

2

3

  Added:                libperl-5.8.8-utf8-boundary.patch

4

                        libperl-5.8.8-CVE-2008-1927.patch

5

  Log:

6

  #219203 - Version bump. Added patch from bug #198196. Apply lib64.patch for ppc64 too like dev-lang/perl-5.8.8 does

7

  (Portage version: 2.1.5_rc7)

8

9

Revision  Changes    Path

10

1.1                  sys-devel/libperl/files/libperl-5.8.8-utf8-boundary.patch

11

12

file : http://sources.gentoo.org/viewcvs.py/gentoo-x86/sys-devel/libperl/files/libperl-5.8.8-utf8-boundary.patch?rev=1.1&view=markup

13

plain: http://sources.gentoo.org/viewcvs.py/gentoo-x86/sys-devel/libperl/files/libperl-5.8.8-utf8-boundary.patch?rev=1.1&content-type=text/plain

14

15

Index: libperl-5.8.8-utf8-boundary.patch

16

===================================================================

17

--- regcomp.c	2006-01-08 12:59:27.000000000 -0800

18

+++ regcomp.c	2007-10-05 12:07:55.000000000 -0700

19

@@ -135,7 +135,8 @@

20

     I32		extralen;

21

     I32		seen_zerolen;

22

     I32		seen_evals;

23

-    I32		utf8;

24

+    I32		utf8;			/* pattern is utf8 or not */

25

+    I32		orig_utf8;		/* pattern was originally utf8 */

26

 #if ADD_TO_REGEXEC

27

     char 	*starttry;		/* -Dr: where regtry was called. */

28

 #define RExC_starttry	(pRExC_state->starttry)

29

@@ -161,6 +162,7 @@

30

 #define RExC_seen_zerolen	(pRExC_state->seen_zerolen)

31

 #define RExC_seen_evals	(pRExC_state->seen_evals)

32

 #define RExC_utf8	(pRExC_state->utf8)

33

+#define RExC_orig_utf8	(pRExC_state->orig_utf8)

34

35

 #define	ISMULT1(c)	((c) == '*' || (c) == '+' || (c) == '?')

36

 #define	ISMULT2(s)	((*s) == '*' || (*s) == '+' || (*s) == '?' || \

37

@@ -1749,15 +1751,17 @@

38

     if (exp == NULL)

39

 	FAIL("NULL regexp argument");

40

41

-    RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;

42

+    RExC_orig_utf8 = RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;

43

44

-    RExC_precomp = exp;

45

     DEBUG_r({

46

 	 if (!PL_colorset) reginitcolors();

47

 	 PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",

48

 		       PL_colors[4],PL_colors[5],PL_colors[0],

49

-		       (int)(xend - exp), RExC_precomp, PL_colors[1]);

50

+		       (int)(xend - exp), exp, PL_colors[1]);

51

});

52

+

53

+redo_first_pass:

54

+    RExC_precomp = exp;

55

     RExC_flags = pm->op_pmflags;

56

     RExC_sawback = 0;

57

58

@@ -1783,6 +1787,17 @@

59

 	RExC_precomp = Nullch;

60

 	return(NULL);

61

}

62

+    if (RExC_utf8 && !RExC_orig_utf8) {

63

+        STRLEN len = xend-exp;

64

+        DEBUG_r(PerlIO_printf(Perl_debug_log,

65

+                    "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));

66

+        exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)exp, &len);

67

+        xend = exp + len;

68

+        RExC_orig_utf8 = RExC_utf8;

69

+        SAVEFREEPV(exp);

70

+        goto redo_first_pass;

71

+    }

72

+

1.1                  sys-devel/libperl/files/libperl-5.8.8-CVE-2008-1927.patch

78

79

file : http://sources.gentoo.org/viewcvs.py/gentoo-x86/sys-devel/libperl/files/libperl-5.8.8-CVE-2008-1927.patch?rev=1.1&view=markup

80

plain: http://sources.gentoo.org/viewcvs.py/gentoo-x86/sys-devel/libperl/files/libperl-5.8.8-CVE-2008-1927.patch?rev=1.1&content-type=text/plain

81

82

Index: libperl-5.8.8-CVE-2008-1927.patch

83

===================================================================

84

Fix a double free / segfault with utf8 regexps 

85

Debian #454792

86

[rt.cpan.org #48156]

87

[rt.cpan.org #40641]

88

upstream change 29204

89

90

UTF8_ALLOW_DEFAULT definition in utf8.h picked from upstream change 27688

91

92

diff --git a/embed.fnc b/embed.fnc

93

index edfbc0e..26524c7 100644

94

--- a/embed.fnc

95

+++ b/embed.fnc

96

@@ -1168,6 +1168,7 @@ Es	|void	|reguni		|NN const struct RExC_state_t *state|UV uv|NN char *s|NN STRLE

97

 Es	|regnode*|regclass	|NN struct RExC_state_t *state

98

 ERs	|I32	|regcurly	|NN const char *

99

 Es	|regnode*|reg_node	|NN struct RExC_state_t *state|U8 op

100

+Es	|UV	|reg_recode	|const char value|NULLOK SV **encp

101

 Es	|regnode*|regpiece	|NN struct RExC_state_t *state|NN I32 *flagp

102

 Es	|void	|reginsert	|NN struct RExC_state_t *state|U8 op|NN regnode *opnd

103

 Es	|void	|regoptail	|NN struct RExC_state_t *state|NN regnode *p|NN regnode *val

104

diff --git a/embed.h b/embed.h

105

index 2b38fd5..372b04f 100644

106

--- a/embed.h

107

+++ b/embed.h

108

@@ -1234,6 +1234,7 @@

109

 #define regclass		S_regclass

110

 #define regcurly		S_regcurly

111

 #define reg_node		S_reg_node

112

+#define reg_recode		S_reg_recode

113

 #define regpiece		S_regpiece

114

 #define reginsert		S_reginsert

115

 #define regoptail		S_regoptail

116

@@ -3277,6 +3278,7 @@

117

 #define regclass(a)		S_regclass(aTHX_ a)

118

 #define regcurly(a)		S_regcurly(aTHX_ a)

119

 #define reg_node(a,b)		S_reg_node(aTHX_ a,b)

120

+#define reg_recode(a,b)		S_reg_recode(aTHX_ a,b)

121

 #define regpiece(a,b)		S_regpiece(aTHX_ a,b)

122

 #define reginsert(a,b,c)	S_reginsert(aTHX_ a,b,c)

123

 #define regoptail(a,b,c)	S_regoptail(aTHX_ a,b,c)

124

diff --git a/pod/perldiag.pod b/pod/perldiag.pod

125

index 9b3134c..7d95216 100644

126

--- a/pod/perldiag.pod

127

+++ b/pod/perldiag.pod

128

@@ -1900,6 +1900,15 @@ recognized by Perl or by a user-supplied handler.  See L<attributes>.

129

 (W printf) Perl does not understand the given format conversion.  See

130

 L<perlfunc/sprintf>.

131

132

+=item Invalid escape in the specified encoding in regex; marked by <-- HERE in m/%s/

133

+

134

+(W regexp) The numeric escape (for example C<\xHH>) of value < 256

135

+didn't correspond to a single character through the conversion

136

+from the encoding specified by the encoding pragma.

137

+The escape was replaced with REPLACEMENT CHARACTER (U+FFFD) instead.

138

+The <-- HERE shows in the regular expression about where the

139

+escape was discovered.

140

+

141

 =item Invalid [] range "%s" in regex; marked by <-- HERE in m/%s/

142

143

 (F) The range specified in a character class had a minimum character

144

diff --git a/proto.h b/proto.h

145

index 6d185dd..ef6c0cf 100644

146

--- a/proto.h

147

+++ b/proto.h

148

@@ -1748,6 +1748,7 @@ STATIC I32	S_regcurly(pTHX_ const char *)

149

 			__attribute__warn_unused_result__;

150

151

 STATIC regnode*	S_reg_node(pTHX_ struct RExC_state_t *state, U8 op);

152

+STATIC UV	S_reg_recode(pTHX_ const char value, SV **encp);

153

 STATIC regnode*	S_regpiece(pTHX_ struct RExC_state_t *state, I32 *flagp);

154

 STATIC void	S_reginsert(pTHX_ struct RExC_state_t *state, U8 op, regnode *opnd);

155

 STATIC void	S_regoptail(pTHX_ struct RExC_state_t *state, regnode *p, regnode *val);

156

diff --git a/regcomp.c b/regcomp.c

157

index 928cf39..98d48dd 100644

158

--- a/regcomp.c

159

+++ b/regcomp.c

160

@@ -2791,6 +2791,39 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp)

161

}

162

163

/*

164

+ * reg_recode

165

+ *

166

+ * It returns the code point in utf8 for the value in *encp.

167

+ *    value: a code value in the source encoding

168

+ *    encp:  a pointer to an Encode object

169

+ *

170

+ * If the result from Encode is not a single character,

171

+ * it returns U+FFFD (Replacement character) and sets *encp to NULL.

172

+ */

173

+STATIC UV

174

+S_reg_recode(pTHX_ const char value, SV **encp)

175

+{

176

+    STRLEN numlen = 1;

177

+    SV * const sv = sv_2mortal(newSVpvn(&value, numlen));

178

+    const char * const s = encp && *encp ? sv_recode_to_utf8(sv, *encp)

179

+					 : SvPVX(sv);

180

+    const STRLEN newlen = SvCUR(sv);

181

+    UV uv = UNICODE_REPLACEMENT;

182

+

183

+    if (newlen)

184

+	uv = SvUTF8(sv)

185

+	     ? utf8n_to_uvchr((U8*)s, newlen, &numlen, UTF8_ALLOW_DEFAULT)

186

+	     : *(U8*)s;

187

+

188

+    if (!newlen || numlen != newlen) {

189

+	uv = UNICODE_REPLACEMENT;

190

+	if (encp)

191

+	    *encp = NULL;

192

+    }

193

+    return uv;

194

+}

195

+

196

+/*

197

  - regatom - the lowest level

198

*

199

  * Optimization:  gobbles an entire sequence of ordinary characters so that

200

@@ -3182,6 +3215,8 @@ tryagain:

201

 			    ender = grok_hex(p, &numlen, &flags, NULL);

202

 			    p += numlen;

203

}

204

+			if (PL_encoding && ender < 0x100)

205

+			    goto recode_encoding;

206

 			break;

207

 		    case 'c':

208

 			p++;

209

@@ -3201,6 +3236,17 @@ tryagain:

210

 			    --p;

211

 			    goto loopdone;

212

}

213

+			if (PL_encoding && ender < 0x100)

214

+			    goto recode_encoding;

215

+			break;

216

+		    recode_encoding:

217

+			{

218

+			    SV* enc = PL_encoding;

219

+			    ender = reg_recode((const char)(U8)ender, &enc);

220

+			    if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP))

221

+				vWARN(p, "Invalid escape in the specified encoding");

222

+			    RExC_utf8 = 1;

223

+			}

224

 			break;

225

 		    case '\0':

226

 			if (p >= RExC_end)

227

@@ -3331,32 +3377,6 @@ tryagain:

228

 	break;

229

}

230

231

-    /* If the encoding pragma is in effect recode the text of

232

-     * any EXACT-kind nodes. */

233

-    if (PL_encoding && PL_regkind[(U8)OP(ret)] == EXACT) {

234

-	STRLEN oldlen = STR_LEN(ret);

235

-	SV *sv        = sv_2mortal(newSVpvn(STRING(ret), oldlen));

236

-

237

-	if (RExC_utf8)

238

-	    SvUTF8_on(sv);

239

-	if (sv_utf8_downgrade(sv, TRUE)) {

240

-	    const char * const s = sv_recode_to_utf8(sv, PL_encoding);

241

-	    const STRLEN newlen = SvCUR(sv);

242

-

243

-	    if (SvUTF8(sv))

244

-		RExC_utf8 = 1;

245

-	    if (!SIZE_ONLY) {

246

-		DEBUG_r(PerlIO_printf(Perl_debug_log, "recode %*s to %*s\n",

247

-				      (int)oldlen, STRING(ret),

248

-				      (int)newlen, s));

249

-		Copy(s, STRING(ret), newlen, char);

250

-		STR_LEN(ret) += newlen - oldlen;

251

-		RExC_emit += STR_SZ(newlen) - STR_SZ(oldlen);

252

-	    } else

253

-		RExC_size += STR_SZ(newlen) - STR_SZ(oldlen);

254

-	}

255

-    }

256

-

257

     return(ret);

258

}

259

260

@@ -3734,6 +3754,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)

261

 		    value = grok_hex(RExC_parse, &numlen, &flags, NULL);

262

 		    RExC_parse += numlen;

263

}

264

+		if (PL_encoding && value < 0x100)

265

+		    goto recode_encoding;

266

 		break;

267

 	    case 'c':

268

 		value = UCHARAT(RExC_parse++);

269

@@ -3741,13 +3763,24 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)

270

 		break;

271

 	    case '0': case '1': case '2': case '3': case '4':

272

 	    case '5': case '6': case '7': case '8': case '9':

273

-            {

274

-                I32 flags = 0;

275

-		numlen = 3;

276

-		value = grok_oct(--RExC_parse, &numlen, &flags, NULL);

277

-		RExC_parse += numlen;

278

-		break;

279

-            }

280

+		{

281

+		    I32 flags = 0;

282

+		    numlen = 3;

283

+		    value = grok_oct(--RExC_parse, &numlen, &flags, NULL);

284

+		    RExC_parse += numlen;

285

+		    if (PL_encoding && value < 0x100)

286

+			goto recode_encoding;

287

+		    break;

288

+		}

289

+	    recode_encoding:

290

+		{

291

+		    SV* enc = PL_encoding;

292

+		    value = reg_recode((const char)(U8)value, &enc);

293

+		    if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP))

294

+			vWARN(RExC_parse,

295

+			      "Invalid escape in the specified encoding");

296

+		    break;

297

+		}

298

 	    default:

299

 		if (!SIZE_ONLY && isALPHA(value) && ckWARN(WARN_REGEXP))

300

 		    vWARN2(RExC_parse,

301

diff --git a/t/uni/tr_utf8.t b/t/uni/tr_utf8.t

302

index 606a84a..354156a 100755

303

--- a/t/uni/tr_utf8.t

304

+++ b/t/uni/tr_utf8.t

305

@@ -31,7 +31,7 @@ BEGIN {

306

}

307

308

 use strict;

309

-use Test::More tests => 7;

310

+use Test::More tests => 8;

311

312

 use encoding 'utf8';

313

314

@@ -67,4 +67,12 @@ is($str, $hiragana, "s/// # hiragana -> katakana");

315

   $line =~ tr/bcdeghijklmnprstvwxyz$02578/בצדעגהיײקלמנפּרסטװשכיזשױתײחא/;

316

   is($line, "aבצדעfגהיײקלמנoפqּרסuטװשכיזש1ױ34ת6ײח9", "[perl #16843]");

317

}

318

+

319

+{

320

+  # [perl #40641]

321

+  my $str = qq/Gebääääääääääääääääääääude/;

322

+  my $reg = qr/Gebääääääääääääääääääääude/;

323

+  ok($str =~ /$reg/, "[perl #40641]");

324

+}

325

+

326

 __END__

327

diff --git a/utf8.h b/utf8.h

328

index 6d63897..3800866 100644

329

--- a/utf8.h

330

+++ b/utf8.h

331

@@ -198,6 +198,8 @@ encoded character.

332

 					 UTF8_ALLOW_SURROGATE|UTF8_ALLOW_FFFF)

333

 #define UTF8_ALLOW_ANY			0x00FF

334

 #define UTF8_CHECK_ONLY			0x0200

335

+#define UTF8_ALLOW_DEFAULT             (ckWARN(WARN_UTF8) ? 0 : \

336

+                                        UTF8_ALLOW_ANYUV)

337

338

 #define UNICODE_SURROGATE_FIRST		0xD800

339

 #define UNICODE_SURROGATE_LAST		0xDFFF

--

344

gentoo-commits@l.g.o mailing list

1	tove 08/05/09 11:04:55
2
3	Added: libperl-5.8.8-utf8-boundary.patch
4	libperl-5.8.8-CVE-2008-1927.patch
5	Log:
6	#219203 - Version bump. Added patch from bug #198196. Apply lib64.patch for ppc64 too like dev-lang/perl-5.8.8 does
7	(Portage version: 2.1.5_rc7)
8
9	Revision Changes Path
10	1.1 sys-devel/libperl/files/libperl-5.8.8-utf8-boundary.patch
11
12	file : http://sources.gentoo.org/viewcvs.py/gentoo-x86/sys-devel/libperl/files/libperl-5.8.8-utf8-boundary.patch?rev=1.1&view=markup
13	plain: http://sources.gentoo.org/viewcvs.py/gentoo-x86/sys-devel/libperl/files/libperl-5.8.8-utf8-boundary.patch?rev=1.1&content-type=text/plain
14
15	Index: libperl-5.8.8-utf8-boundary.patch
16	===================================================================
17	--- regcomp.c 2006-01-08 12:59:27.000000000 -0800
18	+++ regcomp.c 2007-10-05 12:07:55.000000000 -0700
19	@@ -135,7 +135,8 @@
20	I32 extralen;
21	I32 seen_zerolen;
22	I32 seen_evals;
23	- I32 utf8;
24	+ I32 utf8; /* pattern is utf8 or not */
25	+ I32 orig_utf8; /* pattern was originally utf8 */
26	#if ADD_TO_REGEXEC
27	char starttry; / -Dr: where regtry was called. */
28	#define RExC_starttry (pRExC_state->starttry)
29	@@ -161,6 +162,7 @@
30	#define RExC_seen_zerolen (pRExC_state->seen_zerolen)
31	#define RExC_seen_evals (pRExC_state->seen_evals)
32	#define RExC_utf8 (pRExC_state->utf8)
33	+#define RExC_orig_utf8 (pRExC_state->orig_utf8)
34
35	#define ISMULT1(c) ((c) == '*' \|\| (c) == '+' \|\| (c) == '?')
36	#define ISMULT2(s) ((s) == '' \|\| (s) == '+' \|\| (s) == '?' \|\| \
37	@@ -1749,15 +1751,17 @@
38	if (exp == NULL)
39	FAIL("NULL regexp argument");
40
41	- RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
42	+ RExC_orig_utf8 = RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
43
44	- RExC_precomp = exp;
45	DEBUG_r({
46	if (!PL_colorset) reginitcolors();
47	PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",
48	PL_colors[4],PL_colors[5],PL_colors[0],
49	- (int)(xend - exp), RExC_precomp, PL_colors[1]);
50	+ (int)(xend - exp), exp, PL_colors[1]);
51	});
52	+
53	+redo_first_pass:
54	+ RExC_precomp = exp;
55	RExC_flags = pm->op_pmflags;
56	RExC_sawback = 0;
57
58	@@ -1783,6 +1787,17 @@
59	RExC_precomp = Nullch;
60	return(NULL);
61	}
62	+ if (RExC_utf8 && !RExC_orig_utf8) {
63	+ STRLEN len = xend-exp;
64	+ DEBUG_r(PerlIO_printf(Perl_debug_log,
65	+ "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
66	+ exp = (char)Perl_bytes_to_utf8(aTHX_ (U8)exp, &len);
67	+ xend = exp + len;
68	+ RExC_orig_utf8 = RExC_utf8;
69	+ SAVEFREEPV(exp);
70	+ goto redo_first_pass;
71	+ }
72	+
73
74
75
76
77	1.1 sys-devel/libperl/files/libperl-5.8.8-CVE-2008-1927.patch
78
79	file : http://sources.gentoo.org/viewcvs.py/gentoo-x86/sys-devel/libperl/files/libperl-5.8.8-CVE-2008-1927.patch?rev=1.1&view=markup
80	plain: http://sources.gentoo.org/viewcvs.py/gentoo-x86/sys-devel/libperl/files/libperl-5.8.8-CVE-2008-1927.patch?rev=1.1&content-type=text/plain
81
82	Index: libperl-5.8.8-CVE-2008-1927.patch
83	===================================================================
84	Fix a double free / segfault with utf8 regexps
85	Debian #454792
86	[rt.cpan.org #48156]
87	[rt.cpan.org #40641]
88	upstream change 29204
89
90	UTF8_ALLOW_DEFAULT definition in utf8.h picked from upstream change 27688
91
92	diff --git a/embed.fnc b/embed.fnc
93	index edfbc0e..26524c7 100644
94	--- a/embed.fnc
95	+++ b/embed.fnc
96	@@ -1168,6 +1168,7 @@ Es \|void \|reguni \|NN const struct RExC_state_t state\|UV uv\|NN char s\|NN STRLE
97	Es \|regnode\|regclass \|NN struct RExC_state_t state
98	ERs \|I32 \|regcurly \|NN const char *
99	Es \|regnode\|reg_node \|NN struct RExC_state_t state\|U8 op
100	+Es \|UV \|reg_recode \|const char value\|NULLOK SV **encp
101	Es \|regnode\|regpiece \|NN struct RExC_state_t state\|NN I32 *flagp
102	Es \|void \|reginsert \|NN struct RExC_state_t state\|U8 op\|NN regnode opnd
103	Es \|void \|regoptail \|NN struct RExC_state_t state\|NN regnode p\|NN regnode *val
104	diff --git a/embed.h b/embed.h
105	index 2b38fd5..372b04f 100644
106	--- a/embed.h
107	+++ b/embed.h
108	@@ -1234,6 +1234,7 @@
109	#define regclass S_regclass
110	#define regcurly S_regcurly
111	#define reg_node S_reg_node
112	+#define reg_recode S_reg_recode
113	#define regpiece S_regpiece
114	#define reginsert S_reginsert
115	#define regoptail S_regoptail
116	@@ -3277,6 +3278,7 @@
117	#define regclass(a) S_regclass(aTHX_ a)
118	#define regcurly(a) S_regcurly(aTHX_ a)
119	#define reg_node(a,b) S_reg_node(aTHX_ a,b)
120	+#define reg_recode(a,b) S_reg_recode(aTHX_ a,b)
121	#define regpiece(a,b) S_regpiece(aTHX_ a,b)
122	#define reginsert(a,b,c) S_reginsert(aTHX_ a,b,c)
123	#define regoptail(a,b,c) S_regoptail(aTHX_ a,b,c)
124	diff --git a/pod/perldiag.pod b/pod/perldiag.pod
125	index 9b3134c..7d95216 100644
126	--- a/pod/perldiag.pod
127	+++ b/pod/perldiag.pod
128	@@ -1900,6 +1900,15 @@ recognized by Perl or by a user-supplied handler. See L<attributes>.
129	(W printf) Perl does not understand the given format conversion. See
130	L<perlfunc/sprintf>.
131
132	+=item Invalid escape in the specified encoding in regex; marked by <-- HERE in m/%s/
133	+
134	+(W regexp) The numeric escape (for example C<\xHH>) of value < 256
135	+didn't correspond to a single character through the conversion
136	+from the encoding specified by the encoding pragma.
137	+The escape was replaced with REPLACEMENT CHARACTER (U+FFFD) instead.
138	+The <-- HERE shows in the regular expression about where the
139	+escape was discovered.
140	+
141	=item Invalid [] range "%s" in regex; marked by <-- HERE in m/%s/
142
143	(F) The range specified in a character class had a minimum character
144	diff --git a/proto.h b/proto.h
145	index 6d185dd..ef6c0cf 100644
146	--- a/proto.h
147	+++ b/proto.h
148	@@ -1748,6 +1748,7 @@ STATIC I32 S_regcurly(pTHX_ const char *)
149	__attribute__warn_unused_result__;
150
151	STATIC regnode* S_reg_node(pTHX_ struct RExC_state_t *state, U8 op);
152	+STATIC UV S_reg_recode(pTHX_ const char value, SV **encp);
153	STATIC regnode* S_regpiece(pTHX_ struct RExC_state_t state, I32 flagp);
154	STATIC void S_reginsert(pTHX_ struct RExC_state_t state, U8 op, regnode opnd);
155	STATIC void S_regoptail(pTHX_ struct RExC_state_t state, regnode p, regnode *val);
156	diff --git a/regcomp.c b/regcomp.c
157	index 928cf39..98d48dd 100644
158	--- a/regcomp.c
159	+++ b/regcomp.c
160	@@ -2791,6 +2791,39 @@ S_regpiece(pTHX_ RExC_state_t pRExC_state, I32 flagp)
161	}
162
163	/*
164	+ * reg_recode
165	+ *
166	+ * It returns the code point in utf8 for the value in *encp.
167	+ * value: a code value in the source encoding
168	+ * encp: a pointer to an Encode object
169	+ *
170	+ * If the result from Encode is not a single character,
171	+ * it returns U+FFFD (Replacement character) and sets *encp to NULL.
172	+ */
173	+STATIC UV
174	+S_reg_recode(pTHX_ const char value, SV **encp)
175	+{
176	+ STRLEN numlen = 1;
177	+ SV * const sv = sv_2mortal(newSVpvn(&value, numlen));
178	+ const char * const s = encp && encp ? sv_recode_to_utf8(sv, encp)
179	+ : SvPVX(sv);
180	+ const STRLEN newlen = SvCUR(sv);
181	+ UV uv = UNICODE_REPLACEMENT;
182	+
183	+ if (newlen)
184	+ uv = SvUTF8(sv)
185	+ ? utf8n_to_uvchr((U8*)s, newlen, &numlen, UTF8_ALLOW_DEFAULT)
186	+ : (U8)s;
187	+
188	+ if (!newlen \|\| numlen != newlen) {
189	+ uv = UNICODE_REPLACEMENT;
190	+ if (encp)
191	+ *encp = NULL;
192	+ }
193	+ return uv;
194	+}
195	+
196	+/*
197	- regatom - the lowest level
198	*
199	* Optimization: gobbles an entire sequence of ordinary characters so that
200	@@ -3182,6 +3215,8 @@ tryagain:
201	ender = grok_hex(p, &numlen, &flags, NULL);
202	p += numlen;
203	}
204	+ if (PL_encoding && ender < 0x100)
205	+ goto recode_encoding;
206	break;
207	case 'c':
208	p++;
209	@@ -3201,6 +3236,17 @@ tryagain:
210	--p;
211	goto loopdone;
212	}
213	+ if (PL_encoding && ender < 0x100)
214	+ goto recode_encoding;
215	+ break;
216	+ recode_encoding:
217	+ {
218	+ SV* enc = PL_encoding;
219	+ ender = reg_recode((const char)(U8)ender, &enc);
220	+ if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP))
221	+ vWARN(p, "Invalid escape in the specified encoding");
222	+ RExC_utf8 = 1;
223	+ }
224	break;
225	case '\0':
226	if (p >= RExC_end)
227	@@ -3331,32 +3377,6 @@ tryagain:
228	break;
229	}
230
231	- /* If the encoding pragma is in effect recode the text of
232	- * any EXACT-kind nodes. */
233	- if (PL_encoding && PL_regkind[(U8)OP(ret)] == EXACT) {
234	- STRLEN oldlen = STR_LEN(ret);
235	- SV *sv = sv_2mortal(newSVpvn(STRING(ret), oldlen));
236	-
237	- if (RExC_utf8)
238	- SvUTF8_on(sv);
239	- if (sv_utf8_downgrade(sv, TRUE)) {
240	- const char * const s = sv_recode_to_utf8(sv, PL_encoding);
241	- const STRLEN newlen = SvCUR(sv);
242	-
243	- if (SvUTF8(sv))
244	- RExC_utf8 = 1;
245	- if (!SIZE_ONLY) {
246	- DEBUG_r(PerlIO_printf(Perl_debug_log, "recode %s to %s\n",
247	- (int)oldlen, STRING(ret),
248	- (int)newlen, s));
249	- Copy(s, STRING(ret), newlen, char);
250	- STR_LEN(ret) += newlen - oldlen;
251	- RExC_emit += STR_SZ(newlen) - STR_SZ(oldlen);
252	- } else
253	- RExC_size += STR_SZ(newlen) - STR_SZ(oldlen);
254	- }
255	- }
256	-
257	return(ret);
258	}
259
260	@@ -3734,6 +3754,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
261	value = grok_hex(RExC_parse, &numlen, &flags, NULL);
262	RExC_parse += numlen;
263	}
264	+ if (PL_encoding && value < 0x100)
265	+ goto recode_encoding;
266	break;
267	case 'c':
268	value = UCHARAT(RExC_parse++);
269	@@ -3741,13 +3763,24 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
270	break;
271	case '0': case '1': case '2': case '3': case '4':
272	case '5': case '6': case '7': case '8': case '9':
273	- {
274	- I32 flags = 0;
275	- numlen = 3;
276	- value = grok_oct(--RExC_parse, &numlen, &flags, NULL);
277	- RExC_parse += numlen;
278	- break;
279	- }
280	+ {
281	+ I32 flags = 0;
282	+ numlen = 3;
283	+ value = grok_oct(--RExC_parse, &numlen, &flags, NULL);
284	+ RExC_parse += numlen;
285	+ if (PL_encoding && value < 0x100)
286	+ goto recode_encoding;
287	+ break;
288	+ }
289	+ recode_encoding:
290	+ {
291	+ SV* enc = PL_encoding;
292	+ value = reg_recode((const char)(U8)value, &enc);
293	+ if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP))
294	+ vWARN(RExC_parse,
295	+ "Invalid escape in the specified encoding");
296	+ break;
297	+ }
298	default:
299	if (!SIZE_ONLY && isALPHA(value) && ckWARN(WARN_REGEXP))
300	vWARN2(RExC_parse,
301	diff --git a/t/uni/tr_utf8.t b/t/uni/tr_utf8.t
302	index 606a84a..354156a 100755
303	--- a/t/uni/tr_utf8.t
304	+++ b/t/uni/tr_utf8.t
305	@@ -31,7 +31,7 @@ BEGIN {
306	}
307
308	use strict;
309	-use Test::More tests => 7;
310	+use Test::More tests => 8;
311
312	use encoding 'utf8';
313
314	@@ -67,4 +67,12 @@ is($str, $hiragana, "s/// # hiragana -> katakana");
315	$line =~ tr/bcdeghijklmnprstvwxyz$02578/בצדעגהיײקלמנפּרסטװשכיזשױתײחא/;
316	is($line, "aבצדעfגהיײקלמנoפqּרסuטװשכיזש1ױ34ת6ײח9", "[perl #16843]");
317	}
318	+
319	+{
320	+ # [perl #40641]
321	+ my $str = qq/Gebääääääääääääääääääääude/;
322	+ my $reg = qr/Gebääääääääääääääääääääude/;
323	+ ok($str =~ /$reg/, "[perl #40641]");
324	+}
325	+
326	__END__
327	diff --git a/utf8.h b/utf8.h
328	index 6d63897..3800866 100644
329	--- a/utf8.h
330	+++ b/utf8.h
331	@@ -198,6 +198,8 @@ encoded character.
332	UTF8_ALLOW_SURROGATE\|UTF8_ALLOW_FFFF)
333	#define UTF8_ALLOW_ANY 0x00FF
334	#define UTF8_CHECK_ONLY 0x0200
335	+#define UTF8_ALLOW_DEFAULT (ckWARN(WARN_UTF8) ? 0 : \
336	+ UTF8_ALLOW_ANYUV)
337
338	#define UNICODE_SURROGATE_FIRST 0xD800
339	#define UNICODE_SURROGATE_LAST 0xDFFF
340
341
342
343	--
344	gentoo-commits@l.g.o mailing list

Gentoo Archives: gentoo-commits