[gentoo-commits] repo/gentoo:master commit in: dev-ml/markup/, dev-ml/markup/files/ - gentoo-commits

From:	Alexis Ballier <aballier@g.o>
To:	gentoo-commits@l.g.o
Subject:	[gentoo-commits] repo/gentoo:master commit in: dev-ml/markup/, dev-ml/markup/files/
Date:	Thu, 01 Dec 2016 17:55:04
Message-Id:	`1480614892.02edd946d92a10dd27a13f2ece1868483d4c2880.aballier@gentoo`

1

commit:     02edd946d92a10dd27a13f2ece1868483d4c2880

2

Author:     Alexis Ballier <aballier <AT> gentoo <DOT> org>

3

AuthorDate: Thu Dec  1 17:54:24 2016 +0000

4

Commit:     Alexis Ballier <aballier <AT> gentoo <DOT> org>

5

CommitDate: Thu Dec  1 17:54:52 2016 +0000

6

URL:        https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=02edd946

7

8

dev-ml/markup: remove old

9

10

Package-Manager: portage-2.3.2

11

12

 dev-ml/markup/Manifest               |    1 -

13

 dev-ml/markup/files/test.patch       |  273 ---------

14

 dev-ml/markup/files/uutf.patch       | 1085 ----------------------------------

15

 dev-ml/markup/markup-0.7.2-r1.ebuild |   44 --

16

 4 files changed, 1403 deletions(-)

17

18

diff --git a/dev-ml/markup/Manifest b/dev-ml/markup/Manifest

19

index 1819e63..29247e7 100644

20

--- a/dev-ml/markup/Manifest

21

+++ b/dev-ml/markup/Manifest

22

@@ -1,2 +1 @@

23

-DIST markup-0.7.2.tar.gz 275010 SHA256 630a737ab6113e17999aacfd55f73b6671211d7980be86f0c711c0b385887c34 SHA512 72a87f54692a0b751c23e6b52bc4ecaa68334b0c6c067793cbf5b011b7d06ce7563f9aa2daeef3553ab48bb6cb9e592587b5a4f37279eaef7b45e19e5b372f73 WHIRLPOOL 679a01c5d197eadf1a8b74247e276405c182acff4c7781b577fbad9fcdc33be164ff81222e79c4e0e5193d1295ee4896ddda547cce1712bfb5ebda050f5bf5ac

24

 DIST markup-0.7.3.tar.gz 275094 SHA256 e1eb3562e0d26ccc33aa5dbe802e4210dbd7c30a8e69b6098b825afb11bb6af1 SHA512 e4577e438241d58c728507c88f14b7f029dbc4aa6b9c5dbf78f03b6c083a430026158c3146a88c14c9cd90a242b1bb4ed838b150bb89433fb6a6f673e5d2bb66 WHIRLPOOL ad967738706d4c017f266ecdef7b0772ce0bc17f9bc7dda228ffabc9cccccc88cde69337e063577d1fcda1e93cf4a7f18bbbf09709ee82f0a4b8382f5e339d2e

25

26

diff --git a/dev-ml/markup/files/test.patch b/dev-ml/markup/files/test.patch

27

deleted file mode 100644

28

index f2a5257..00000000

29

--- a/dev-ml/markup/files/test.patch

30

+++ /dev/null

31

@@ -1,273 +0,0 @@

32

-Index: markup.ml-0.7.2/test/test_encoding.ml

33

-===================================================================

34

---- markup.ml-0.7.2.orig/test/test_encoding.ml

35

-+++ markup.ml-0.7.2/test/test_encoding.ml

36

-@@ -15,9 +15,9 @@ let test_ucs_4 (f : Encoding.t) name s1

37

-   expect_error (1, 2) (`Decoding_error (bad_bytes, name))

38

-   begin fun report ->

39

-     let chars = s1 |> string |> f ~report in

40

--    next_option chars ok (assert_equal (Some (Char.code 'f')));

41

-+    next_option chars ok (assert_equal (Some ((Uchar.of_int (Char.code 'f')))));

42

-     next_option chars ok (assert_equal (Some Uutf.u_rep));

43

--    next_option chars ok (assert_equal (Some (Char.code 'o')));

44

-+    next_option chars ok (assert_equal (Some ((Uchar.of_int (Char.code 'o')))));

45

-     next_option chars ok (assert_equal None);

46

-     next_option chars ok (assert_equal None)

47

-   end;

48

-@@ -25,9 +25,9 @@ let test_ucs_4 (f : Encoding.t) name s1

49

-   expect_error (2, 2) (`Decoding_error ("\x00\x00\x00", name))

50

-   begin fun report ->

51

-     let chars = s2 |> string |> f ~report in

52

--    next_option chars ok (assert_equal (Some (Char.code 'f')));

53

--    next_option chars ok (assert_equal (Some 0x000A));

54

--    next_option chars ok (assert_equal (Some (Char.code 'o')));

55

-+    next_option chars ok (assert_equal (Some (Uchar.of_int (Char.code 'f'))));

56

-+    next_option chars ok (assert_equal (Some (Uchar.of_int 0x000A)));

57

-+    next_option chars ok (assert_equal (Some (Uchar.of_int (Char.code 'o'))));

58

-     next_option chars ok (assert_equal (Some Uutf.u_rep));

59

-     next_option chars ok (assert_equal None);

60

-     next_option chars ok (assert_equal None)

61

-@@ -38,12 +38,12 @@ let tests = [

62

-     let s = "\xef\xbb\xbffoo\xf0\x9f\x90\x99bar\xa0more" in

63

-     expect_error (1, 8) (`Decoding_error ("\xa0", "utf-8")) begin fun report ->

64

-       let chars = s |> string |> utf_8 ~report in

65

--      next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o']));

66

--      next_option chars ok (assert_equal (Some 0x1F419));

67

--      next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r']));

68

-+      next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o']));

69

-+      next_option chars ok (assert_equal (Some (Uchar.of_int 0x1F419)));

70

-+      next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['b'; 'a'; 'r']));

71

-       next_option chars ok (assert_equal (Some Uutf.u_rep));

72

-       next_n 4 chars ok

73

--        (assert_equal (List.map Char.code ['m'; 'o'; 'r'; 'e']));

74

-+        (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['m'; 'o'; 'r'; 'e']));

75

-       next_option chars ok (assert_equal None);

76

-       next_option chars ok (assert_equal None)

77

-     end);

78

-@@ -53,11 +53,11 @@ let tests = [

79

-     expect_error (1, 6) (`Decoding_error ("\xdc\x19", "utf-16be"))

80

-     begin fun report ->

81

-       let chars = s |> string |> utf_16be ~report in

82

--      next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o']));

83

--      next_option chars ok (assert_equal (Some 0x1F419));

84

--      next_option chars ok (assert_equal (Some (Char.code 'b')));

85

-+      next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o']));

86

-+      next_option chars ok (assert_equal (Some (Uchar.of_int 0x1F419)));

87

-+      next_option chars ok (assert_equal (Some (Uchar.of_int (Char.code 'b'))));

88

-       next_option chars ok (assert_equal (Some Uutf.u_rep));

89

--      next_n 16 chars ok (assert_equal (List.map Char.code ['a'; 'r']));

90

-+      next_n 16 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['a'; 'r']));

91

-       next_option chars ok (assert_equal None);

92

-       next_option chars ok (assert_equal None)

93

-     end);

94

-@@ -67,11 +67,11 @@ let tests = [

95

-     expect_error (1, 6) (`Decoding_error ("\x19\xdc", "utf-16le"))

96

-     begin fun report ->

97

-       let chars = s |> string |> utf_16le ~report in

98

--      next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o']));

99

--      next_option chars ok (assert_equal (Some 0x1F419));

100

--      next_option chars ok (assert_equal (Some (Char.code 'b')));

101

-+      next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o']));

102

-+      next_option chars ok (assert_equal (Some (Uchar.of_int 0x1F419)));

103

-+      next_option chars ok (assert_equal (Some (Uchar.of_int (Char.code 'b'))));

104

-       next_option chars ok (assert_equal (Some Uutf.u_rep));

105

--      next_n 16 chars ok (assert_equal (List.map Char.code ['a'; 'r']));

106

-+      next_n 16 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['a'; 'r']));

107

-       next_option chars ok (assert_equal None);

108

-       next_option chars ok (assert_equal None)

109

-     end);

110

-@@ -79,7 +79,7 @@ let tests = [

111

-   ("encoding.iso_8859_1" >:: fun _ ->

112

-     let chars = string "foo\xa0" |> iso_8859_1 in

113

-     next_n 4 chars

114

--    ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'; '\xa0']));

115

-+    ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o'; '\xa0']));

116

-     next_option chars ok (assert_equal None);

117

-     next_option chars ok (assert_equal None));

118

-

119

-@@ -88,26 +88,26 @@ let tests = [

120

-     expect_error (1, 4) (`Decoding_error ("\xa0", "us-ascii"))

121

-     begin fun report ->

122

-       let chars = s |> string |> us_ascii ~report in

123

--      next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o']));

124

-+      next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o']));

125

-       next_option chars ok (assert_equal (Some Uutf.u_rep));

126

--      next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r']));

127

-+      next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['b'; 'a'; 'r']));

128

-       next_option chars ok (assert_equal None);

129

-       next_option chars ok (assert_equal None)

130

-     end);

131

-

132

-   ("encoding.windows_1251" >:: fun _ ->

133

-     let chars = string "foo\xe0\xe1\xe2bar" |> windows_1251 in

134

--    next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o']));

135

--    next_n 3 chars ok (assert_equal [0x0430; 0x0431; 0x0432]);

136

--    next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r']));

137

-+    next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o']));

138

-+    next_n 3 chars ok (assert_equal [Uchar.of_int 0x0430; Uchar.of_int 0x0431; Uchar.of_int 0x0432]);

139

-+    next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['b'; 'a'; 'r']));

140

-     next_option chars ok (assert_equal None);

141

-     next_option chars ok (assert_equal None));

142

-

143

-   ("encoding.windows_1252" >:: fun _ ->

144

-     let chars = string "foo\x80\x83bar" |> windows_1252 in

145

--    next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o']));

146

--    next_n 2 chars ok (assert_equal [0x20AC; 0x0192]);

147

--    next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r']));

148

-+    next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o']));

149

-+    next_n 2 chars ok (assert_equal [Uchar.of_int 0x20AC; Uchar.of_int 0x0192]);

150

-+    next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['b'; 'a'; 'r']));

151

-     next_option chars ok (assert_equal None);

152

-     next_option chars ok (assert_equal None));

153

-

154

-@@ -137,7 +137,7 @@ let tests = [

155

-

156

-   ("encoding.ebcdic" >:: fun _ ->

157

-     let chars = string "\x86\x96\x96" |> ebcdic in

158

--    next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o']));

159

-+    next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o']));

160

-     next_option chars ok (assert_equal None);

161

-     next_option chars ok (assert_equal None));

162

- ]

163

-Index: markup.ml-0.7.2/test/test_html_tokenizer.ml

164

-===================================================================

165

---- markup.ml-0.7.2.orig/test/test_html_tokenizer.ml

166

-+++ markup.ml-0.7.2/test/test_html_tokenizer.ml

167

-@@ -134,7 +134,7 @@ let tests = [

168

-     expect "&#1000000000000000000000000000000;"

169

-       [ 1,  1, E (`Bad_token ("&#1000000000000000000000000000000;",

170

-                               reference, "out of range"));

171

--        1,  1, S (`Char Uutf.u_rep);

172

-+        1,  1, S (`Char (Uchar.to_int Uutf.u_rep));

173

-         1, 35, S  `EOF];

174

-

175

-     expect "&#1000000000000000000000000000000"

176

-@@ -142,22 +142,22 @@ let tests = [

177

-                               reference, "missing ';' at end"));

178

-         1,  1, E (`Bad_token ("&#1000000000000000000000000000000",

179

-                               reference, "out of range"));

180

--        1,  1, S (`Char Uutf.u_rep);

181

-+        1,  1, S (`Char (Uchar.to_int Uutf.u_rep));

182

-         1, 34, S  `EOF];

183

-

184

-     expect "&#xD800;"

185

-       [ 1,  1, E (`Bad_token ("&#xD800;", reference, "out of range"));

186

--        1,  1, S (`Char Uutf.u_rep);

187

-+        1,  1, S (`Char (Uchar.to_int Uutf.u_rep));

188

-         1,  9, S  `EOF];

189

-

190

-     expect "&#x110000;"

191

-       [ 1,  1, E (`Bad_token ("&#x110000;", reference, "out of range"));

192

--        1,  1, S (`Char Uutf.u_rep);

193

-+        1,  1, S (`Char (Uchar.to_int Uutf.u_rep));

194

-         1, 11, S  `EOF];

195

-

196

-     expect "&#0;"

197

-       [ 1,  1, E (`Bad_token ("&#0;", reference, "out of range"));

198

--        1,  1, S (`Char Uutf.u_rep);

199

-+        1,  1, S (`Char (Uchar.to_int Uutf.u_rep));

200

-         1,  5, S  `EOF];

201

-

202

-     expect "&#x01;"

203

-@@ -264,7 +264,7 @@ let tests = [

204

-     expect ~state:`RCDATA "f\x00</foo>"

205

-       ([ 1,  1, S (`Char 0x66);

206

-          1,  2, E (`Bad_token ("U+0000", "content", "null"));

207

--         1,  2, S (`Char Uutf.u_rep)] @

208

-+         1,  2, S (`Char (Uchar.to_int Uutf.u_rep))] @

209

-        (char_sequence ~start:3 "</foo>"));

210

-

211

-     expect ~state:`RCDATA "<title>f</title >"

212

-@@ -302,7 +302,7 @@ let tests = [

213

-     expect ~state:`RAWTEXT "f\x00</foo>"

214

-       ([ 1,  1, S (`Char 0x66);

215

-          1,  2, E (`Bad_token ("U+0000", "content", "null"));

216

--         1,  2, S (`Char Uutf.u_rep)] @

217

-+         1,  2, S (`Char (Uchar.to_int Uutf.u_rep))] @

218

-        (char_sequence ~start:3 "</foo>")));

219

-

220

-   ("html.tokenizer.script-data" >:: fun _ ->

221

-@@ -330,7 +330,7 @@ let tests = [

222

-     expect ~state:`Script_data "f<!--o\x00o"

223

-       ((char_sequence ~no_eof:true "f<!--o") @

224

-        [1,  7, E (`Bad_token ("U+0000", "script", "null"));

225

--        1,  7, S (`Char Uutf.u_rep);

226

-+        1,  7, S (`Char (Uchar.to_int Uutf.u_rep));

227

-         1,  8, S (`Char 0x6F);

228

-         1,  9, E (`Unexpected_eoi "script");

229

-         1,  9, S  `EOF]);

230

-@@ -363,7 +363,7 @@ let tests = [

231

-     expect ~state:`Script_data "f<!--a-\x00-"

232

-       ((char_sequence ~no_eof:true "f<!--a-") @

233

-        [ 1,  8, E (`Bad_token ("U+0000", "script", "null"));

234

--         1,  8, S (`Char Uutf.u_rep);

235

-+         1,  8, S (`Char (Uchar.to_int Uutf.u_rep));

236

-          1,  9, S (`Char 0x02D);

237

-          1, 10, E (`Unexpected_eoi "script");

238

-          1, 10, S  `EOF]);

239

-@@ -371,7 +371,7 @@ let tests = [

240

-     expect ~state:`Script_data "f<!--a--\x00--"

241

-       ((char_sequence ~no_eof:true "f<!--a--") @

242

-        [ 1,  9, E (`Bad_token ("U+0000", "script", "null"));

243

--         1,  9, S (`Char Uutf.u_rep);

244

-+         1,  9, S (`Char (Uchar.to_int Uutf.u_rep));

245

-          1, 10, S (`Char 0x02D);

246

-          1, 11, S (`Char 0x02D);

247

-          1, 12, E (`Unexpected_eoi "script");

248

-@@ -380,14 +380,14 @@ let tests = [

249

-     expect ~state:`Script_data "f<!--<script>\x00"

250

-       ((char_sequence ~no_eof:true "f<!--<script>") @

251

-        [ 1, 14, E (`Bad_token ("U+0000", "script", "null"));

252

--         1, 14, S (`Char Uutf.u_rep);

253

-+         1, 14, S (`Char (Uchar.to_int Uutf.u_rep));

254

-          1, 15, E (`Unexpected_eoi "script");

255

-          1, 15, S  `EOF]);

256

-

257

-     expect ~state:`Script_data "f<!--<script>-\x00-"

258

-       ((char_sequence ~no_eof:true "f<!--<script>-") @

259

-        [ 1, 15, E (`Bad_token ("U+0000", "script", "null"));

260

--         1, 15, S (`Char Uutf.u_rep);

261

-+         1, 15, S (`Char (Uchar.to_int Uutf.u_rep));

262

-          1, 16, S (`Char 0x2D);

263

-          1, 17, E (`Unexpected_eoi "script");

264

-          1, 17, S  `EOF]);

265

-@@ -395,7 +395,7 @@ let tests = [

266

-     expect ~state:`Script_data "f<!--<script>--\x00--"

267

-       ((char_sequence ~no_eof:true "f<!--<script>--") @

268

-        [ 1, 16, E (`Bad_token ("U+0000", "script", "null"));

269

--         1, 16, S (`Char Uutf.u_rep);

270

-+         1, 16, S (`Char (Uchar.to_int Uutf.u_rep));

271

-          1, 17, S (`Char 0x2D);

272

-          1, 18, S (`Char 0x2D);

273

-          1, 19, E (`Unexpected_eoi "script");

274

-@@ -413,7 +413,7 @@ let tests = [

275

-     expect ~state:`Script_data "f\x00</foo>"

276

-       ([ 1,  1, S (`Char 0x66);

277

-          1,  2, E (`Bad_token ("U+0000", "content", "null"));

278

--         1,  2, S (`Char Uutf.u_rep)] @

279

-+         1,  2, S (`Char (Uchar.to_int Uutf.u_rep))] @

280

-        (char_sequence ~start:3 "</foo>")));

281

-

282

-   ("html.tokenizer.plaintext" >:: fun _ ->

283

-@@ -424,7 +424,7 @@ let tests = [

284

-     expect ~state:`PLAINTEXT "f\x00</foo>"

285

-       ([ 1,  1, S (`Char 0x66);

286

-          1,  2, E (`Bad_token ("U+0000", "content", "null"));

287

--         1,  2, S (`Char Uutf.u_rep)] @

288

-+         1,  2, S (`Char (Uchar.to_int Uutf.u_rep))] @

289

-        (char_sequence ~start:3 "</foo>")));

290

-

291

-   ("html.tokenizer.comment" >:: fun _ ->

292

-Index: markup.ml-0.7.2/test/test_input.ml

293

-===================================================================

294

---- markup.ml-0.7.2.orig/test/test_input.ml

295

-+++ markup.ml-0.7.2/test/test_input.ml

296

-@@ -71,7 +71,7 @@ let tests = [

297

-     end);

298

-

299

-   ("input.bom" >:: fun _ ->

300

--    [0xFEFF; 0x66]

301

-+    [Uchar.of_int 0xFEFF; Uchar.of_int 0x66]

302

-     |> of_list

303

-     |> preprocess is_valid_xml_char Error.ignore_errors

304

-     |> fst

305

306

diff --git a/dev-ml/markup/files/uutf.patch b/dev-ml/markup/files/uutf.patch

307

deleted file mode 100644

308

index f561084..00000000

309

--- a/dev-ml/markup/files/uutf.patch

310

+++ /dev/null

311

@@ -1,1085 +0,0 @@

312

-Index: markup.ml-0.7.2/src/common.ml

313

-===================================================================

314

---- markup.ml-0.7.2.orig/src/common.ml

315

-+++ markup.ml-0.7.2/src/common.ml

316

-@@ -134,7 +134,7 @@ let is_printable = is_in_range 0x0020 0x

317

- let char c =

318

-   if is_printable c then begin

319

-     let buffer = Buffer.create 4 in

320

--    add_utf_8 buffer c;

321

-+    add_utf_8 buffer (Uchar.of_int c);

322

-     Buffer.contents buffer

323

-   end

324

-   else

325

-Index: markup.ml-0.7.2/src/detect.ml

326

-===================================================================

327

---- markup.ml-0.7.2.orig/src/detect.ml

328

-+++ markup.ml-0.7.2/src/detect.ml

329

-@@ -222,7 +222,7 @@ let meta_tag_prescan =

330

-     let rec iterate () =

331

-       next source throw (fun () -> k "") (function

332

-         | c when c = quote -> k (Buffer.contents buffer)

333

--        | c -> add_utf_8 buffer (Char.code (Char.lowercase c)); iterate ())

334

-+        | c -> add_utf_8 buffer (Uchar.of_int (Char.code (Char.lowercase c))); iterate ())

335

-     in

336

-     iterate ()

337

-   in

338

-@@ -236,7 +236,7 @@ let meta_tag_prescan =

339

-           push source c;

340

-           k (Buffer.contents buffer)

341

-         | c ->

342

--          add_utf_8 buffer (Char.code (Char.lowercase c));

343

-+          add_utf_8 buffer (Uchar.of_int (Char.code (Char.lowercase c)));

344

-           iterate ())

345

-     in

346

-     iterate ()

347

-@@ -315,7 +315,7 @@ let meta_tag_prescan =

348

-               k (Buffer.contents buffer)

349

-

350

-             | Some c ->

351

--              add_utf_8 buffer (Char.code (Char.lowercase c));

352

-+              add_utf_8 buffer (Uchar.of_int (Char.code (Char.lowercase c)));

353

-               iterate ()

354

-           end

355

-         in

356

-Index: markup.ml-0.7.2/src/encoding.ml

357

-===================================================================

358

---- markup.ml-0.7.2.orig/src/encoding.ml

359

-+++ markup.ml-0.7.2/src/encoding.ml

360

-@@ -4,7 +4,7 @@

361

- open Common

362

- open Kstream

363

-

364

--type t = ?report:Error.parse_handler -> char Kstream.t -> int Kstream.t

365

-+type t = ?report:Error.parse_handler -> char Kstream.t -> Uchar.t Kstream.t

366

-

367

- let wrap f = fun ?(report = Error.ignore_errors) s -> f report s

368

-

369

-@@ -24,8 +24,8 @@ let _uutf_decoder encoding name =

370

-           k Uutf.u_rep)

371

-         | `Await ->

372

-           next bytes throw

373

--            (fun () -> Uutf.Manual.src decoder "" 0 0; run ())

374

--            (fun c -> Uutf.Manual.src decoder (String.make 1 c) 0 1; run ())

375

-+            (fun () -> Uutf.Manual.src decoder Bytes.empty 0 0; run ())

376

-+            (fun c -> Uutf.Manual.src decoder (Bytes.make 1 c) 0 1; run ())

377

-       in

378

-       run ())

379

-     |> make)

380

-@@ -87,7 +87,7 @@ let _ucs_4_decoder arrange name =

381

-               let skip =

382

-                 if !first then begin

383

-                   first := false;

384

--                  scalar = Uutf.u_bom

385

-+                  scalar = Uchar.to_int Uutf.u_bom

386

-                 end

387

-                 else

388

-                   false

389

-@@ -96,9 +96,9 @@ let _ucs_4_decoder arrange name =

390

-               if skip then run ()

391

-               else

392

-                 if scalar = 0x000A then

393

--                  newline k scalar

394

-+                  newline k (Uchar.of_int scalar)

395

-                 else

396

--                  char k scalar

397

-+                  char k (Uchar.of_int scalar)

398

-

399

-           | [] -> empty ()

400

-

401

-@@ -130,7 +130,7 @@ let code_page table =

402

-

403

-   (fun _ bytes ->

404

-     (fun throw empty k ->

405

--      next bytes throw empty (fun c -> k table.(Char.code c)))

406

-+      next bytes throw empty (fun c -> k (Uchar.of_int table.(Char.code c))))

407

-     |> make)

408

-   |> wrap

409

-

410

-Index: markup.ml-0.7.2/src/html_parser.ml

411

-===================================================================

412

---- markup.ml-0.7.2.orig/src/html_parser.ml

413

-+++ markup.ml-0.7.2/src/html_parser.ml

414

-@@ -1022,7 +1022,7 @@ let parse requested_context report (toke

415

-   let frameset_ok = ref true in

416

-   let head_seen = ref false in

417

-

418

--  let add_character = Text.add text in

419

-+  let add_character = (fun x y -> Text.add text x (Uchar.of_int y)) in

420

-

421

-   set_foreign (fun () ->

422

-     Stack.current_element_is_foreign context open_elements);

423

-@@ -2717,7 +2717,7 @@ let parse requested_context report (toke

424

-     | l, `Char 0 ->

425

-       report l (`Bad_token ("U+0000", "foreign content", "null")) !throw

426

-         (fun () ->

427

--      add_character l Uutf.u_rep;

428

-+      add_character l (Uchar.to_int Uutf.u_rep);

429

-       mode ())

430

-

431

-     | l, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020 as c) ->

432

-Index: markup.ml-0.7.2/src/html_tokenizer.ml

433

-===================================================================

434

---- markup.ml-0.7.2.orig/src/html_tokenizer.ml

435

-+++ markup.ml-0.7.2/src/html_tokenizer.ml

436

-@@ -252,7 +252,7 @@ let tokenize report (input, get_location

437

-                 report location

438

-                   (`Bad_token (prefix ^ text ^ semicolon, "character reference",

439

-                                "Windows-1252 character")) !throw (fun () ->

440

--                k (Some (`One n)))

441

-+                k (Some (`One (Uchar.of_int n))))

442

-

443

-               else

444

-                 match n with

445

-@@ -268,9 +268,9 @@ let tokenize report (input, get_location

446

-                     (`Bad_token (prefix ^ text ^ semicolon,

447

-                                  "character reference",

448

-                                  "invalid HTML character")) !throw (fun () ->

449

--                  k (Some (`One n)))

450

-+                  k (Some (`One (Uchar.of_int n))))

451

-

452

--                | n -> k (Some (`One n))

453

-+                | n -> k (Some (`One (Uchar.of_int n)))

454

-               end

455

-             end

456

-         in

457

-@@ -366,6 +366,10 @@ let tokenize report (input, get_location

458

-                   | _ -> unterminated ())

459

-         in

460

-

461

-+	let ma = function

462

-+	a, `One x -> (a, `One (Uchar.of_int x))

463

-+	| a, `Two (x,y) -> (a, `Two (Uchar.of_int x, Uchar.of_int y)) in

464

-+

465

-         let rec match_named best matched replace candidate =

466

-           next_option input !throw (function

467

-             | None -> finish best matched replace

468

-@@ -377,8 +381,8 @@ let tokenize report (input, get_location

469

-               | `None -> finish best matched (v::replace)

470

-               | `Continue -> match_named best matched (v::replace) candidate

471

-               | `Match_and_continue m ->

472

--                match_named (Some m) (v::(replace @ matched)) [] candidate

473

--              | `Match m -> finish (Some m) (v::matched) [])

474

-+                match_named (Some (ma m)) (v::(replace @ matched)) [] candidate

475

-+              | `Match m -> finish (Some (ma m)) (v::matched) [])

476

-         in

477

-         match_named None [] [] "")

478

-

479

-@@ -409,11 +413,11 @@ let tokenize report (input, get_location

480

-         emit (l, `Char 0x0026) state

481

-

482

-       | Some (`One c) ->

483

--        emit (l, `Char c) state

484

-+        emit (l, `Char (Uchar.to_int c)) state

485

-

486

-       | Some (`Two (c, c')) ->

487

--        emit (l, `Char c) (fun () ->

488

--        emit (l, `Char c') state)

489

-+        emit (l, `Char (Uchar.to_int c)) (fun () ->

490

-+        emit (l, `Char (Uchar.to_int c')) state)

491

-     end

492

-

493

-   (* 8.2.4.3. *)

494

-@@ -427,7 +431,7 @@ let tokenize report (input, get_location

495

-

496

-       | Some (l, 0) ->

497

-         report l (`Bad_token ("U+0000", "content", "null")) !throw (fun () ->

498

--        emit (l, `Char Uutf.u_rep) rcdata_state)

499

-+        emit (l, `Char (Uchar.to_int Uutf.u_rep)) rcdata_state)

500

-

501

-       | None ->

502

-         emit_eof ()

503

-@@ -444,7 +448,7 @@ let tokenize report (input, get_location

504

-

505

-       | Some (l, 0) ->

506

-         report l (`Bad_token ("U+0000", "content", "null")) !throw (fun () ->

507

--        emit (l, `Char Uutf.u_rep) rawtext_state)

508

-+        emit (l, `Char (Uchar.to_int Uutf.u_rep)) rawtext_state)

509

-

510

-       | None ->

511

-         emit_eof ()

512

-@@ -461,7 +465,7 @@ let tokenize report (input, get_location

513

-

514

-       | Some (l, 0) ->

515

-         report l (`Bad_token ("U+0000", "content", "null")) !throw (fun () ->

516

--        emit_character l Uutf.u_rep script_data_state)

517

-+        emit_character l (Uchar.to_int Uutf.u_rep) script_data_state)

518

-

519

-       | None ->

520

-         emit_eof ()

521

-@@ -475,7 +479,7 @@ let tokenize report (input, get_location

522

-     next_option input !throw begin function

523

-       | Some (l, 0) ->

524

-         report l (`Bad_token ("U+0000", "content", "null")) !throw (fun () ->

525

--        emit (l, `Char Uutf.u_rep) plaintext_state)

526

-+        emit (l, `Char (Uchar.to_int Uutf.u_rep)) plaintext_state)

527

-

528

-       | None ->

529

-         emit_eof ()

530

-@@ -501,7 +505,7 @@ let tokenize report (input, get_location

531

-         end_tag_open_state l' tag

532

-

533

-       | Some (_, c) when is_alphabetic c ->

534

--        add_utf_8 tag._tag_name (to_lowercase c);

535

-+        add_utf_8 tag._tag_name (Uchar.of_int (to_lowercase c));

536

-         tag_name_state l' tag

537

-

538

-       | Some (_, 0x003F) ->

539

-@@ -529,7 +533,7 @@ let tokenize report (input, get_location

540

-

541

-     next_option input !throw begin function

542

-       | Some (_, c) when is_alphabetic c ->

543

--        add_utf_8 tag._tag_name (to_lowercase c);

544

-+        add_utf_8 tag._tag_name (Uchar.of_int (to_lowercase c));

545

-         tag_name_state l' tag

546

-

547

-       | Some (_, 0x003E) ->

548

-@@ -569,7 +573,7 @@ let tokenize report (input, get_location

549

-         report (get_location ()) (`Unexpected_eoi "tag") !throw data_state

550

-

551

-       | Some (_, c) ->

552

--        add_utf_8 tag._tag_name (to_lowercase c);

553

-+        add_utf_8 tag._tag_name (Uchar.of_int (to_lowercase c));

554

-         tag_name_state l' tag

555

-     end

556

-

557

-@@ -589,7 +593,7 @@ let tokenize report (input, get_location

558

-     next_option input !throw begin function

559

-       | Some (_, c as v) when is_alphabetic c ->

560

-         let name_buffer = Buffer.create 32 in

561

--        add_utf_8 name_buffer (to_lowercase c);

562

-+        add_utf_8 name_buffer (Uchar.of_int (to_lowercase c));

563

-         text_end_tag_name_state state l' (v::cs) name_buffer

564

-

565

-       | maybe_v ->

566

-@@ -618,7 +622,7 @@ let tokenize report (input, get_location

567

-         emit_tag l' (create_tag ())

568

-

569

-       | Some ((_, c) as v) when is_alphabetic c ->

570

--        add_utf_8 name_buffer (to_lowercase c);

571

-+        add_utf_8 name_buffer (Uchar.of_int (to_lowercase c));

572

-         text_end_tag_name_state state l' (v::cs) name_buffer

573

-

574

-       | maybe_v ->

575

-@@ -676,7 +680,7 @@ let tokenize report (input, get_location

576

-

577

-       | Some (l, 0) ->

578

-         report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () ->

579

--        emit_character l Uutf.u_rep (fun () ->

580

-+        emit_character l (Uchar.to_int Uutf.u_rep) (fun () ->

581

-         script_data_escaped_state l'))

582

-

583

-       | None ->

584

-@@ -699,7 +703,7 @@ let tokenize report (input, get_location

585

-

586

-       | Some (l, 0) ->

587

-         report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () ->

588

--        emit_character l Uutf.u_rep (fun () ->

589

-+        emit_character l (Uchar.to_int Uutf.u_rep) (fun () ->

590

-         script_data_escaped_state l'))

591

-

592

-       | None ->

593

-@@ -725,7 +729,7 @@ let tokenize report (input, get_location

594

-

595

-       | Some (l, 0) ->

596

-         report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () ->

597

--        emit_character l Uutf.u_rep (fun () ->

598

-+        emit_character l (Uchar.to_int Uutf.u_rep) (fun () ->

599

-         script_data_escaped_state l'))

600

-

601

-       | None ->

602

-@@ -745,7 +749,7 @@ let tokenize report (input, get_location

603

-

604

-       | Some (_, c as v) when is_alphabetic c ->

605

-         let tag_buffer = Buffer.create 32 in

606

--        add_utf_8 tag_buffer (to_lowercase c);

607

-+        add_utf_8 tag_buffer (Uchar.of_int (to_lowercase c));

608

-         emit_characters (List.rev (v::cs)) (fun () ->

609

-         script_data_double_escape_start_state l' tag_buffer)

610

-

611

-@@ -765,7 +769,7 @@ let tokenize report (input, get_location

612

-         else script_data_escaped_state l')

613

-

614

-       | Some (l, c) when is_alphabetic c ->

615

--        add_utf_8 tag_buffer (to_lowercase c);

616

-+        add_utf_8 tag_buffer (Uchar.of_int (to_lowercase c));

617

-         emit_character l c (fun () ->

618

-         script_data_double_escape_start_state l' tag_buffer)

619

-

620

-@@ -787,7 +791,7 @@ let tokenize report (input, get_location

621

-

622

-       | Some (l, 0) ->

623

-         report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () ->

624

--        emit_character l Uutf.u_rep (fun () ->

625

-+        emit_character l (Uchar.to_int Uutf.u_rep) (fun () ->

626

-         script_data_double_escaped_state l'))

627

-

628

-       | None ->

629

-@@ -811,7 +815,7 @@ let tokenize report (input, get_location

630

-

631

-       | Some (l, 0) ->

632

-         report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () ->

633

--        emit_character l Uutf.u_rep (fun () ->

634

-+        emit_character l (Uchar.to_int Uutf.u_rep) (fun () ->

635

-         script_data_double_escaped_state l'))

636

-

637

-       | None ->

638

-@@ -838,7 +842,7 @@ let tokenize report (input, get_location

639

-

640

-       | Some (l, 0) ->

641

-         report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () ->

642

--        emit_character l Uutf.u_rep (fun () ->

643

-+        emit_character l (Uchar.to_int Uutf.u_rep) (fun () ->

644

-         script_data_double_escaped_state l'))

645

-

646

-       | None ->

647

-@@ -872,7 +876,7 @@ let tokenize report (input, get_location

648

-         else script_data_double_escaped_state l')

649

-

650

-       | Some (l, c) when is_alphabetic c ->

651

--        add_utf_8 tag_buffer (to_lowercase c);

652

-+        add_utf_8 tag_buffer (Uchar.of_int (to_lowercase c));

653

-         emit_character l c (fun () ->

654

-         script_data_double_escape_end_state l' tag_buffer)

655

-

656

-@@ -910,10 +914,10 @@ let tokenize report (input, get_location

657

-       | Some (l, (0x0022 | 0x0027 | 0x003C | 0x003D as c)) ->

658

-         report l (`Bad_token (char c, "attribute name",

659

-                               "invalid start character")) !throw (fun () ->

660

--        start_attribute c)

661

-+        start_attribute (Uchar.of_int c))

662

-

663

-       | Some (_, c) ->

664

--        start_attribute (to_lowercase c)

665

-+        start_attribute (Uchar.of_int (to_lowercase c))

666

-     end

667

-

668

-   (* 8.2.4.35. *)

669

-@@ -942,14 +946,14 @@ let tokenize report (input, get_location

670

-       | Some (l, (0x0022 | 0x0027 | 0x003C as c)) ->

671

-         report l (`Bad_token (char c, "attribute name",

672

-                               "invalid name character")) !throw (fun () ->

673

--        add_utf_8 name_buffer c;

674

-+        add_utf_8 name_buffer (Uchar.of_int c);

675

-         attribute_name_state l' tag name_buffer)

676

-

677

-       | None ->

678

-         report (get_location ()) (`Unexpected_eoi "tag") !throw data_state

679

-

680

-       | Some (_, c) ->

681

--        add_utf_8 name_buffer (to_lowercase c);

682

-+        add_utf_8 name_buffer (Uchar.of_int (to_lowercase c));

683

-         attribute_name_state l' tag name_buffer

684

-     end

685

-

686

-@@ -985,13 +989,13 @@ let tokenize report (input, get_location

687

-       | Some (l, (0x0022 | 0x0027 | 0x003C as c)) ->

688

-         report l (`Bad_token (char c, "attribute name",

689

-                               "invalid start character")) !throw (fun () ->

690

--        start_next_attribute c)

691

-+        start_next_attribute (Uchar.of_int c))

692

-

693

-       | None ->

694

-         report (get_location ()) (`Unexpected_eoi "tag") !throw data_state

695

-

696

-       | Some (_, c) ->

697

--        start_next_attribute (to_lowercase c)

698

-+        start_next_attribute (Uchar.of_int (to_lowercase c))

699

-     end

700

-

701

-   (* 8.2.4.37. *)

702

-@@ -1030,13 +1034,13 @@ let tokenize report (input, get_location

703

-       | Some (l, (0x003C | 0x003D | 0x0060 as c)) ->

704

-         report l (`Bad_token (char c, "attribute value",

705

-                               "invalid start character")) !throw (fun () ->

706

--        start_value attribute_value_unquoted_state (Some c))

707

-+        start_value attribute_value_unquoted_state (Some (Uchar.of_int c)))

708

-

709

-       | None ->

710

-         report (get_location ()) (`Unexpected_eoi "tag") !throw data_state

711

-

712

-       | Some (_, c) ->

713

--        start_value attribute_value_unquoted_state (Some c)

714

-+        start_value attribute_value_unquoted_state (Some (Uchar.of_int c))

715

-     end

716

-

717

-   (* 8.2.4.38 and 8.2.4.39. *)

718

-@@ -1062,7 +1066,7 @@ let tokenize report (input, get_location

719

-           data_state

720

-

721

-       | Some (_, c) ->

722

--        add_utf_8 value_buffer c;

723

-+        add_utf_8 value_buffer (Uchar.of_int c);

724

-         attribute_value_quoted_state quote l' tag name value_buffer

725

-     end

726

-

727

-@@ -1092,14 +1096,14 @@ let tokenize report (input, get_location

728

-       | Some (l, (0x0022 | 0x0027 | 0x003C | 0x003D | 0x0060 as c)) ->

729

-         report l (`Bad_token (char c, "attribute value",

730

-                               "invalid character")) !throw (fun () ->

731

--        add_utf_8 value_buffer c;

732

-+        add_utf_8 value_buffer (Uchar.of_int c);

733

-         attribute_value_unquoted_state l' tag name value_buffer)

734

-

735

-       | None ->

736

-         report (get_location ()) (`Unexpected_eoi "tag") !throw data_state

737

-

738

-       | Some (_, c) ->

739

--        add_utf_8 value_buffer c;

740

-+        add_utf_8 value_buffer (Uchar.of_int c);

741

-         attribute_value_unquoted_state l' tag name value_buffer

742

-     end

743

-

744

-@@ -1107,7 +1111,7 @@ let tokenize report (input, get_location

745

-   and character_reference_in_attribute allowed l value_buffer k =

746

-     consume_character_reference true (Some allowed) l begin function

747

-       | None ->

748

--        add_utf_8 value_buffer 0x0026;

749

-+        add_utf_8 value_buffer (Uchar.of_int 0x0026);

750

-         k ()

751

-

752

-       | Some (`One c) ->

753

-@@ -1176,7 +1180,7 @@ let tokenize report (input, get_location

754

-           emit_comment l' buffer

755

-

756

-         | Some (_, c) ->

757

--          add_utf_8 buffer c;

758

-+          add_utf_8 buffer (Uchar.of_int c);

759

-           consume ()

760

-       end

761

-     in

762

-@@ -1239,7 +1243,7 @@ let tokenize report (input, get_location

763

-         emit_comment l' buffer)

764

-

765

-       | Some (_, c) ->

766

--        add_utf_8 buffer c;

767

-+        add_utf_8 buffer (Uchar.of_int c);

768

-         comment_state l' buffer

769

-     end

770

-

771

-@@ -1266,7 +1270,7 @@ let tokenize report (input, get_location

772

-

773

-       | Some (_, c) ->

774

-         Buffer.add_char buffer '-';

775

--        add_utf_8 buffer c;

776

-+        add_utf_8 buffer (Uchar.of_int c);

777

-         comment_state l' buffer

778

-     end

779

-

780

-@@ -1286,7 +1290,7 @@ let tokenize report (input, get_location

781

-         emit_comment l' buffer)

782

-

783

-       | Some (_, c) ->

784

--        add_utf_8 buffer c;

785

-+        add_utf_8 buffer (Uchar.of_int c);

786

-         comment_state l' buffer

787

-     end

788

-

789

-@@ -1308,7 +1312,7 @@ let tokenize report (input, get_location

790

-

791

-       | Some (_, c) ->

792

-         Buffer.add_char buffer '-';

793

--        add_utf_8 buffer c;

794

-+        add_utf_8 buffer (Uchar.of_int c);

795

-         comment_state l' buffer

796

-     end

797

-

798

-@@ -1343,7 +1347,7 @@ let tokenize report (input, get_location

799

-         report l (`Bad_token ("--" ^ (char c), "comment",

800

-                               "'--' should be in '-->'")) !throw (fun () ->

801

-         Buffer.add_string buffer "--";

802

--        add_utf_8 buffer c;

803

-+        add_utf_8 buffer (Uchar.of_int c);

804

-         comment_state l' buffer)

805

-     end

806

-

807

-@@ -1369,7 +1373,7 @@ let tokenize report (input, get_location

808

-

809

-       | Some (_, c) ->

810

-         Buffer.add_string buffer "--!";

811

--        add_utf_8 buffer c;

812

-+        add_utf_8 buffer (Uchar.of_int c);

813

-         comment_state l' buffer

814

-     end

815

-

816

-@@ -1420,7 +1424,7 @@ let tokenize report (input, get_location

817

-

818

-       | Some (_, c) ->

819

-         doctype._doctype_name <-

820

--          add_doctype_char doctype._doctype_name (to_lowercase c);

821

-+          add_doctype_char doctype._doctype_name (Uchar.of_int (to_lowercase c));

822

-         doctype_name_state l' doctype

823

-     end

824

-

825

-@@ -1445,7 +1449,7 @@ let tokenize report (input, get_location

826

-

827

-       | Some (_, c) ->

828

-         doctype._doctype_name <-

829

--          add_doctype_char doctype._doctype_name (to_lowercase c);

830

-+          add_doctype_char doctype._doctype_name (Uchar.of_int (to_lowercase c));

831

-         doctype_name_state l' doctype

832

-     end

833

-

834

-@@ -1574,7 +1578,7 @@ let tokenize report (input, get_location

835

-         emit_doctype ~quirks:true l' doctype)

836

-

837

-       | Some (_, c) ->

838

--        add doctype c;

839

-+        add doctype (Uchar.of_int c);

840

-         doctype_identifier_quoted_state add quote next_state l' doctype

841

-     end

842

-

843

-Index: markup.ml-0.7.2/src/html_writer.ml

844

-===================================================================

845

---- markup.ml-0.7.2.orig/src/html_writer.ml

846

-+++ markup.ml-0.7.2/src/html_writer.ml

847

-@@ -8,7 +8,7 @@ let _escape_attribute s =

848

-   Uutf.String.fold_utf_8 (fun () _ -> function

849

-     | `Malformed _ -> ()

850

-     | `Uchar c ->

851

--      match c with

852

-+      match (Uchar.to_int c) with

853

-       | 0x0026 -> Buffer.add_string buffer "&amp;"

854

-       | 0x00A0 -> Buffer.add_string buffer "&nbsp;"

855

-       | 0x0022 -> Buffer.add_string buffer "&quot;"

856

-@@ -21,7 +21,7 @@ let _escape_text s =

857

-   Uutf.String.fold_utf_8 (fun () _ -> function

858

-     | `Malformed _ -> ()

859

-     | `Uchar c ->

860

--      match c with

861

-+      match (Uchar.to_int c) with

862

-       | 0x0026 -> Buffer.add_string buffer "&amp;"

863

-       | 0x00A0 -> Buffer.add_string buffer "&nbsp;"

864

-       | 0x003C -> Buffer.add_string buffer "&lt;"

865

-Index: markup.ml-0.7.2/src/input.ml

866

-===================================================================

867

---- markup.ml-0.7.2.orig/src/input.ml

868

-+++ markup.ml-0.7.2/src/input.ml

869

-@@ -27,13 +27,13 @@ let preprocess is_valid_char report sour

870

-       in

871

-

872

-       let rec iterate () =

873

--        next source throw empty (function

874

-+        next source throw empty (fun x -> match Uchar.to_int x with

875

-           | 0xFEFF when !first_char -> first_char := false; iterate ()

876

-

877

-           | 0x0D ->

878

--            next source throw newline (function

879

-+            next source throw newline (fun y -> match Uchar.to_int y with

880

-               | 0x0A -> newline ()

881

--              | c -> push source c; newline ())

882

-+              | c -> push source (Uchar.of_int c); newline ())

883

-

884

-           | 0x0A -> newline ()

885

-

886

-Index: markup.ml-0.7.2/src/input.mli

887

-===================================================================

888

---- markup.ml-0.7.2.orig/src/input.mli

889

-+++ markup.ml-0.7.2/src/input.mli

890

-@@ -4,5 +4,5 @@

891

- open Common

892

-

893

- val preprocess :

894

--  (int -> bool) -> Error.parse_handler -> int Kstream.t ->

895

-+  (int -> bool) -> Error.parse_handler -> Uchar.t Kstream.t ->

896

-     (location * int) Kstream.t * (unit -> location)

897

-Index: markup.ml-0.7.2/src/markup.ml

898

-===================================================================

899

---- markup.ml-0.7.2.orig/src/markup.ml

900

-+++ markup.ml-0.7.2/src/markup.ml

901

-@@ -187,7 +187,7 @@ sig

902

-

903

-     val decode :

904

-       ?report:(location -> Error.t -> unit io) -> t ->

905

--      (char, _) stream -> (int, async) stream

906

-+      (char, _) stream -> (Uchar.t, async) stream

907

-   end

908

-

909

-   val parse_xml :

910

-Index: markup.ml-0.7.2/src/markup.mli

911

-===================================================================

912

---- markup.ml-0.7.2.orig/src/markup.mli

913

-+++ markup.ml-0.7.2/src/markup.mli

914

-@@ -194,7 +194,7 @@ sig

915

-

916

-   val decode :

917

-     ?report:(location -> Error.t -> unit) -> t ->

918

--    (char, 's) stream -> (int, 's) stream

919

-+    (char, 's) stream -> (Uchar.t, 's) stream

920

-   (** Applies a decoder to a byte stream. Illegal input byte sequences result in

921

-       calls to the error handler [~report] with error kind [`Decoding_error].

922

-       The illegal bytes are then skipped, and zero or more U+FFFD replacement

923

-@@ -764,7 +764,7 @@ sig

924

-

925

-     val decode :

926

-       ?report:(location -> Error.t -> unit io) -> Encoding.t ->

927

--      (char, _) stream -> (int, async) stream

928

-+      (char, _) stream -> (Uchar.t, async) stream

929

-   end

930

-

931

-   (** {2 XML} *)

932

-@@ -838,7 +838,7 @@ val kstream : ('a, _) stream -> 'a Kstre

933

- val of_kstream : 'a Kstream.t -> ('a, _) stream

934

-

935

- val preprocess_input_stream :

936

--  (int, 's) stream -> (location * int, 's) stream * (unit -> location)

937

-+  (Uchar.t, 's) stream -> (location * int, 's) stream * (unit -> location)

938

-

939

- (**/**)

940

-

941

-Index: markup.ml-0.7.2/src/utility.ml

942

-===================================================================

943

---- markup.ml-0.7.2.orig/src/utility.ml

944

-+++ markup.ml-0.7.2/src/utility.ml

945

-@@ -346,11 +346,11 @@ let xhtml_entity name =

946

-

947

-     match lookup 0 with

948

-     | `One c ->

949

--      add_utf_8 buffer c;

950

-+      add_utf_8 buffer (Uchar.of_int c);

951

-       Some (Buffer.contents buffer)

952

-     | `Two (c, c') ->

953

--      add_utf_8 buffer c;

954

--      add_utf_8 buffer c';

955

-+      add_utf_8 buffer (Uchar.of_int c);

956

-+      add_utf_8 buffer (Uchar.of_int c');

957

-       Some (Buffer.contents buffer)

958

-

959

-   with Exit -> None

960

-Index: markup.ml-0.7.2/src/xml_tokenizer.ml

961

-===================================================================

962

---- markup.ml-0.7.2.orig/src/xml_tokenizer.ml

963

-+++ markup.ml-0.7.2/src/xml_tokenizer.ml

964

-@@ -101,7 +101,7 @@ let tokenize report resolve_reference (i

965

-               end

966

-

967

-           | _, c when filter c ->

968

--            add_utf_8 buffer c;

969

-+            add_utf_8 buffer (Uchar.of_int c);

970

-             read ()

971

-

972

-           | l, c ->

973

-@@ -133,7 +133,7 @@ let tokenize report resolve_reference (i

974

-

975

-       | _, c when is_name_start_char c ->

976

-         let buffer = Buffer.create 32 in

977

--        add_utf_8 buffer c;

978

-+        add_utf_8 buffer (Uchar.of_int c);

979

-         let rec read () =

980

-           next input !throw unexpected_eoi begin function

981

-             | _, 0x003B ->

982

-@@ -146,7 +146,7 @@ let tokenize report resolve_reference (i

983

-               end

984

-

985

-             | _, c when is_name_char c ->

986

--              add_utf_8 buffer c;

987

-+              add_utf_8 buffer (Uchar.of_int c);

988

-               read ()

989

-

990

-             | l, c ->

991

-@@ -218,7 +218,7 @@ let tokenize report resolve_reference (i

992

-           report_if (not @@ is_name_start_char c) l (fun () ->

993

-             `Bad_token (char c, "attribute", "invalid start character"))

994

-             !throw (fun () ->

995

--          add_utf_8 name_buffer c;

996

-+          add_utf_8 name_buffer (Uchar.of_int c);

997

-           name_state ())

998

-       end

999

-

1000

-@@ -235,7 +235,7 @@ let tokenize report resolve_reference (i

1001

-           report_if (not @@ is_name_start_char c) l (fun () ->

1002

-             `Bad_token (char c, "attribute", "invalid name character"))

1003

-             !throw (fun () ->

1004

--          add_utf_8 name_buffer c;

1005

-+          add_utf_8 name_buffer (Uchar.of_int c);

1006

-           name_state ())

1007

-       end

1008

-

1009

-@@ -275,14 +275,14 @@ let tokenize report resolve_reference (i

1010

-           report l

1011

-             (`Bad_token ("&", "attribute", "replace with '&amp;'"))

1012

-             !throw (fun () ->

1013

--          add_utf_8 value_buffer 0x0026;

1014

-+          add_utf_8 value_buffer (Uchar.of_int 0x0026);

1015

-           state ())

1016

-       end

1017

-

1018

-     and handle_lt l state =

1019

-       report l (`Bad_token ("<", "attribute", "replace with '&lt;'")) !throw

1020

-         (fun () ->

1021

--      add_utf_8 value_buffer 0x003C;

1022

-+      add_utf_8 value_buffer (Uchar.of_int 0x003C);

1023

-       state ())

1024

-

1025

-     and quoted_value_state quote =

1026

-@@ -300,7 +300,7 @@ let tokenize report resolve_reference (i

1027

-           quoted_value_state quote)

1028

-

1029

-         | _, c ->

1030

--          add_utf_8 value_buffer c;

1031

-+          add_utf_8 value_buffer (Uchar.of_int c);

1032

-           quoted_value_state quote

1033

-       end

1034

-

1035

-@@ -317,7 +317,7 @@ let tokenize report resolve_reference (i

1036

-           handle_lt l unquoted_value_state

1037

-

1038

-         | _, c ->

1039

--          add_utf_8 value_buffer c;

1040

-+          add_utf_8 value_buffer (Uchar.of_int c);

1041

-           unquoted_value_state ()

1042

-       end

1043

-

1044

-@@ -372,7 +372,7 @@ let tokenize report resolve_reference (i

1045

-           report_if (not @@ is_name_start_char c) l (fun () ->

1046

-             `Bad_token (char c, pi, "invalid start character")) !throw

1047

-             (fun () ->

1048

--          add_utf_8 target_buffer c;

1049

-+          add_utf_8 target_buffer (Uchar.of_int c);

1050

-           target_state ())

1051

-       end

1052

-

1053

-@@ -388,13 +388,13 @@ let tokenize report resolve_reference (i

1054

-           report_if (not @@ is_name_char c) l (fun () ->

1055

-             `Bad_token (char c, pi, "invalid name character")) !throw

1056

-             (fun () ->

1057

--          add_utf_8 target_buffer c;

1058

-+          add_utf_8 target_buffer (Uchar.of_int c);

1059

-           target_state ())

1060

-       end

1061

-

1062

-     and text_state () =

1063

-       next' pi finish_pi (fun (_, c) ->

1064

--        add_utf_8 text_buffer c;

1065

-+        add_utf_8 text_buffer (Uchar.of_int c);

1066

-         text_state ())

1067

-

1068

-     and xml_declaration_state () =

1069

-@@ -572,7 +572,7 @@ let tokenize report resolve_reference (i

1070

-   and initial_state () =

1071

-     next input !throw (fun () -> emit_eoi ()) begin function

1072

-       | l, (0x005D as c) ->

1073

--        add_character l c;

1074

-+        add_character l (Uchar.of_int c);

1075

-         one_bracket_state l

1076

-

1077

-       | l, 0x003C ->

1078

-@@ -583,7 +583,7 @@ let tokenize report resolve_reference (i

1079

-           | None ->

1080

-             report l (`Bad_token (char c, "text", "replace with '&amp;'"))

1081

-               !throw (fun () ->

1082

--            add_character l c;

1083

-+            add_character l (Uchar.of_int c);

1084

-             initial_state ())

1085

-

1086

-           | Some s ->

1087

-@@ -591,14 +591,14 @@ let tokenize report resolve_reference (i

1088

-             initial_state ())

1089

-

1090

-       | l, c ->

1091

--        add_character l c;

1092

-+        add_character l (Uchar.of_int c);

1093

-         initial_state ()

1094

-     end

1095

-

1096

-   and one_bracket_state l' =

1097

-     next_option input !throw begin function

1098

-       | Some (l, (0x005D as c)) ->

1099

--        add_character l c;

1100

-+        add_character l (Uchar.of_int c);

1101

-         two_brackets_state l' l

1102

-

1103

-       | v ->

1104

-@@ -611,11 +611,11 @@ let tokenize report resolve_reference (i

1105

-       | Some (l, (0x003E as c)) ->

1106

-         report l' (`Bad_token ("]]>", "text", "must end a CDATA section"))

1107

-           !throw (fun () ->

1108

--        add_character l c;

1109

-+        add_character l (Uchar.of_int c);

1110

-         initial_state ())

1111

-

1112

-       | Some (l, (0x005D as c)) ->

1113

--        add_character l c;

1114

-+        add_character l (Uchar.of_int c);

1115

-         two_brackets_state l'' l

1116

-

1117

-       | v ->

1118

-@@ -626,7 +626,7 @@ let tokenize report resolve_reference (i

1119

-   and begin_markup_state l' =

1120

-     let recover v =

1121

-       lt_in_text l' (fun () ->

1122

--      add_character l' 0x003C;

1123

-+      add_character l' (Uchar.of_int 0x003C);

1124

-       push_option input v;

1125

-       initial_state ())

1126

-     in

1127

-@@ -648,7 +648,7 @@ let tokenize report resolve_reference (i

1128

-

1129

-       | _, c when is_name_start_char c ->

1130

-         let tag_name_buffer = Buffer.create 32 in

1131

--        add_utf_8 tag_name_buffer c;

1132

-+        add_utf_8 tag_name_buffer (Uchar.of_int c);

1133

-         start_tag_state l' tag_name_buffer

1134

-

1135

-       | l, c as v ->

1136

-@@ -660,7 +660,7 @@ let tokenize report resolve_reference (i

1137

-   and start_tag_state l' buffer =

1138

-     let recover v =

1139

-       lt_in_text l' (fun () ->

1140

--      add_character l' 0x003C;

1141

-+      add_character l' (Uchar.of_int 0x003C);

1142

-       add_string l' (Buffer.contents buffer);

1143

-       push_option input v;

1144

-       initial_state ())

1145

-@@ -680,7 +680,7 @@ let tokenize report resolve_reference (i

1146

-         attributes_state l' (Buffer.contents buffer) []

1147

-

1148

-       | _, c when is_name_char c ->

1149

--        add_utf_8 buffer c;

1150

-+        add_utf_8 buffer (Uchar.of_int c);

1151

-         start_tag_state l' buffer

1152

-

1153

-       | l, c as v ->

1154

-@@ -731,8 +731,8 @@ let tokenize report resolve_reference (i

1155

-   and end_tag_state l' =

1156

-     let recover v =

1157

-       lt_in_text l' (fun () ->

1158

--      add_character l' 0x003C;

1159

--      add_character l' 0x002F;

1160

-+      add_character l' (Uchar.of_int 0x003C);

1161

-+      add_character l' (Uchar.of_int 0x002F);

1162

-       push_option input v;

1163

-       initial_state ())

1164

-     in

1165

-@@ -743,7 +743,7 @@ let tokenize report resolve_reference (i

1166

-     begin function

1167

-       | _, c when is_name_start_char c ->

1168

-         let name_buffer = Buffer.create 32 in

1169

--        add_utf_8 name_buffer c;

1170

-+        add_utf_8 name_buffer (Uchar.of_int c);

1171

-         end_tag_name_state l' name_buffer

1172

-

1173

-       | l, c as v ->

1174

-@@ -755,8 +755,8 @@ let tokenize report resolve_reference (i

1175

-   and end_tag_name_state l' buffer =

1176

-     let recover v =

1177

-       lt_in_text l' (fun () ->

1178

--      add_character l' 0x003C;

1179

--      add_character l' 0x002F;

1180

-+      add_character l' (Uchar.of_int 0x003C);

1181

-+      add_character l' (Uchar.of_int 0x002F);

1182

-       add_string l' (Buffer.contents buffer);

1183

-       push_option input v;

1184

-       initial_state ())

1185

-@@ -773,7 +773,7 @@ let tokenize report resolve_reference (i

1186

-         end_tag_whitespace_state false l' (Buffer.contents buffer)

1187

-

1188

-       | _, c when is_name_char c ->

1189

--        add_utf_8 buffer c;

1190

-+        add_utf_8 buffer (Uchar.of_int c);

1191

-         end_tag_name_state l' buffer

1192

-

1193

-       | l, c as v ->

1194

-@@ -821,8 +821,8 @@ let tokenize report resolve_reference (i

1195

-

1196

-       | v ->

1197

-         bad_comment_start "<!" l' (fun () ->

1198

--        add_character l' 0x003C;

1199

--        add_character l' 0x0021;

1200

-+        add_character l' (Uchar.of_int 0x003C);

1201

-+        add_character l' (Uchar.of_int 0x0021);

1202

-         push_option input v;

1203

-         initial_state ())

1204

-     end

1205

-@@ -834,9 +834,9 @@ let tokenize report resolve_reference (i

1206

-

1207

-       | v ->

1208

-         bad_comment_start "<!-" l' (fun () ->

1209

--        add_character l' 0x003C;

1210

--        add_character l' 0x0021;

1211

--        add_character l' 0x002D;

1212

-+        add_character l' (Uchar.of_int 0x003C);

1213

-+        add_character l' (Uchar.of_int 0x0021);

1214

-+        add_character l' (Uchar.of_int 0x002D);

1215

-         push_option input v;

1216

-         initial_state ())

1217

-     end

1218

-@@ -852,7 +852,7 @@ let tokenize report resolve_reference (i

1219

-         comment_one_dash_state l' l buffer

1220

-

1221

-       | _, c ->

1222

--        add_utf_8 buffer c;

1223

-+        add_utf_8 buffer (Uchar.of_int c);

1224

-         comment_state l' buffer

1225

-     end

1226

-

1227

-@@ -863,8 +863,8 @@ let tokenize report resolve_reference (i

1228

-         comment_two_dashes_state false l' l'' buffer

1229

-

1230

-       | _, c ->

1231

--        add_utf_8 buffer 0x002D;

1232

--        add_utf_8 buffer c;

1233

-+        add_utf_8 buffer (Uchar.of_int 0x002D);

1234

-+        add_utf_8 buffer (Uchar.of_int c);

1235

-         comment_state l' buffer

1236

-     end

1237

-

1238

-@@ -883,14 +883,14 @@ let tokenize report resolve_reference (i

1239

-

1240

-       | _, 0x002D ->

1241

-         recover (fun () ->

1242

--        add_utf_8 buffer 0x002D;

1243

-+        add_utf_8 buffer (Uchar.of_int 0x002D);

1244

-         comment_two_dashes_state true l' l'' buffer)

1245

-

1246

-       | _, c ->

1247

-         recover (fun () ->

1248

--        add_utf_8 buffer 0x002D;

1249

--        add_utf_8 buffer 0x002D;

1250

--        add_utf_8 buffer c;

1251

-+        add_utf_8 buffer (Uchar.of_int 0x002D);

1252

-+        add_utf_8 buffer (Uchar.of_int 0x002D);

1253

-+        add_utf_8 buffer (Uchar.of_int c);

1254

-         comment_state l' buffer)

1255

-     end

1256

-

1257

-@@ -905,9 +905,9 @@ let tokenize report resolve_reference (i

1258

-           !throw (fun () ->

1259

-         lt_in_text l' (fun () ->

1260

-         push_list input cs;

1261

--        add_character l' 0x003C;

1262

--        add_character l' 0x0021;

1263

--        add_character l' 0x005B;

1264

-+        add_character l' (Uchar.of_int 0x003C);

1265

-+        add_character l' (Uchar.of_int 0x0021);

1266

-+        add_character l' (Uchar.of_int 0x005B);

1267

-         initial_state ()))

1268

-     end

1269

-

1270

-@@ -918,7 +918,7 @@ let tokenize report resolve_reference (i

1271

-         cdata_one_bracket_state l' l

1272

-

1273

-       | l, c ->

1274

--        add_character l c;

1275

-+        add_character l (Uchar.of_int c);

1276

-         cdata_state l'

1277

-     end

1278

-

1279

-@@ -929,8 +929,8 @@ let tokenize report resolve_reference (i

1280

-         cdata_two_brackets_state l' l'' l

1281

-

1282

-       | l, c ->

1283

--        add_character l'' 0x005D;

1284

--        add_character l c;

1285

-+        add_character l'' (Uchar.of_int 0x005D);

1286

-+        add_character l   (Uchar.of_int c);

1287

-         cdata_state l'

1288

-     end

1289

-

1290

-@@ -941,13 +941,13 @@ let tokenize report resolve_reference (i

1291

-         initial_state ()

1292

-

1293

-       | l, 0x005D ->

1294

--        add_character l'' 0x005D;

1295

-+        add_character l'' (Uchar.of_int 0x005D);

1296

-         cdata_two_brackets_state l' l''' l

1297

-

1298

-       | l, c ->

1299

--        add_character l'' 0x005D;

1300

--        add_character l''' 0x005D;

1301

--        add_character l c;

1302

-+        add_character l'' (Uchar.of_int 0x005D);

1303

-+        add_character l''' (Uchar.of_int 0x005D);

1304

-+        add_character l (Uchar.of_int c);

1305

-         cdata_state l'

1306

-     end

1307

-

1308

-@@ -963,9 +963,9 @@ let tokenize report resolve_reference (i

1309

-           !throw (fun () ->

1310

-         lt_in_text l' (fun () ->

1311

-         push_list input cs;

1312

--        add_character l' 0x003C;

1313

--        add_character l' 0x0021;

1314

--        add_character l' 0x0044;

1315

-+        add_character l' (Uchar.of_int 0x003C);

1316

-+        add_character l' (Uchar.of_int 0x0021);

1317

-+        add_character l' (Uchar.of_int 0x0044);

1318

-         initial_state ()))

1319

-     end

1320

-

1321

-@@ -980,15 +980,15 @@ let tokenize report resolve_reference (i

1322

-         emit_doctype l' buffer initial_state

1323

-

1324

-       | _, (0x0022 | 0x0027 as c) ->

1325

--        add_utf_8 buffer c;

1326

-+        add_utf_8 buffer (Uchar.of_int c);

1327

-         doctype_quoted_state (fun () -> doctype_state l' buffer) c l' buffer

1328

-

1329

-       | _, (0x003C as c) ->

1330

--        add_utf_8 buffer c;

1331

-+        add_utf_8 buffer (Uchar.of_int c);

1332

-         doctype_item_state (fun () -> doctype_state l' buffer) l' buffer

1333

-

1334

-       | _, c ->

1335

--        add_utf_8 buffer c;

1336

-+        add_utf_8 buffer (Uchar.of_int c);

1337

-         doctype_state l' buffer

1338

-     end

1339

-

1340

-@@ -996,11 +996,11 @@ let tokenize report resolve_reference (i

1341

-     next input !throw (fun () -> unterminated_doctype l' buffer)

1342

-     begin function

1343

-       | _, c when c = quote ->

1344

--        add_utf_8 buffer c;

1345

-+        add_utf_8 buffer (Uchar.of_int c);

1346

-         state ()

1347

-

1348

-       | _, c ->

1349

--        add_utf_8 buffer c;

1350

-+        add_utf_8 buffer (Uchar.of_int c);

1351

-         doctype_quoted_state state quote l' buffer

1352

-     end

1353

-

1354

-@@ -1008,18 +1008,18 @@ let tokenize report resolve_reference (i

1355

-     next input !throw (fun () -> unterminated_doctype l' buffer)

1356

-     begin function

1357

-       | _, (0x0021 as c) ->

1358

--        add_utf_8 buffer c;

1359

-+        add_utf_8 buffer (Uchar.of_int c);

1360

-         doctype_declaration_state state l' buffer

1361

-

1362

-       | l, (0x003F as c) ->

1363

--        add_utf_8 buffer c;

1364

--        let undo = tap (fun (_, c) -> add_utf_8 buffer c) input in

1365

-+        add_utf_8 buffer (Uchar.of_int c);

1366

-+        let undo = tap (fun (_, c) -> add_utf_8 buffer (Uchar.of_int c)) input in

1367

-         parse_declaration_or_processing_instruction l (fun _ ->

1368

-         undo ();

1369

-         state ())

1370

-

1371

-       | _, c ->

1372

--        add_utf_8 buffer c;

1373

-+        add_utf_8 buffer (Uchar.of_int c);

1374

-         state ()

1375

-     end

1376

-

1377

-@@ -1027,16 +1027,16 @@ let tokenize report resolve_reference (i

1378

-     next input !throw (fun () -> unterminated_doctype l' buffer)

1379

-     begin function

1380

-       | _, (0x003E as c) ->

1381

--        add_utf_8 buffer c;

1382

-+        add_utf_8 buffer (Uchar.of_int c);

1383

-         state ()

1384

-

1385

-       | _, (0x0022 | 0x0027 as c) ->

1386

--        add_utf_8 buffer c;

1387

-+        add_utf_8 buffer (Uchar.of_int c);

1388

-         doctype_quoted_state

1389

-           (fun () -> doctype_declaration_state state l' buffer) c l' buffer

1390

-

1391

-       | _, c ->

1392

--        add_utf_8 buffer c;

1393

-+        add_utf_8 buffer (Uchar.of_int c);

1394

-         doctype_declaration_state state l' buffer

1395

-     end

1396

-

1397

1398

diff --git a/dev-ml/markup/markup-0.7.2-r1.ebuild b/dev-ml/markup/markup-0.7.2-r1.ebuild

1399

deleted file mode 100644

1400

index f70ac55..00000000

1401

--- a/dev-ml/markup/markup-0.7.2-r1.ebuild

1402

+++ /dev/null

1403

@@ -1,44 +0,0 @@

1404

-# Copyright 1999-2016 Gentoo Foundation

1405

-# Distributed under the terms of the GNU General Public License v2

1406

-# $Id$

1407

-

1408

-EAPI=5

1409

-

1410

-inherit findlib eutils

1411

-

1412

-DESCRIPTION="Error-recovering streaming HTML5 and XML parsers"

1413

-HOMEPAGE="https://github.com/aantron/markup.ml"

1414

-SRC_URI="https://github.com/aantron/markup.ml/archive/${PV}.tar.gz -> ${P}.tar.gz"

1415

-

1416

-LICENSE="BSD"

1417

-SLOT="0/${PV}p1"

1418

-KEYWORDS="~amd64"

1419

-IUSE="doc test"

1420

-

1421

-DEPEND="

1422

-	dev-lang/ocaml:=[ocamlopt]

1423

-	dev-ml/lwt:=[ocamlopt]

1424

-	>=dev-ml/uutf-1.0:=[ocamlopt]

1425

-"

1426

-RDEPEND="${DEPEND}"

1427

-DEPEND="${DEPEND}

1428

-	test? ( dev-ml/ounit )

1429

-	dev-ml/ocamlbuild"

1430

-S="${WORKDIR}/${PN}.ml-${PV}"

1431

-

1432

-src_prepare() {

1433

-	epatch "${FILESDIR}/uutf.patch" \

1434

-		"${FILESDIR}/test.patch"

1435

-}

1436

-

1437

-src_compile() {

1438

-	emake

1439

-	use doc && emake docs

1440

-}

1441

-

1442

-src_install() {

1443

-	findlib_src_preinst

1444

-	emake ocamlfind-install

1445

-	dodoc README.md

1446

-	use doc && dohtml doc/html/*

1447

-}

Gentoo Archives: gentoo-commits