Gentoo Archives: gentoo-commits

From: Alexis Ballier <aballier@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] repo/gentoo:master commit in: dev-ml/markup/, dev-ml/markup/files/
Date: Thu, 01 Dec 2016 17:55:04
Message-Id: 1480614892.02edd946d92a10dd27a13f2ece1868483d4c2880.aballier@gentoo
1 commit: 02edd946d92a10dd27a13f2ece1868483d4c2880
2 Author: Alexis Ballier <aballier <AT> gentoo <DOT> org>
3 AuthorDate: Thu Dec 1 17:54:24 2016 +0000
4 Commit: Alexis Ballier <aballier <AT> gentoo <DOT> org>
5 CommitDate: Thu Dec 1 17:54:52 2016 +0000
6 URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=02edd946
7
8 dev-ml/markup: remove old
9
10 Package-Manager: portage-2.3.2
11
12 dev-ml/markup/Manifest | 1 -
13 dev-ml/markup/files/test.patch | 273 ---------
14 dev-ml/markup/files/uutf.patch | 1085 ----------------------------------
15 dev-ml/markup/markup-0.7.2-r1.ebuild | 44 --
16 4 files changed, 1403 deletions(-)
17
18 diff --git a/dev-ml/markup/Manifest b/dev-ml/markup/Manifest
19 index 1819e63..29247e7 100644
20 --- a/dev-ml/markup/Manifest
21 +++ b/dev-ml/markup/Manifest
22 @@ -1,2 +1 @@
23 -DIST markup-0.7.2.tar.gz 275010 SHA256 630a737ab6113e17999aacfd55f73b6671211d7980be86f0c711c0b385887c34 SHA512 72a87f54692a0b751c23e6b52bc4ecaa68334b0c6c067793cbf5b011b7d06ce7563f9aa2daeef3553ab48bb6cb9e592587b5a4f37279eaef7b45e19e5b372f73 WHIRLPOOL 679a01c5d197eadf1a8b74247e276405c182acff4c7781b577fbad9fcdc33be164ff81222e79c4e0e5193d1295ee4896ddda547cce1712bfb5ebda050f5bf5ac
24 DIST markup-0.7.3.tar.gz 275094 SHA256 e1eb3562e0d26ccc33aa5dbe802e4210dbd7c30a8e69b6098b825afb11bb6af1 SHA512 e4577e438241d58c728507c88f14b7f029dbc4aa6b9c5dbf78f03b6c083a430026158c3146a88c14c9cd90a242b1bb4ed838b150bb89433fb6a6f673e5d2bb66 WHIRLPOOL ad967738706d4c017f266ecdef7b0772ce0bc17f9bc7dda228ffabc9cccccc88cde69337e063577d1fcda1e93cf4a7f18bbbf09709ee82f0a4b8382f5e339d2e
25
26 diff --git a/dev-ml/markup/files/test.patch b/dev-ml/markup/files/test.patch
27 deleted file mode 100644
28 index f2a5257..00000000
29 --- a/dev-ml/markup/files/test.patch
30 +++ /dev/null
31 @@ -1,273 +0,0 @@
32 -Index: markup.ml-0.7.2/test/test_encoding.ml
33 -===================================================================
34 ---- markup.ml-0.7.2.orig/test/test_encoding.ml
35 -+++ markup.ml-0.7.2/test/test_encoding.ml
36 -@@ -15,9 +15,9 @@ let test_ucs_4 (f : Encoding.t) name s1
37 - expect_error (1, 2) (`Decoding_error (bad_bytes, name))
38 - begin fun report ->
39 - let chars = s1 |> string |> f ~report in
40 -- next_option chars ok (assert_equal (Some (Char.code 'f')));
41 -+ next_option chars ok (assert_equal (Some ((Uchar.of_int (Char.code 'f')))));
42 - next_option chars ok (assert_equal (Some Uutf.u_rep));
43 -- next_option chars ok (assert_equal (Some (Char.code 'o')));
44 -+ next_option chars ok (assert_equal (Some ((Uchar.of_int (Char.code 'o')))));
45 - next_option chars ok (assert_equal None);
46 - next_option chars ok (assert_equal None)
47 - end;
48 -@@ -25,9 +25,9 @@ let test_ucs_4 (f : Encoding.t) name s1
49 - expect_error (2, 2) (`Decoding_error ("\x00\x00\x00", name))
50 - begin fun report ->
51 - let chars = s2 |> string |> f ~report in
52 -- next_option chars ok (assert_equal (Some (Char.code 'f')));
53 -- next_option chars ok (assert_equal (Some 0x000A));
54 -- next_option chars ok (assert_equal (Some (Char.code 'o')));
55 -+ next_option chars ok (assert_equal (Some (Uchar.of_int (Char.code 'f'))));
56 -+ next_option chars ok (assert_equal (Some (Uchar.of_int 0x000A)));
57 -+ next_option chars ok (assert_equal (Some (Uchar.of_int (Char.code 'o'))));
58 - next_option chars ok (assert_equal (Some Uutf.u_rep));
59 - next_option chars ok (assert_equal None);
60 - next_option chars ok (assert_equal None)
61 -@@ -38,12 +38,12 @@ let tests = [
62 - let s = "\xef\xbb\xbffoo\xf0\x9f\x90\x99bar\xa0more" in
63 - expect_error (1, 8) (`Decoding_error ("\xa0", "utf-8")) begin fun report ->
64 - let chars = s |> string |> utf_8 ~report in
65 -- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o']));
66 -- next_option chars ok (assert_equal (Some 0x1F419));
67 -- next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r']));
68 -+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o']));
69 -+ next_option chars ok (assert_equal (Some (Uchar.of_int 0x1F419)));
70 -+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['b'; 'a'; 'r']));
71 - next_option chars ok (assert_equal (Some Uutf.u_rep));
72 - next_n 4 chars ok
73 -- (assert_equal (List.map Char.code ['m'; 'o'; 'r'; 'e']));
74 -+ (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['m'; 'o'; 'r'; 'e']));
75 - next_option chars ok (assert_equal None);
76 - next_option chars ok (assert_equal None)
77 - end);
78 -@@ -53,11 +53,11 @@ let tests = [
79 - expect_error (1, 6) (`Decoding_error ("\xdc\x19", "utf-16be"))
80 - begin fun report ->
81 - let chars = s |> string |> utf_16be ~report in
82 -- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o']));
83 -- next_option chars ok (assert_equal (Some 0x1F419));
84 -- next_option chars ok (assert_equal (Some (Char.code 'b')));
85 -+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o']));
86 -+ next_option chars ok (assert_equal (Some (Uchar.of_int 0x1F419)));
87 -+ next_option chars ok (assert_equal (Some (Uchar.of_int (Char.code 'b'))));
88 - next_option chars ok (assert_equal (Some Uutf.u_rep));
89 -- next_n 16 chars ok (assert_equal (List.map Char.code ['a'; 'r']));
90 -+ next_n 16 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['a'; 'r']));
91 - next_option chars ok (assert_equal None);
92 - next_option chars ok (assert_equal None)
93 - end);
94 -@@ -67,11 +67,11 @@ let tests = [
95 - expect_error (1, 6) (`Decoding_error ("\x19\xdc", "utf-16le"))
96 - begin fun report ->
97 - let chars = s |> string |> utf_16le ~report in
98 -- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o']));
99 -- next_option chars ok (assert_equal (Some 0x1F419));
100 -- next_option chars ok (assert_equal (Some (Char.code 'b')));
101 -+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o']));
102 -+ next_option chars ok (assert_equal (Some (Uchar.of_int 0x1F419)));
103 -+ next_option chars ok (assert_equal (Some (Uchar.of_int (Char.code 'b'))));
104 - next_option chars ok (assert_equal (Some Uutf.u_rep));
105 -- next_n 16 chars ok (assert_equal (List.map Char.code ['a'; 'r']));
106 -+ next_n 16 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['a'; 'r']));
107 - next_option chars ok (assert_equal None);
108 - next_option chars ok (assert_equal None)
109 - end);
110 -@@ -79,7 +79,7 @@ let tests = [
111 - ("encoding.iso_8859_1" >:: fun _ ->
112 - let chars = string "foo\xa0" |> iso_8859_1 in
113 - next_n 4 chars
114 -- ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'; '\xa0']));
115 -+ ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o'; '\xa0']));
116 - next_option chars ok (assert_equal None);
117 - next_option chars ok (assert_equal None));
118 -
119 -@@ -88,26 +88,26 @@ let tests = [
120 - expect_error (1, 4) (`Decoding_error ("\xa0", "us-ascii"))
121 - begin fun report ->
122 - let chars = s |> string |> us_ascii ~report in
123 -- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o']));
124 -+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o']));
125 - next_option chars ok (assert_equal (Some Uutf.u_rep));
126 -- next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r']));
127 -+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['b'; 'a'; 'r']));
128 - next_option chars ok (assert_equal None);
129 - next_option chars ok (assert_equal None)
130 - end);
131 -
132 - ("encoding.windows_1251" >:: fun _ ->
133 - let chars = string "foo\xe0\xe1\xe2bar" |> windows_1251 in
134 -- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o']));
135 -- next_n 3 chars ok (assert_equal [0x0430; 0x0431; 0x0432]);
136 -- next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r']));
137 -+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o']));
138 -+ next_n 3 chars ok (assert_equal [Uchar.of_int 0x0430; Uchar.of_int 0x0431; Uchar.of_int 0x0432]);
139 -+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['b'; 'a'; 'r']));
140 - next_option chars ok (assert_equal None);
141 - next_option chars ok (assert_equal None));
142 -
143 - ("encoding.windows_1252" >:: fun _ ->
144 - let chars = string "foo\x80\x83bar" |> windows_1252 in
145 -- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o']));
146 -- next_n 2 chars ok (assert_equal [0x20AC; 0x0192]);
147 -- next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r']));
148 -+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o']));
149 -+ next_n 2 chars ok (assert_equal [Uchar.of_int 0x20AC; Uchar.of_int 0x0192]);
150 -+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['b'; 'a'; 'r']));
151 - next_option chars ok (assert_equal None);
152 - next_option chars ok (assert_equal None));
153 -
154 -@@ -137,7 +137,7 @@ let tests = [
155 -
156 - ("encoding.ebcdic" >:: fun _ ->
157 - let chars = string "\x86\x96\x96" |> ebcdic in
158 -- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o']));
159 -+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o']));
160 - next_option chars ok (assert_equal None);
161 - next_option chars ok (assert_equal None));
162 - ]
163 -Index: markup.ml-0.7.2/test/test_html_tokenizer.ml
164 -===================================================================
165 ---- markup.ml-0.7.2.orig/test/test_html_tokenizer.ml
166 -+++ markup.ml-0.7.2/test/test_html_tokenizer.ml
167 -@@ -134,7 +134,7 @@ let tests = [
168 - expect "&#1000000000000000000000000000000;"
169 - [ 1, 1, E (`Bad_token ("&#1000000000000000000000000000000;",
170 - reference, "out of range"));
171 -- 1, 1, S (`Char Uutf.u_rep);
172 -+ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep));
173 - 1, 35, S `EOF];
174 -
175 - expect "&#1000000000000000000000000000000"
176 -@@ -142,22 +142,22 @@ let tests = [
177 - reference, "missing ';' at end"));
178 - 1, 1, E (`Bad_token ("&#1000000000000000000000000000000",
179 - reference, "out of range"));
180 -- 1, 1, S (`Char Uutf.u_rep);
181 -+ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep));
182 - 1, 34, S `EOF];
183 -
184 - expect "&#xD800;"
185 - [ 1, 1, E (`Bad_token ("&#xD800;", reference, "out of range"));
186 -- 1, 1, S (`Char Uutf.u_rep);
187 -+ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep));
188 - 1, 9, S `EOF];
189 -
190 - expect "&#x110000;"
191 - [ 1, 1, E (`Bad_token ("&#x110000;", reference, "out of range"));
192 -- 1, 1, S (`Char Uutf.u_rep);
193 -+ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep));
194 - 1, 11, S `EOF];
195 -
196 - expect "&#0;"
197 - [ 1, 1, E (`Bad_token ("&#0;", reference, "out of range"));
198 -- 1, 1, S (`Char Uutf.u_rep);
199 -+ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep));
200 - 1, 5, S `EOF];
201 -
202 - expect "&#x01;"
203 -@@ -264,7 +264,7 @@ let tests = [
204 - expect ~state:`RCDATA "f\x00</foo>"
205 - ([ 1, 1, S (`Char 0x66);
206 - 1, 2, E (`Bad_token ("U+0000", "content", "null"));
207 -- 1, 2, S (`Char Uutf.u_rep)] @
208 -+ 1, 2, S (`Char (Uchar.to_int Uutf.u_rep))] @
209 - (char_sequence ~start:3 "</foo>"));
210 -
211 - expect ~state:`RCDATA "<title>f</title >"
212 -@@ -302,7 +302,7 @@ let tests = [
213 - expect ~state:`RAWTEXT "f\x00</foo>"
214 - ([ 1, 1, S (`Char 0x66);
215 - 1, 2, E (`Bad_token ("U+0000", "content", "null"));
216 -- 1, 2, S (`Char Uutf.u_rep)] @
217 -+ 1, 2, S (`Char (Uchar.to_int Uutf.u_rep))] @
218 - (char_sequence ~start:3 "</foo>")));
219 -
220 - ("html.tokenizer.script-data" >:: fun _ ->
221 -@@ -330,7 +330,7 @@ let tests = [
222 - expect ~state:`Script_data "f<!--o\x00o"
223 - ((char_sequence ~no_eof:true "f<!--o") @
224 - [1, 7, E (`Bad_token ("U+0000", "script", "null"));
225 -- 1, 7, S (`Char Uutf.u_rep);
226 -+ 1, 7, S (`Char (Uchar.to_int Uutf.u_rep));
227 - 1, 8, S (`Char 0x6F);
228 - 1, 9, E (`Unexpected_eoi "script");
229 - 1, 9, S `EOF]);
230 -@@ -363,7 +363,7 @@ let tests = [
231 - expect ~state:`Script_data "f<!--a-\x00-"
232 - ((char_sequence ~no_eof:true "f<!--a-") @
233 - [ 1, 8, E (`Bad_token ("U+0000", "script", "null"));
234 -- 1, 8, S (`Char Uutf.u_rep);
235 -+ 1, 8, S (`Char (Uchar.to_int Uutf.u_rep));
236 - 1, 9, S (`Char 0x02D);
237 - 1, 10, E (`Unexpected_eoi "script");
238 - 1, 10, S `EOF]);
239 -@@ -371,7 +371,7 @@ let tests = [
240 - expect ~state:`Script_data "f<!--a--\x00--"
241 - ((char_sequence ~no_eof:true "f<!--a--") @
242 - [ 1, 9, E (`Bad_token ("U+0000", "script", "null"));
243 -- 1, 9, S (`Char Uutf.u_rep);
244 -+ 1, 9, S (`Char (Uchar.to_int Uutf.u_rep));
245 - 1, 10, S (`Char 0x02D);
246 - 1, 11, S (`Char 0x02D);
247 - 1, 12, E (`Unexpected_eoi "script");
248 -@@ -380,14 +380,14 @@ let tests = [
249 - expect ~state:`Script_data "f<!--<script>\x00"
250 - ((char_sequence ~no_eof:true "f<!--<script>") @
251 - [ 1, 14, E (`Bad_token ("U+0000", "script", "null"));
252 -- 1, 14, S (`Char Uutf.u_rep);
253 -+ 1, 14, S (`Char (Uchar.to_int Uutf.u_rep));
254 - 1, 15, E (`Unexpected_eoi "script");
255 - 1, 15, S `EOF]);
256 -
257 - expect ~state:`Script_data "f<!--<script>-\x00-"
258 - ((char_sequence ~no_eof:true "f<!--<script>-") @
259 - [ 1, 15, E (`Bad_token ("U+0000", "script", "null"));
260 -- 1, 15, S (`Char Uutf.u_rep);
261 -+ 1, 15, S (`Char (Uchar.to_int Uutf.u_rep));
262 - 1, 16, S (`Char 0x2D);
263 - 1, 17, E (`Unexpected_eoi "script");
264 - 1, 17, S `EOF]);
265 -@@ -395,7 +395,7 @@ let tests = [
266 - expect ~state:`Script_data "f<!--<script>--\x00--"
267 - ((char_sequence ~no_eof:true "f<!--<script>--") @
268 - [ 1, 16, E (`Bad_token ("U+0000", "script", "null"));
269 -- 1, 16, S (`Char Uutf.u_rep);
270 -+ 1, 16, S (`Char (Uchar.to_int Uutf.u_rep));
271 - 1, 17, S (`Char 0x2D);
272 - 1, 18, S (`Char 0x2D);
273 - 1, 19, E (`Unexpected_eoi "script");
274 -@@ -413,7 +413,7 @@ let tests = [
275 - expect ~state:`Script_data "f\x00</foo>"
276 - ([ 1, 1, S (`Char 0x66);
277 - 1, 2, E (`Bad_token ("U+0000", "content", "null"));
278 -- 1, 2, S (`Char Uutf.u_rep)] @
279 -+ 1, 2, S (`Char (Uchar.to_int Uutf.u_rep))] @
280 - (char_sequence ~start:3 "</foo>")));
281 -
282 - ("html.tokenizer.plaintext" >:: fun _ ->
283 -@@ -424,7 +424,7 @@ let tests = [
284 - expect ~state:`PLAINTEXT "f\x00</foo>"
285 - ([ 1, 1, S (`Char 0x66);
286 - 1, 2, E (`Bad_token ("U+0000", "content", "null"));
287 -- 1, 2, S (`Char Uutf.u_rep)] @
288 -+ 1, 2, S (`Char (Uchar.to_int Uutf.u_rep))] @
289 - (char_sequence ~start:3 "</foo>")));
290 -
291 - ("html.tokenizer.comment" >:: fun _ ->
292 -Index: markup.ml-0.7.2/test/test_input.ml
293 -===================================================================
294 ---- markup.ml-0.7.2.orig/test/test_input.ml
295 -+++ markup.ml-0.7.2/test/test_input.ml
296 -@@ -71,7 +71,7 @@ let tests = [
297 - end);
298 -
299 - ("input.bom" >:: fun _ ->
300 -- [0xFEFF; 0x66]
301 -+ [Uchar.of_int 0xFEFF; Uchar.of_int 0x66]
302 - |> of_list
303 - |> preprocess is_valid_xml_char Error.ignore_errors
304 - |> fst
305
306 diff --git a/dev-ml/markup/files/uutf.patch b/dev-ml/markup/files/uutf.patch
307 deleted file mode 100644
308 index f561084..00000000
309 --- a/dev-ml/markup/files/uutf.patch
310 +++ /dev/null
311 @@ -1,1085 +0,0 @@
312 -Index: markup.ml-0.7.2/src/common.ml
313 -===================================================================
314 ---- markup.ml-0.7.2.orig/src/common.ml
315 -+++ markup.ml-0.7.2/src/common.ml
316 -@@ -134,7 +134,7 @@ let is_printable = is_in_range 0x0020 0x
317 - let char c =
318 - if is_printable c then begin
319 - let buffer = Buffer.create 4 in
320 -- add_utf_8 buffer c;
321 -+ add_utf_8 buffer (Uchar.of_int c);
322 - Buffer.contents buffer
323 - end
324 - else
325 -Index: markup.ml-0.7.2/src/detect.ml
326 -===================================================================
327 ---- markup.ml-0.7.2.orig/src/detect.ml
328 -+++ markup.ml-0.7.2/src/detect.ml
329 -@@ -222,7 +222,7 @@ let meta_tag_prescan =
330 - let rec iterate () =
331 - next source throw (fun () -> k "") (function
332 - | c when c = quote -> k (Buffer.contents buffer)
333 -- | c -> add_utf_8 buffer (Char.code (Char.lowercase c)); iterate ())
334 -+ | c -> add_utf_8 buffer (Uchar.of_int (Char.code (Char.lowercase c))); iterate ())
335 - in
336 - iterate ()
337 - in
338 -@@ -236,7 +236,7 @@ let meta_tag_prescan =
339 - push source c;
340 - k (Buffer.contents buffer)
341 - | c ->
342 -- add_utf_8 buffer (Char.code (Char.lowercase c));
343 -+ add_utf_8 buffer (Uchar.of_int (Char.code (Char.lowercase c)));
344 - iterate ())
345 - in
346 - iterate ()
347 -@@ -315,7 +315,7 @@ let meta_tag_prescan =
348 - k (Buffer.contents buffer)
349 -
350 - | Some c ->
351 -- add_utf_8 buffer (Char.code (Char.lowercase c));
352 -+ add_utf_8 buffer (Uchar.of_int (Char.code (Char.lowercase c)));
353 - iterate ()
354 - end
355 - in
356 -Index: markup.ml-0.7.2/src/encoding.ml
357 -===================================================================
358 ---- markup.ml-0.7.2.orig/src/encoding.ml
359 -+++ markup.ml-0.7.2/src/encoding.ml
360 -@@ -4,7 +4,7 @@
361 - open Common
362 - open Kstream
363 -
364 --type t = ?report:Error.parse_handler -> char Kstream.t -> int Kstream.t
365 -+type t = ?report:Error.parse_handler -> char Kstream.t -> Uchar.t Kstream.t
366 -
367 - let wrap f = fun ?(report = Error.ignore_errors) s -> f report s
368 -
369 -@@ -24,8 +24,8 @@ let _uutf_decoder encoding name =
370 - k Uutf.u_rep)
371 - | `Await ->
372 - next bytes throw
373 -- (fun () -> Uutf.Manual.src decoder "" 0 0; run ())
374 -- (fun c -> Uutf.Manual.src decoder (String.make 1 c) 0 1; run ())
375 -+ (fun () -> Uutf.Manual.src decoder Bytes.empty 0 0; run ())
376 -+ (fun c -> Uutf.Manual.src decoder (Bytes.make 1 c) 0 1; run ())
377 - in
378 - run ())
379 - |> make)
380 -@@ -87,7 +87,7 @@ let _ucs_4_decoder arrange name =
381 - let skip =
382 - if !first then begin
383 - first := false;
384 -- scalar = Uutf.u_bom
385 -+ scalar = Uchar.to_int Uutf.u_bom
386 - end
387 - else
388 - false
389 -@@ -96,9 +96,9 @@ let _ucs_4_decoder arrange name =
390 - if skip then run ()
391 - else
392 - if scalar = 0x000A then
393 -- newline k scalar
394 -+ newline k (Uchar.of_int scalar)
395 - else
396 -- char k scalar
397 -+ char k (Uchar.of_int scalar)
398 -
399 - | [] -> empty ()
400 -
401 -@@ -130,7 +130,7 @@ let code_page table =
402 -
403 - (fun _ bytes ->
404 - (fun throw empty k ->
405 -- next bytes throw empty (fun c -> k table.(Char.code c)))
406 -+ next bytes throw empty (fun c -> k (Uchar.of_int table.(Char.code c))))
407 - |> make)
408 - |> wrap
409 -
410 -Index: markup.ml-0.7.2/src/html_parser.ml
411 -===================================================================
412 ---- markup.ml-0.7.2.orig/src/html_parser.ml
413 -+++ markup.ml-0.7.2/src/html_parser.ml
414 -@@ -1022,7 +1022,7 @@ let parse requested_context report (toke
415 - let frameset_ok = ref true in
416 - let head_seen = ref false in
417 -
418 -- let add_character = Text.add text in
419 -+ let add_character = (fun x y -> Text.add text x (Uchar.of_int y)) in
420 -
421 - set_foreign (fun () ->
422 - Stack.current_element_is_foreign context open_elements);
423 -@@ -2717,7 +2717,7 @@ let parse requested_context report (toke
424 - | l, `Char 0 ->
425 - report l (`Bad_token ("U+0000", "foreign content", "null")) !throw
426 - (fun () ->
427 -- add_character l Uutf.u_rep;
428 -+ add_character l (Uchar.to_int Uutf.u_rep);
429 - mode ())
430 -
431 - | l, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020 as c) ->
432 -Index: markup.ml-0.7.2/src/html_tokenizer.ml
433 -===================================================================
434 ---- markup.ml-0.7.2.orig/src/html_tokenizer.ml
435 -+++ markup.ml-0.7.2/src/html_tokenizer.ml
436 -@@ -252,7 +252,7 @@ let tokenize report (input, get_location
437 - report location
438 - (`Bad_token (prefix ^ text ^ semicolon, "character reference",
439 - "Windows-1252 character")) !throw (fun () ->
440 -- k (Some (`One n)))
441 -+ k (Some (`One (Uchar.of_int n))))
442 -
443 - else
444 - match n with
445 -@@ -268,9 +268,9 @@ let tokenize report (input, get_location
446 - (`Bad_token (prefix ^ text ^ semicolon,
447 - "character reference",
448 - "invalid HTML character")) !throw (fun () ->
449 -- k (Some (`One n)))
450 -+ k (Some (`One (Uchar.of_int n))))
451 -
452 -- | n -> k (Some (`One n))
453 -+ | n -> k (Some (`One (Uchar.of_int n)))
454 - end
455 - end
456 - in
457 -@@ -366,6 +366,10 @@ let tokenize report (input, get_location
458 - | _ -> unterminated ())
459 - in
460 -
461 -+ let ma = function
462 -+ a, `One x -> (a, `One (Uchar.of_int x))
463 -+ | a, `Two (x,y) -> (a, `Two (Uchar.of_int x, Uchar.of_int y)) in
464 -+
465 - let rec match_named best matched replace candidate =
466 - next_option input !throw (function
467 - | None -> finish best matched replace
468 -@@ -377,8 +381,8 @@ let tokenize report (input, get_location
469 - | `None -> finish best matched (v::replace)
470 - | `Continue -> match_named best matched (v::replace) candidate
471 - | `Match_and_continue m ->
472 -- match_named (Some m) (v::(replace @ matched)) [] candidate
473 -- | `Match m -> finish (Some m) (v::matched) [])
474 -+ match_named (Some (ma m)) (v::(replace @ matched)) [] candidate
475 -+ | `Match m -> finish (Some (ma m)) (v::matched) [])
476 - in
477 - match_named None [] [] "")
478 -
479 -@@ -409,11 +413,11 @@ let tokenize report (input, get_location
480 - emit (l, `Char 0x0026) state
481 -
482 - | Some (`One c) ->
483 -- emit (l, `Char c) state
484 -+ emit (l, `Char (Uchar.to_int c)) state
485 -
486 - | Some (`Two (c, c')) ->
487 -- emit (l, `Char c) (fun () ->
488 -- emit (l, `Char c') state)
489 -+ emit (l, `Char (Uchar.to_int c)) (fun () ->
490 -+ emit (l, `Char (Uchar.to_int c')) state)
491 - end
492 -
493 - (* 8.2.4.3. *)
494 -@@ -427,7 +431,7 @@ let tokenize report (input, get_location
495 -
496 - | Some (l, 0) ->
497 - report l (`Bad_token ("U+0000", "content", "null")) !throw (fun () ->
498 -- emit (l, `Char Uutf.u_rep) rcdata_state)
499 -+ emit (l, `Char (Uchar.to_int Uutf.u_rep)) rcdata_state)
500 -
501 - | None ->
502 - emit_eof ()
503 -@@ -444,7 +448,7 @@ let tokenize report (input, get_location
504 -
505 - | Some (l, 0) ->
506 - report l (`Bad_token ("U+0000", "content", "null")) !throw (fun () ->
507 -- emit (l, `Char Uutf.u_rep) rawtext_state)
508 -+ emit (l, `Char (Uchar.to_int Uutf.u_rep)) rawtext_state)
509 -
510 - | None ->
511 - emit_eof ()
512 -@@ -461,7 +465,7 @@ let tokenize report (input, get_location
513 -
514 - | Some (l, 0) ->
515 - report l (`Bad_token ("U+0000", "content", "null")) !throw (fun () ->
516 -- emit_character l Uutf.u_rep script_data_state)
517 -+ emit_character l (Uchar.to_int Uutf.u_rep) script_data_state)
518 -
519 - | None ->
520 - emit_eof ()
521 -@@ -475,7 +479,7 @@ let tokenize report (input, get_location
522 - next_option input !throw begin function
523 - | Some (l, 0) ->
524 - report l (`Bad_token ("U+0000", "content", "null")) !throw (fun () ->
525 -- emit (l, `Char Uutf.u_rep) plaintext_state)
526 -+ emit (l, `Char (Uchar.to_int Uutf.u_rep)) plaintext_state)
527 -
528 - | None ->
529 - emit_eof ()
530 -@@ -501,7 +505,7 @@ let tokenize report (input, get_location
531 - end_tag_open_state l' tag
532 -
533 - | Some (_, c) when is_alphabetic c ->
534 -- add_utf_8 tag._tag_name (to_lowercase c);
535 -+ add_utf_8 tag._tag_name (Uchar.of_int (to_lowercase c));
536 - tag_name_state l' tag
537 -
538 - | Some (_, 0x003F) ->
539 -@@ -529,7 +533,7 @@ let tokenize report (input, get_location
540 -
541 - next_option input !throw begin function
542 - | Some (_, c) when is_alphabetic c ->
543 -- add_utf_8 tag._tag_name (to_lowercase c);
544 -+ add_utf_8 tag._tag_name (Uchar.of_int (to_lowercase c));
545 - tag_name_state l' tag
546 -
547 - | Some (_, 0x003E) ->
548 -@@ -569,7 +573,7 @@ let tokenize report (input, get_location
549 - report (get_location ()) (`Unexpected_eoi "tag") !throw data_state
550 -
551 - | Some (_, c) ->
552 -- add_utf_8 tag._tag_name (to_lowercase c);
553 -+ add_utf_8 tag._tag_name (Uchar.of_int (to_lowercase c));
554 - tag_name_state l' tag
555 - end
556 -
557 -@@ -589,7 +593,7 @@ let tokenize report (input, get_location
558 - next_option input !throw begin function
559 - | Some (_, c as v) when is_alphabetic c ->
560 - let name_buffer = Buffer.create 32 in
561 -- add_utf_8 name_buffer (to_lowercase c);
562 -+ add_utf_8 name_buffer (Uchar.of_int (to_lowercase c));
563 - text_end_tag_name_state state l' (v::cs) name_buffer
564 -
565 - | maybe_v ->
566 -@@ -618,7 +622,7 @@ let tokenize report (input, get_location
567 - emit_tag l' (create_tag ())
568 -
569 - | Some ((_, c) as v) when is_alphabetic c ->
570 -- add_utf_8 name_buffer (to_lowercase c);
571 -+ add_utf_8 name_buffer (Uchar.of_int (to_lowercase c));
572 - text_end_tag_name_state state l' (v::cs) name_buffer
573 -
574 - | maybe_v ->
575 -@@ -676,7 +680,7 @@ let tokenize report (input, get_location
576 -
577 - | Some (l, 0) ->
578 - report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () ->
579 -- emit_character l Uutf.u_rep (fun () ->
580 -+ emit_character l (Uchar.to_int Uutf.u_rep) (fun () ->
581 - script_data_escaped_state l'))
582 -
583 - | None ->
584 -@@ -699,7 +703,7 @@ let tokenize report (input, get_location
585 -
586 - | Some (l, 0) ->
587 - report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () ->
588 -- emit_character l Uutf.u_rep (fun () ->
589 -+ emit_character l (Uchar.to_int Uutf.u_rep) (fun () ->
590 - script_data_escaped_state l'))
591 -
592 - | None ->
593 -@@ -725,7 +729,7 @@ let tokenize report (input, get_location
594 -
595 - | Some (l, 0) ->
596 - report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () ->
597 -- emit_character l Uutf.u_rep (fun () ->
598 -+ emit_character l (Uchar.to_int Uutf.u_rep) (fun () ->
599 - script_data_escaped_state l'))
600 -
601 - | None ->
602 -@@ -745,7 +749,7 @@ let tokenize report (input, get_location
603 -
604 - | Some (_, c as v) when is_alphabetic c ->
605 - let tag_buffer = Buffer.create 32 in
606 -- add_utf_8 tag_buffer (to_lowercase c);
607 -+ add_utf_8 tag_buffer (Uchar.of_int (to_lowercase c));
608 - emit_characters (List.rev (v::cs)) (fun () ->
609 - script_data_double_escape_start_state l' tag_buffer)
610 -
611 -@@ -765,7 +769,7 @@ let tokenize report (input, get_location
612 - else script_data_escaped_state l')
613 -
614 - | Some (l, c) when is_alphabetic c ->
615 -- add_utf_8 tag_buffer (to_lowercase c);
616 -+ add_utf_8 tag_buffer (Uchar.of_int (to_lowercase c));
617 - emit_character l c (fun () ->
618 - script_data_double_escape_start_state l' tag_buffer)
619 -
620 -@@ -787,7 +791,7 @@ let tokenize report (input, get_location
621 -
622 - | Some (l, 0) ->
623 - report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () ->
624 -- emit_character l Uutf.u_rep (fun () ->
625 -+ emit_character l (Uchar.to_int Uutf.u_rep) (fun () ->
626 - script_data_double_escaped_state l'))
627 -
628 - | None ->
629 -@@ -811,7 +815,7 @@ let tokenize report (input, get_location
630 -
631 - | Some (l, 0) ->
632 - report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () ->
633 -- emit_character l Uutf.u_rep (fun () ->
634 -+ emit_character l (Uchar.to_int Uutf.u_rep) (fun () ->
635 - script_data_double_escaped_state l'))
636 -
637 - | None ->
638 -@@ -838,7 +842,7 @@ let tokenize report (input, get_location
639 -
640 - | Some (l, 0) ->
641 - report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () ->
642 -- emit_character l Uutf.u_rep (fun () ->
643 -+ emit_character l (Uchar.to_int Uutf.u_rep) (fun () ->
644 - script_data_double_escaped_state l'))
645 -
646 - | None ->
647 -@@ -872,7 +876,7 @@ let tokenize report (input, get_location
648 - else script_data_double_escaped_state l')
649 -
650 - | Some (l, c) when is_alphabetic c ->
651 -- add_utf_8 tag_buffer (to_lowercase c);
652 -+ add_utf_8 tag_buffer (Uchar.of_int (to_lowercase c));
653 - emit_character l c (fun () ->
654 - script_data_double_escape_end_state l' tag_buffer)
655 -
656 -@@ -910,10 +914,10 @@ let tokenize report (input, get_location
657 - | Some (l, (0x0022 | 0x0027 | 0x003C | 0x003D as c)) ->
658 - report l (`Bad_token (char c, "attribute name",
659 - "invalid start character")) !throw (fun () ->
660 -- start_attribute c)
661 -+ start_attribute (Uchar.of_int c))
662 -
663 - | Some (_, c) ->
664 -- start_attribute (to_lowercase c)
665 -+ start_attribute (Uchar.of_int (to_lowercase c))
666 - end
667 -
668 - (* 8.2.4.35. *)
669 -@@ -942,14 +946,14 @@ let tokenize report (input, get_location
670 - | Some (l, (0x0022 | 0x0027 | 0x003C as c)) ->
671 - report l (`Bad_token (char c, "attribute name",
672 - "invalid name character")) !throw (fun () ->
673 -- add_utf_8 name_buffer c;
674 -+ add_utf_8 name_buffer (Uchar.of_int c);
675 - attribute_name_state l' tag name_buffer)
676 -
677 - | None ->
678 - report (get_location ()) (`Unexpected_eoi "tag") !throw data_state
679 -
680 - | Some (_, c) ->
681 -- add_utf_8 name_buffer (to_lowercase c);
682 -+ add_utf_8 name_buffer (Uchar.of_int (to_lowercase c));
683 - attribute_name_state l' tag name_buffer
684 - end
685 -
686 -@@ -985,13 +989,13 @@ let tokenize report (input, get_location
687 - | Some (l, (0x0022 | 0x0027 | 0x003C as c)) ->
688 - report l (`Bad_token (char c, "attribute name",
689 - "invalid start character")) !throw (fun () ->
690 -- start_next_attribute c)
691 -+ start_next_attribute (Uchar.of_int c))
692 -
693 - | None ->
694 - report (get_location ()) (`Unexpected_eoi "tag") !throw data_state
695 -
696 - | Some (_, c) ->
697 -- start_next_attribute (to_lowercase c)
698 -+ start_next_attribute (Uchar.of_int (to_lowercase c))
699 - end
700 -
701 - (* 8.2.4.37. *)
702 -@@ -1030,13 +1034,13 @@ let tokenize report (input, get_location
703 - | Some (l, (0x003C | 0x003D | 0x0060 as c)) ->
704 - report l (`Bad_token (char c, "attribute value",
705 - "invalid start character")) !throw (fun () ->
706 -- start_value attribute_value_unquoted_state (Some c))
707 -+ start_value attribute_value_unquoted_state (Some (Uchar.of_int c)))
708 -
709 - | None ->
710 - report (get_location ()) (`Unexpected_eoi "tag") !throw data_state
711 -
712 - | Some (_, c) ->
713 -- start_value attribute_value_unquoted_state (Some c)
714 -+ start_value attribute_value_unquoted_state (Some (Uchar.of_int c))
715 - end
716 -
717 - (* 8.2.4.38 and 8.2.4.39. *)
718 -@@ -1062,7 +1066,7 @@ let tokenize report (input, get_location
719 - data_state
720 -
721 - | Some (_, c) ->
722 -- add_utf_8 value_buffer c;
723 -+ add_utf_8 value_buffer (Uchar.of_int c);
724 - attribute_value_quoted_state quote l' tag name value_buffer
725 - end
726 -
727 -@@ -1092,14 +1096,14 @@ let tokenize report (input, get_location
728 - | Some (l, (0x0022 | 0x0027 | 0x003C | 0x003D | 0x0060 as c)) ->
729 - report l (`Bad_token (char c, "attribute value",
730 - "invalid character")) !throw (fun () ->
731 -- add_utf_8 value_buffer c;
732 -+ add_utf_8 value_buffer (Uchar.of_int c);
733 - attribute_value_unquoted_state l' tag name value_buffer)
734 -
735 - | None ->
736 - report (get_location ()) (`Unexpected_eoi "tag") !throw data_state
737 -
738 - | Some (_, c) ->
739 -- add_utf_8 value_buffer c;
740 -+ add_utf_8 value_buffer (Uchar.of_int c);
741 - attribute_value_unquoted_state l' tag name value_buffer
742 - end
743 -
744 -@@ -1107,7 +1111,7 @@ let tokenize report (input, get_location
745 - and character_reference_in_attribute allowed l value_buffer k =
746 - consume_character_reference true (Some allowed) l begin function
747 - | None ->
748 -- add_utf_8 value_buffer 0x0026;
749 -+ add_utf_8 value_buffer (Uchar.of_int 0x0026);
750 - k ()
751 -
752 - | Some (`One c) ->
753 -@@ -1176,7 +1180,7 @@ let tokenize report (input, get_location
754 - emit_comment l' buffer
755 -
756 - | Some (_, c) ->
757 -- add_utf_8 buffer c;
758 -+ add_utf_8 buffer (Uchar.of_int c);
759 - consume ()
760 - end
761 - in
762 -@@ -1239,7 +1243,7 @@ let tokenize report (input, get_location
763 - emit_comment l' buffer)
764 -
765 - | Some (_, c) ->
766 -- add_utf_8 buffer c;
767 -+ add_utf_8 buffer (Uchar.of_int c);
768 - comment_state l' buffer
769 - end
770 -
771 -@@ -1266,7 +1270,7 @@ let tokenize report (input, get_location
772 -
773 - | Some (_, c) ->
774 - Buffer.add_char buffer '-';
775 -- add_utf_8 buffer c;
776 -+ add_utf_8 buffer (Uchar.of_int c);
777 - comment_state l' buffer
778 - end
779 -
780 -@@ -1286,7 +1290,7 @@ let tokenize report (input, get_location
781 - emit_comment l' buffer)
782 -
783 - | Some (_, c) ->
784 -- add_utf_8 buffer c;
785 -+ add_utf_8 buffer (Uchar.of_int c);
786 - comment_state l' buffer
787 - end
788 -
789 -@@ -1308,7 +1312,7 @@ let tokenize report (input, get_location
790 -
791 - | Some (_, c) ->
792 - Buffer.add_char buffer '-';
793 -- add_utf_8 buffer c;
794 -+ add_utf_8 buffer (Uchar.of_int c);
795 - comment_state l' buffer
796 - end
797 -
798 -@@ -1343,7 +1347,7 @@ let tokenize report (input, get_location
799 - report l (`Bad_token ("--" ^ (char c), "comment",
800 - "'--' should be in '-->'")) !throw (fun () ->
801 - Buffer.add_string buffer "--";
802 -- add_utf_8 buffer c;
803 -+ add_utf_8 buffer (Uchar.of_int c);
804 - comment_state l' buffer)
805 - end
806 -
807 -@@ -1369,7 +1373,7 @@ let tokenize report (input, get_location
808 -
809 - | Some (_, c) ->
810 - Buffer.add_string buffer "--!";
811 -- add_utf_8 buffer c;
812 -+ add_utf_8 buffer (Uchar.of_int c);
813 - comment_state l' buffer
814 - end
815 -
816 -@@ -1420,7 +1424,7 @@ let tokenize report (input, get_location
817 -
818 - | Some (_, c) ->
819 - doctype._doctype_name <-
820 -- add_doctype_char doctype._doctype_name (to_lowercase c);
821 -+ add_doctype_char doctype._doctype_name (Uchar.of_int (to_lowercase c));
822 - doctype_name_state l' doctype
823 - end
824 -
825 -@@ -1445,7 +1449,7 @@ let tokenize report (input, get_location
826 -
827 - | Some (_, c) ->
828 - doctype._doctype_name <-
829 -- add_doctype_char doctype._doctype_name (to_lowercase c);
830 -+ add_doctype_char doctype._doctype_name (Uchar.of_int (to_lowercase c));
831 - doctype_name_state l' doctype
832 - end
833 -
834 -@@ -1574,7 +1578,7 @@ let tokenize report (input, get_location
835 - emit_doctype ~quirks:true l' doctype)
836 -
837 - | Some (_, c) ->
838 -- add doctype c;
839 -+ add doctype (Uchar.of_int c);
840 - doctype_identifier_quoted_state add quote next_state l' doctype
841 - end
842 -
843 -Index: markup.ml-0.7.2/src/html_writer.ml
844 -===================================================================
845 ---- markup.ml-0.7.2.orig/src/html_writer.ml
846 -+++ markup.ml-0.7.2/src/html_writer.ml
847 -@@ -8,7 +8,7 @@ let _escape_attribute s =
848 - Uutf.String.fold_utf_8 (fun () _ -> function
849 - | `Malformed _ -> ()
850 - | `Uchar c ->
851 -- match c with
852 -+ match (Uchar.to_int c) with
853 - | 0x0026 -> Buffer.add_string buffer "&amp;"
854 - | 0x00A0 -> Buffer.add_string buffer "&nbsp;"
855 - | 0x0022 -> Buffer.add_string buffer "&quot;"
856 -@@ -21,7 +21,7 @@ let _escape_text s =
857 - Uutf.String.fold_utf_8 (fun () _ -> function
858 - | `Malformed _ -> ()
859 - | `Uchar c ->
860 -- match c with
861 -+ match (Uchar.to_int c) with
862 - | 0x0026 -> Buffer.add_string buffer "&amp;"
863 - | 0x00A0 -> Buffer.add_string buffer "&nbsp;"
864 - | 0x003C -> Buffer.add_string buffer "&lt;"
865 -Index: markup.ml-0.7.2/src/input.ml
866 -===================================================================
867 ---- markup.ml-0.7.2.orig/src/input.ml
868 -+++ markup.ml-0.7.2/src/input.ml
869 -@@ -27,13 +27,13 @@ let preprocess is_valid_char report sour
870 - in
871 -
872 - let rec iterate () =
873 -- next source throw empty (function
874 -+ next source throw empty (fun x -> match Uchar.to_int x with
875 - | 0xFEFF when !first_char -> first_char := false; iterate ()
876 -
877 - | 0x0D ->
878 -- next source throw newline (function
879 -+ next source throw newline (fun y -> match Uchar.to_int y with
880 - | 0x0A -> newline ()
881 -- | c -> push source c; newline ())
882 -+ | c -> push source (Uchar.of_int c); newline ())
883 -
884 - | 0x0A -> newline ()
885 -
886 -Index: markup.ml-0.7.2/src/input.mli
887 -===================================================================
888 ---- markup.ml-0.7.2.orig/src/input.mli
889 -+++ markup.ml-0.7.2/src/input.mli
890 -@@ -4,5 +4,5 @@
891 - open Common
892 -
893 - val preprocess :
894 -- (int -> bool) -> Error.parse_handler -> int Kstream.t ->
895 -+ (int -> bool) -> Error.parse_handler -> Uchar.t Kstream.t ->
896 - (location * int) Kstream.t * (unit -> location)
897 -Index: markup.ml-0.7.2/src/markup.ml
898 -===================================================================
899 ---- markup.ml-0.7.2.orig/src/markup.ml
900 -+++ markup.ml-0.7.2/src/markup.ml
901 -@@ -187,7 +187,7 @@ sig
902 -
903 - val decode :
904 - ?report:(location -> Error.t -> unit io) -> t ->
905 -- (char, _) stream -> (int, async) stream
906 -+ (char, _) stream -> (Uchar.t, async) stream
907 - end
908 -
909 - val parse_xml :
910 -Index: markup.ml-0.7.2/src/markup.mli
911 -===================================================================
912 ---- markup.ml-0.7.2.orig/src/markup.mli
913 -+++ markup.ml-0.7.2/src/markup.mli
914 -@@ -194,7 +194,7 @@ sig
915 -
916 - val decode :
917 - ?report:(location -> Error.t -> unit) -> t ->
918 -- (char, 's) stream -> (int, 's) stream
919 -+ (char, 's) stream -> (Uchar.t, 's) stream
920 - (** Applies a decoder to a byte stream. Illegal input byte sequences result in
921 - calls to the error handler [~report] with error kind [`Decoding_error].
922 - The illegal bytes are then skipped, and zero or more U+FFFD replacement
923 -@@ -764,7 +764,7 @@ sig
924 -
925 - val decode :
926 - ?report:(location -> Error.t -> unit io) -> Encoding.t ->
927 -- (char, _) stream -> (int, async) stream
928 -+ (char, _) stream -> (Uchar.t, async) stream
929 - end
930 -
931 - (** {2 XML} *)
932 -@@ -838,7 +838,7 @@ val kstream : ('a, _) stream -> 'a Kstre
933 - val of_kstream : 'a Kstream.t -> ('a, _) stream
934 -
935 - val preprocess_input_stream :
936 -- (int, 's) stream -> (location * int, 's) stream * (unit -> location)
937 -+ (Uchar.t, 's) stream -> (location * int, 's) stream * (unit -> location)
938 -
939 - (**/**)
940 -
941 -Index: markup.ml-0.7.2/src/utility.ml
942 -===================================================================
943 ---- markup.ml-0.7.2.orig/src/utility.ml
944 -+++ markup.ml-0.7.2/src/utility.ml
945 -@@ -346,11 +346,11 @@ let xhtml_entity name =
946 -
947 - match lookup 0 with
948 - | `One c ->
949 -- add_utf_8 buffer c;
950 -+ add_utf_8 buffer (Uchar.of_int c);
951 - Some (Buffer.contents buffer)
952 - | `Two (c, c') ->
953 -- add_utf_8 buffer c;
954 -- add_utf_8 buffer c';
955 -+ add_utf_8 buffer (Uchar.of_int c);
956 -+ add_utf_8 buffer (Uchar.of_int c');
957 - Some (Buffer.contents buffer)
958 -
959 - with Exit -> None
960 -Index: markup.ml-0.7.2/src/xml_tokenizer.ml
961 -===================================================================
962 ---- markup.ml-0.7.2.orig/src/xml_tokenizer.ml
963 -+++ markup.ml-0.7.2/src/xml_tokenizer.ml
964 -@@ -101,7 +101,7 @@ let tokenize report resolve_reference (i
965 - end
966 -
967 - | _, c when filter c ->
968 -- add_utf_8 buffer c;
969 -+ add_utf_8 buffer (Uchar.of_int c);
970 - read ()
971 -
972 - | l, c ->
973 -@@ -133,7 +133,7 @@ let tokenize report resolve_reference (i
974 -
975 - | _, c when is_name_start_char c ->
976 - let buffer = Buffer.create 32 in
977 -- add_utf_8 buffer c;
978 -+ add_utf_8 buffer (Uchar.of_int c);
979 - let rec read () =
980 - next input !throw unexpected_eoi begin function
981 - | _, 0x003B ->
982 -@@ -146,7 +146,7 @@ let tokenize report resolve_reference (i
983 - end
984 -
985 - | _, c when is_name_char c ->
986 -- add_utf_8 buffer c;
987 -+ add_utf_8 buffer (Uchar.of_int c);
988 - read ()
989 -
990 - | l, c ->
991 -@@ -218,7 +218,7 @@ let tokenize report resolve_reference (i
992 - report_if (not @@ is_name_start_char c) l (fun () ->
993 - `Bad_token (char c, "attribute", "invalid start character"))
994 - !throw (fun () ->
995 -- add_utf_8 name_buffer c;
996 -+ add_utf_8 name_buffer (Uchar.of_int c);
997 - name_state ())
998 - end
999 -
1000 -@@ -235,7 +235,7 @@ let tokenize report resolve_reference (i
1001 - report_if (not @@ is_name_start_char c) l (fun () ->
1002 - `Bad_token (char c, "attribute", "invalid name character"))
1003 - !throw (fun () ->
1004 -- add_utf_8 name_buffer c;
1005 -+ add_utf_8 name_buffer (Uchar.of_int c);
1006 - name_state ())
1007 - end
1008 -
1009 -@@ -275,14 +275,14 @@ let tokenize report resolve_reference (i
1010 - report l
1011 - (`Bad_token ("&", "attribute", "replace with '&amp;'"))
1012 - !throw (fun () ->
1013 -- add_utf_8 value_buffer 0x0026;
1014 -+ add_utf_8 value_buffer (Uchar.of_int 0x0026);
1015 - state ())
1016 - end
1017 -
1018 - and handle_lt l state =
1019 - report l (`Bad_token ("<", "attribute", "replace with '&lt;'")) !throw
1020 - (fun () ->
1021 -- add_utf_8 value_buffer 0x003C;
1022 -+ add_utf_8 value_buffer (Uchar.of_int 0x003C);
1023 - state ())
1024 -
1025 - and quoted_value_state quote =
1026 -@@ -300,7 +300,7 @@ let tokenize report resolve_reference (i
1027 - quoted_value_state quote)
1028 -
1029 - | _, c ->
1030 -- add_utf_8 value_buffer c;
1031 -+ add_utf_8 value_buffer (Uchar.of_int c);
1032 - quoted_value_state quote
1033 - end
1034 -
1035 -@@ -317,7 +317,7 @@ let tokenize report resolve_reference (i
1036 - handle_lt l unquoted_value_state
1037 -
1038 - | _, c ->
1039 -- add_utf_8 value_buffer c;
1040 -+ add_utf_8 value_buffer (Uchar.of_int c);
1041 - unquoted_value_state ()
1042 - end
1043 -
1044 -@@ -372,7 +372,7 @@ let tokenize report resolve_reference (i
1045 - report_if (not @@ is_name_start_char c) l (fun () ->
1046 - `Bad_token (char c, pi, "invalid start character")) !throw
1047 - (fun () ->
1048 -- add_utf_8 target_buffer c;
1049 -+ add_utf_8 target_buffer (Uchar.of_int c);
1050 - target_state ())
1051 - end
1052 -
1053 -@@ -388,13 +388,13 @@ let tokenize report resolve_reference (i
1054 - report_if (not @@ is_name_char c) l (fun () ->
1055 - `Bad_token (char c, pi, "invalid name character")) !throw
1056 - (fun () ->
1057 -- add_utf_8 target_buffer c;
1058 -+ add_utf_8 target_buffer (Uchar.of_int c);
1059 - target_state ())
1060 - end
1061 -
1062 - and text_state () =
1063 - next' pi finish_pi (fun (_, c) ->
1064 -- add_utf_8 text_buffer c;
1065 -+ add_utf_8 text_buffer (Uchar.of_int c);
1066 - text_state ())
1067 -
1068 - and xml_declaration_state () =
1069 -@@ -572,7 +572,7 @@ let tokenize report resolve_reference (i
1070 - and initial_state () =
1071 - next input !throw (fun () -> emit_eoi ()) begin function
1072 - | l, (0x005D as c) ->
1073 -- add_character l c;
1074 -+ add_character l (Uchar.of_int c);
1075 - one_bracket_state l
1076 -
1077 - | l, 0x003C ->
1078 -@@ -583,7 +583,7 @@ let tokenize report resolve_reference (i
1079 - | None ->
1080 - report l (`Bad_token (char c, "text", "replace with '&amp;'"))
1081 - !throw (fun () ->
1082 -- add_character l c;
1083 -+ add_character l (Uchar.of_int c);
1084 - initial_state ())
1085 -
1086 - | Some s ->
1087 -@@ -591,14 +591,14 @@ let tokenize report resolve_reference (i
1088 - initial_state ())
1089 -
1090 - | l, c ->
1091 -- add_character l c;
1092 -+ add_character l (Uchar.of_int c);
1093 - initial_state ()
1094 - end
1095 -
1096 - and one_bracket_state l' =
1097 - next_option input !throw begin function
1098 - | Some (l, (0x005D as c)) ->
1099 -- add_character l c;
1100 -+ add_character l (Uchar.of_int c);
1101 - two_brackets_state l' l
1102 -
1103 - | v ->
1104 -@@ -611,11 +611,11 @@ let tokenize report resolve_reference (i
1105 - | Some (l, (0x003E as c)) ->
1106 - report l' (`Bad_token ("]]>", "text", "must end a CDATA section"))
1107 - !throw (fun () ->
1108 -- add_character l c;
1109 -+ add_character l (Uchar.of_int c);
1110 - initial_state ())
1111 -
1112 - | Some (l, (0x005D as c)) ->
1113 -- add_character l c;
1114 -+ add_character l (Uchar.of_int c);
1115 - two_brackets_state l'' l
1116 -
1117 - | v ->
1118 -@@ -626,7 +626,7 @@ let tokenize report resolve_reference (i
1119 - and begin_markup_state l' =
1120 - let recover v =
1121 - lt_in_text l' (fun () ->
1122 -- add_character l' 0x003C;
1123 -+ add_character l' (Uchar.of_int 0x003C);
1124 - push_option input v;
1125 - initial_state ())
1126 - in
1127 -@@ -648,7 +648,7 @@ let tokenize report resolve_reference (i
1128 -
1129 - | _, c when is_name_start_char c ->
1130 - let tag_name_buffer = Buffer.create 32 in
1131 -- add_utf_8 tag_name_buffer c;
1132 -+ add_utf_8 tag_name_buffer (Uchar.of_int c);
1133 - start_tag_state l' tag_name_buffer
1134 -
1135 - | l, c as v ->
1136 -@@ -660,7 +660,7 @@ let tokenize report resolve_reference (i
1137 - and start_tag_state l' buffer =
1138 - let recover v =
1139 - lt_in_text l' (fun () ->
1140 -- add_character l' 0x003C;
1141 -+ add_character l' (Uchar.of_int 0x003C);
1142 - add_string l' (Buffer.contents buffer);
1143 - push_option input v;
1144 - initial_state ())
1145 -@@ -680,7 +680,7 @@ let tokenize report resolve_reference (i
1146 - attributes_state l' (Buffer.contents buffer) []
1147 -
1148 - | _, c when is_name_char c ->
1149 -- add_utf_8 buffer c;
1150 -+ add_utf_8 buffer (Uchar.of_int c);
1151 - start_tag_state l' buffer
1152 -
1153 - | l, c as v ->
1154 -@@ -731,8 +731,8 @@ let tokenize report resolve_reference (i
1155 - and end_tag_state l' =
1156 - let recover v =
1157 - lt_in_text l' (fun () ->
1158 -- add_character l' 0x003C;
1159 -- add_character l' 0x002F;
1160 -+ add_character l' (Uchar.of_int 0x003C);
1161 -+ add_character l' (Uchar.of_int 0x002F);
1162 - push_option input v;
1163 - initial_state ())
1164 - in
1165 -@@ -743,7 +743,7 @@ let tokenize report resolve_reference (i
1166 - begin function
1167 - | _, c when is_name_start_char c ->
1168 - let name_buffer = Buffer.create 32 in
1169 -- add_utf_8 name_buffer c;
1170 -+ add_utf_8 name_buffer (Uchar.of_int c);
1171 - end_tag_name_state l' name_buffer
1172 -
1173 - | l, c as v ->
1174 -@@ -755,8 +755,8 @@ let tokenize report resolve_reference (i
1175 - and end_tag_name_state l' buffer =
1176 - let recover v =
1177 - lt_in_text l' (fun () ->
1178 -- add_character l' 0x003C;
1179 -- add_character l' 0x002F;
1180 -+ add_character l' (Uchar.of_int 0x003C);
1181 -+ add_character l' (Uchar.of_int 0x002F);
1182 - add_string l' (Buffer.contents buffer);
1183 - push_option input v;
1184 - initial_state ())
1185 -@@ -773,7 +773,7 @@ let tokenize report resolve_reference (i
1186 - end_tag_whitespace_state false l' (Buffer.contents buffer)
1187 -
1188 - | _, c when is_name_char c ->
1189 -- add_utf_8 buffer c;
1190 -+ add_utf_8 buffer (Uchar.of_int c);
1191 - end_tag_name_state l' buffer
1192 -
1193 - | l, c as v ->
1194 -@@ -821,8 +821,8 @@ let tokenize report resolve_reference (i
1195 -
1196 - | v ->
1197 - bad_comment_start "<!" l' (fun () ->
1198 -- add_character l' 0x003C;
1199 -- add_character l' 0x0021;
1200 -+ add_character l' (Uchar.of_int 0x003C);
1201 -+ add_character l' (Uchar.of_int 0x0021);
1202 - push_option input v;
1203 - initial_state ())
1204 - end
1205 -@@ -834,9 +834,9 @@ let tokenize report resolve_reference (i
1206 -
1207 - | v ->
1208 - bad_comment_start "<!-" l' (fun () ->
1209 -- add_character l' 0x003C;
1210 -- add_character l' 0x0021;
1211 -- add_character l' 0x002D;
1212 -+ add_character l' (Uchar.of_int 0x003C);
1213 -+ add_character l' (Uchar.of_int 0x0021);
1214 -+ add_character l' (Uchar.of_int 0x002D);
1215 - push_option input v;
1216 - initial_state ())
1217 - end
1218 -@@ -852,7 +852,7 @@ let tokenize report resolve_reference (i
1219 - comment_one_dash_state l' l buffer
1220 -
1221 - | _, c ->
1222 -- add_utf_8 buffer c;
1223 -+ add_utf_8 buffer (Uchar.of_int c);
1224 - comment_state l' buffer
1225 - end
1226 -
1227 -@@ -863,8 +863,8 @@ let tokenize report resolve_reference (i
1228 - comment_two_dashes_state false l' l'' buffer
1229 -
1230 - | _, c ->
1231 -- add_utf_8 buffer 0x002D;
1232 -- add_utf_8 buffer c;
1233 -+ add_utf_8 buffer (Uchar.of_int 0x002D);
1234 -+ add_utf_8 buffer (Uchar.of_int c);
1235 - comment_state l' buffer
1236 - end
1237 -
1238 -@@ -883,14 +883,14 @@ let tokenize report resolve_reference (i
1239 -
1240 - | _, 0x002D ->
1241 - recover (fun () ->
1242 -- add_utf_8 buffer 0x002D;
1243 -+ add_utf_8 buffer (Uchar.of_int 0x002D);
1244 - comment_two_dashes_state true l' l'' buffer)
1245 -
1246 - | _, c ->
1247 - recover (fun () ->
1248 -- add_utf_8 buffer 0x002D;
1249 -- add_utf_8 buffer 0x002D;
1250 -- add_utf_8 buffer c;
1251 -+ add_utf_8 buffer (Uchar.of_int 0x002D);
1252 -+ add_utf_8 buffer (Uchar.of_int 0x002D);
1253 -+ add_utf_8 buffer (Uchar.of_int c);
1254 - comment_state l' buffer)
1255 - end
1256 -
1257 -@@ -905,9 +905,9 @@ let tokenize report resolve_reference (i
1258 - !throw (fun () ->
1259 - lt_in_text l' (fun () ->
1260 - push_list input cs;
1261 -- add_character l' 0x003C;
1262 -- add_character l' 0x0021;
1263 -- add_character l' 0x005B;
1264 -+ add_character l' (Uchar.of_int 0x003C);
1265 -+ add_character l' (Uchar.of_int 0x0021);
1266 -+ add_character l' (Uchar.of_int 0x005B);
1267 - initial_state ()))
1268 - end
1269 -
1270 -@@ -918,7 +918,7 @@ let tokenize report resolve_reference (i
1271 - cdata_one_bracket_state l' l
1272 -
1273 - | l, c ->
1274 -- add_character l c;
1275 -+ add_character l (Uchar.of_int c);
1276 - cdata_state l'
1277 - end
1278 -
1279 -@@ -929,8 +929,8 @@ let tokenize report resolve_reference (i
1280 - cdata_two_brackets_state l' l'' l
1281 -
1282 - | l, c ->
1283 -- add_character l'' 0x005D;
1284 -- add_character l c;
1285 -+ add_character l'' (Uchar.of_int 0x005D);
1286 -+ add_character l (Uchar.of_int c);
1287 - cdata_state l'
1288 - end
1289 -
1290 -@@ -941,13 +941,13 @@ let tokenize report resolve_reference (i
1291 - initial_state ()
1292 -
1293 - | l, 0x005D ->
1294 -- add_character l'' 0x005D;
1295 -+ add_character l'' (Uchar.of_int 0x005D);
1296 - cdata_two_brackets_state l' l''' l
1297 -
1298 - | l, c ->
1299 -- add_character l'' 0x005D;
1300 -- add_character l''' 0x005D;
1301 -- add_character l c;
1302 -+ add_character l'' (Uchar.of_int 0x005D);
1303 -+ add_character l''' (Uchar.of_int 0x005D);
1304 -+ add_character l (Uchar.of_int c);
1305 - cdata_state l'
1306 - end
1307 -
1308 -@@ -963,9 +963,9 @@ let tokenize report resolve_reference (i
1309 - !throw (fun () ->
1310 - lt_in_text l' (fun () ->
1311 - push_list input cs;
1312 -- add_character l' 0x003C;
1313 -- add_character l' 0x0021;
1314 -- add_character l' 0x0044;
1315 -+ add_character l' (Uchar.of_int 0x003C);
1316 -+ add_character l' (Uchar.of_int 0x0021);
1317 -+ add_character l' (Uchar.of_int 0x0044);
1318 - initial_state ()))
1319 - end
1320 -
1321 -@@ -980,15 +980,15 @@ let tokenize report resolve_reference (i
1322 - emit_doctype l' buffer initial_state
1323 -
1324 - | _, (0x0022 | 0x0027 as c) ->
1325 -- add_utf_8 buffer c;
1326 -+ add_utf_8 buffer (Uchar.of_int c);
1327 - doctype_quoted_state (fun () -> doctype_state l' buffer) c l' buffer
1328 -
1329 - | _, (0x003C as c) ->
1330 -- add_utf_8 buffer c;
1331 -+ add_utf_8 buffer (Uchar.of_int c);
1332 - doctype_item_state (fun () -> doctype_state l' buffer) l' buffer
1333 -
1334 - | _, c ->
1335 -- add_utf_8 buffer c;
1336 -+ add_utf_8 buffer (Uchar.of_int c);
1337 - doctype_state l' buffer
1338 - end
1339 -
1340 -@@ -996,11 +996,11 @@ let tokenize report resolve_reference (i
1341 - next input !throw (fun () -> unterminated_doctype l' buffer)
1342 - begin function
1343 - | _, c when c = quote ->
1344 -- add_utf_8 buffer c;
1345 -+ add_utf_8 buffer (Uchar.of_int c);
1346 - state ()
1347 -
1348 - | _, c ->
1349 -- add_utf_8 buffer c;
1350 -+ add_utf_8 buffer (Uchar.of_int c);
1351 - doctype_quoted_state state quote l' buffer
1352 - end
1353 -
1354 -@@ -1008,18 +1008,18 @@ let tokenize report resolve_reference (i
1355 - next input !throw (fun () -> unterminated_doctype l' buffer)
1356 - begin function
1357 - | _, (0x0021 as c) ->
1358 -- add_utf_8 buffer c;
1359 -+ add_utf_8 buffer (Uchar.of_int c);
1360 - doctype_declaration_state state l' buffer
1361 -
1362 - | l, (0x003F as c) ->
1363 -- add_utf_8 buffer c;
1364 -- let undo = tap (fun (_, c) -> add_utf_8 buffer c) input in
1365 -+ add_utf_8 buffer (Uchar.of_int c);
1366 -+ let undo = tap (fun (_, c) -> add_utf_8 buffer (Uchar.of_int c)) input in
1367 - parse_declaration_or_processing_instruction l (fun _ ->
1368 - undo ();
1369 - state ())
1370 -
1371 - | _, c ->
1372 -- add_utf_8 buffer c;
1373 -+ add_utf_8 buffer (Uchar.of_int c);
1374 - state ()
1375 - end
1376 -
1377 -@@ -1027,16 +1027,16 @@ let tokenize report resolve_reference (i
1378 - next input !throw (fun () -> unterminated_doctype l' buffer)
1379 - begin function
1380 - | _, (0x003E as c) ->
1381 -- add_utf_8 buffer c;
1382 -+ add_utf_8 buffer (Uchar.of_int c);
1383 - state ()
1384 -
1385 - | _, (0x0022 | 0x0027 as c) ->
1386 -- add_utf_8 buffer c;
1387 -+ add_utf_8 buffer (Uchar.of_int c);
1388 - doctype_quoted_state
1389 - (fun () -> doctype_declaration_state state l' buffer) c l' buffer
1390 -
1391 - | _, c ->
1392 -- add_utf_8 buffer c;
1393 -+ add_utf_8 buffer (Uchar.of_int c);
1394 - doctype_declaration_state state l' buffer
1395 - end
1396 -
1397
1398 diff --git a/dev-ml/markup/markup-0.7.2-r1.ebuild b/dev-ml/markup/markup-0.7.2-r1.ebuild
1399 deleted file mode 100644
1400 index f70ac55..00000000
1401 --- a/dev-ml/markup/markup-0.7.2-r1.ebuild
1402 +++ /dev/null
1403 @@ -1,44 +0,0 @@
1404 -# Copyright 1999-2016 Gentoo Foundation
1405 -# Distributed under the terms of the GNU General Public License v2
1406 -# $Id$
1407 -
1408 -EAPI=5
1409 -
1410 -inherit findlib eutils
1411 -
1412 -DESCRIPTION="Error-recovering streaming HTML5 and XML parsers"
1413 -HOMEPAGE="https://github.com/aantron/markup.ml"
1414 -SRC_URI="https://github.com/aantron/markup.ml/archive/${PV}.tar.gz -> ${P}.tar.gz"
1415 -
1416 -LICENSE="BSD"
1417 -SLOT="0/${PV}p1"
1418 -KEYWORDS="~amd64"
1419 -IUSE="doc test"
1420 -
1421 -DEPEND="
1422 - dev-lang/ocaml:=[ocamlopt]
1423 - dev-ml/lwt:=[ocamlopt]
1424 - >=dev-ml/uutf-1.0:=[ocamlopt]
1425 -"
1426 -RDEPEND="${DEPEND}"
1427 -DEPEND="${DEPEND}
1428 - test? ( dev-ml/ounit )
1429 - dev-ml/ocamlbuild"
1430 -S="${WORKDIR}/${PN}.ml-${PV}"
1431 -
1432 -src_prepare() {
1433 - epatch "${FILESDIR}/uutf.patch" \
1434 - "${FILESDIR}/test.patch"
1435 -}
1436 -
1437 -src_compile() {
1438 - emake
1439 - use doc && emake docs
1440 -}
1441 -
1442 -src_install() {
1443 - findlib_src_preinst
1444 - emake ocamlfind-install
1445 - dodoc README.md
1446 - use doc && dohtml doc/html/*
1447 -}