1 |
commit: 56c0c3a567b08228699194eb2820a6f59595ab6a |
2 |
Author: Alexis Ballier <aballier <AT> gentoo <DOT> org> |
3 |
AuthorDate: Mon Nov 28 20:26:55 2016 +0000 |
4 |
Commit: Alexis Ballier <aballier <AT> gentoo <DOT> org> |
5 |
CommitDate: Mon Nov 28 20:27:33 2016 +0000 |
6 |
URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=56c0c3a5 |
7 |
|
8 |
dev-ml/markup: fix build with uutf 1.0 |
9 |
|
10 |
Package-Manager: portage-2.3.2 |
11 |
|
12 |
dev-ml/markup/files/test.patch | 273 +++++ |
13 |
dev-ml/markup/files/uutf.patch | 1085 ++++++++++++++++++++ |
14 |
...{markup-0.7.2.ebuild => markup-0.7.2-r1.ebuild} | 11 +- |
15 |
3 files changed, 1366 insertions(+), 3 deletions(-) |
16 |
|
17 |
diff --git a/dev-ml/markup/files/test.patch b/dev-ml/markup/files/test.patch |
18 |
new file mode 100644 |
19 |
index 00000000..f2a5257 |
20 |
--- /dev/null |
21 |
+++ b/dev-ml/markup/files/test.patch |
22 |
@@ -0,0 +1,273 @@ |
23 |
+Index: markup.ml-0.7.2/test/test_encoding.ml |
24 |
+=================================================================== |
25 |
+--- markup.ml-0.7.2.orig/test/test_encoding.ml |
26 |
++++ markup.ml-0.7.2/test/test_encoding.ml |
27 |
+@@ -15,9 +15,9 @@ let test_ucs_4 (f : Encoding.t) name s1 |
28 |
+ expect_error (1, 2) (`Decoding_error (bad_bytes, name)) |
29 |
+ begin fun report -> |
30 |
+ let chars = s1 |> string |> f ~report in |
31 |
+- next_option chars ok (assert_equal (Some (Char.code 'f'))); |
32 |
++ next_option chars ok (assert_equal (Some ((Uchar.of_int (Char.code 'f'))))); |
33 |
+ next_option chars ok (assert_equal (Some Uutf.u_rep)); |
34 |
+- next_option chars ok (assert_equal (Some (Char.code 'o'))); |
35 |
++ next_option chars ok (assert_equal (Some ((Uchar.of_int (Char.code 'o'))))); |
36 |
+ next_option chars ok (assert_equal None); |
37 |
+ next_option chars ok (assert_equal None) |
38 |
+ end; |
39 |
+@@ -25,9 +25,9 @@ let test_ucs_4 (f : Encoding.t) name s1 |
40 |
+ expect_error (2, 2) (`Decoding_error ("\x00\x00\x00", name)) |
41 |
+ begin fun report -> |
42 |
+ let chars = s2 |> string |> f ~report in |
43 |
+- next_option chars ok (assert_equal (Some (Char.code 'f'))); |
44 |
+- next_option chars ok (assert_equal (Some 0x000A)); |
45 |
+- next_option chars ok (assert_equal (Some (Char.code 'o'))); |
46 |
++ next_option chars ok (assert_equal (Some (Uchar.of_int (Char.code 'f')))); |
47 |
++ next_option chars ok (assert_equal (Some (Uchar.of_int 0x000A))); |
48 |
++ next_option chars ok (assert_equal (Some (Uchar.of_int (Char.code 'o')))); |
49 |
+ next_option chars ok (assert_equal (Some Uutf.u_rep)); |
50 |
+ next_option chars ok (assert_equal None); |
51 |
+ next_option chars ok (assert_equal None) |
52 |
+@@ -38,12 +38,12 @@ let tests = [ |
53 |
+ let s = "\xef\xbb\xbffoo\xf0\x9f\x90\x99bar\xa0more" in |
54 |
+ expect_error (1, 8) (`Decoding_error ("\xa0", "utf-8")) begin fun report -> |
55 |
+ let chars = s |> string |> utf_8 ~report in |
56 |
+- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'])); |
57 |
+- next_option chars ok (assert_equal (Some 0x1F419)); |
58 |
+- next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r'])); |
59 |
++ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o'])); |
60 |
++ next_option chars ok (assert_equal (Some (Uchar.of_int 0x1F419))); |
61 |
++ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['b'; 'a'; 'r'])); |
62 |
+ next_option chars ok (assert_equal (Some Uutf.u_rep)); |
63 |
+ next_n 4 chars ok |
64 |
+- (assert_equal (List.map Char.code ['m'; 'o'; 'r'; 'e'])); |
65 |
++ (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['m'; 'o'; 'r'; 'e'])); |
66 |
+ next_option chars ok (assert_equal None); |
67 |
+ next_option chars ok (assert_equal None) |
68 |
+ end); |
69 |
+@@ -53,11 +53,11 @@ let tests = [ |
70 |
+ expect_error (1, 6) (`Decoding_error ("\xdc\x19", "utf-16be")) |
71 |
+ begin fun report -> |
72 |
+ let chars = s |> string |> utf_16be ~report in |
73 |
+- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'])); |
74 |
+- next_option chars ok (assert_equal (Some 0x1F419)); |
75 |
+- next_option chars ok (assert_equal (Some (Char.code 'b'))); |
76 |
++ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o'])); |
77 |
++ next_option chars ok (assert_equal (Some (Uchar.of_int 0x1F419))); |
78 |
++ next_option chars ok (assert_equal (Some (Uchar.of_int (Char.code 'b')))); |
79 |
+ next_option chars ok (assert_equal (Some Uutf.u_rep)); |
80 |
+- next_n 16 chars ok (assert_equal (List.map Char.code ['a'; 'r'])); |
81 |
++ next_n 16 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['a'; 'r'])); |
82 |
+ next_option chars ok (assert_equal None); |
83 |
+ next_option chars ok (assert_equal None) |
84 |
+ end); |
85 |
+@@ -67,11 +67,11 @@ let tests = [ |
86 |
+ expect_error (1, 6) (`Decoding_error ("\x19\xdc", "utf-16le")) |
87 |
+ begin fun report -> |
88 |
+ let chars = s |> string |> utf_16le ~report in |
89 |
+- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'])); |
90 |
+- next_option chars ok (assert_equal (Some 0x1F419)); |
91 |
+- next_option chars ok (assert_equal (Some (Char.code 'b'))); |
92 |
++ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o'])); |
93 |
++ next_option chars ok (assert_equal (Some (Uchar.of_int 0x1F419))); |
94 |
++ next_option chars ok (assert_equal (Some (Uchar.of_int (Char.code 'b')))); |
95 |
+ next_option chars ok (assert_equal (Some Uutf.u_rep)); |
96 |
+- next_n 16 chars ok (assert_equal (List.map Char.code ['a'; 'r'])); |
97 |
++ next_n 16 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['a'; 'r'])); |
98 |
+ next_option chars ok (assert_equal None); |
99 |
+ next_option chars ok (assert_equal None) |
100 |
+ end); |
101 |
+@@ -79,7 +79,7 @@ let tests = [ |
102 |
+ ("encoding.iso_8859_1" >:: fun _ -> |
103 |
+ let chars = string "foo\xa0" |> iso_8859_1 in |
104 |
+ next_n 4 chars |
105 |
+- ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'; '\xa0'])); |
106 |
++ ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o'; '\xa0'])); |
107 |
+ next_option chars ok (assert_equal None); |
108 |
+ next_option chars ok (assert_equal None)); |
109 |
+ |
110 |
+@@ -88,26 +88,26 @@ let tests = [ |
111 |
+ expect_error (1, 4) (`Decoding_error ("\xa0", "us-ascii")) |
112 |
+ begin fun report -> |
113 |
+ let chars = s |> string |> us_ascii ~report in |
114 |
+- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'])); |
115 |
++ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o'])); |
116 |
+ next_option chars ok (assert_equal (Some Uutf.u_rep)); |
117 |
+- next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r'])); |
118 |
++ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['b'; 'a'; 'r'])); |
119 |
+ next_option chars ok (assert_equal None); |
120 |
+ next_option chars ok (assert_equal None) |
121 |
+ end); |
122 |
+ |
123 |
+ ("encoding.windows_1251" >:: fun _ -> |
124 |
+ let chars = string "foo\xe0\xe1\xe2bar" |> windows_1251 in |
125 |
+- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'])); |
126 |
+- next_n 3 chars ok (assert_equal [0x0430; 0x0431; 0x0432]); |
127 |
+- next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r'])); |
128 |
++ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o'])); |
129 |
++ next_n 3 chars ok (assert_equal [Uchar.of_int 0x0430; Uchar.of_int 0x0431; Uchar.of_int 0x0432]); |
130 |
++ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['b'; 'a'; 'r'])); |
131 |
+ next_option chars ok (assert_equal None); |
132 |
+ next_option chars ok (assert_equal None)); |
133 |
+ |
134 |
+ ("encoding.windows_1252" >:: fun _ -> |
135 |
+ let chars = string "foo\x80\x83bar" |> windows_1252 in |
136 |
+- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'])); |
137 |
+- next_n 2 chars ok (assert_equal [0x20AC; 0x0192]); |
138 |
+- next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r'])); |
139 |
++ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o'])); |
140 |
++ next_n 2 chars ok (assert_equal [Uchar.of_int 0x20AC; Uchar.of_int 0x0192]); |
141 |
++ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['b'; 'a'; 'r'])); |
142 |
+ next_option chars ok (assert_equal None); |
143 |
+ next_option chars ok (assert_equal None)); |
144 |
+ |
145 |
+@@ -137,7 +137,7 @@ let tests = [ |
146 |
+ |
147 |
+ ("encoding.ebcdic" >:: fun _ -> |
148 |
+ let chars = string "\x86\x96\x96" |> ebcdic in |
149 |
+- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'])); |
150 |
++ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o'])); |
151 |
+ next_option chars ok (assert_equal None); |
152 |
+ next_option chars ok (assert_equal None)); |
153 |
+ ] |
154 |
+Index: markup.ml-0.7.2/test/test_html_tokenizer.ml |
155 |
+=================================================================== |
156 |
+--- markup.ml-0.7.2.orig/test/test_html_tokenizer.ml |
157 |
++++ markup.ml-0.7.2/test/test_html_tokenizer.ml |
158 |
+@@ -134,7 +134,7 @@ let tests = [ |
159 |
+ expect "�" |
160 |
+ [ 1, 1, E (`Bad_token ("�", |
161 |
+ reference, "out of range")); |
162 |
+- 1, 1, S (`Char Uutf.u_rep); |
163 |
++ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep)); |
164 |
+ 1, 35, S `EOF]; |
165 |
+ |
166 |
+ expect "�" |
167 |
+@@ -142,22 +142,22 @@ let tests = [ |
168 |
+ reference, "missing ';' at end")); |
169 |
+ 1, 1, E (`Bad_token ("�", |
170 |
+ reference, "out of range")); |
171 |
+- 1, 1, S (`Char Uutf.u_rep); |
172 |
++ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep)); |
173 |
+ 1, 34, S `EOF]; |
174 |
+ |
175 |
+ expect "&#xD800;" |
176 |
+ [ 1, 1, E (`Bad_token ("&#xD800;", reference, "out of range")); |
177 |
+- 1, 1, S (`Char Uutf.u_rep); |
178 |
++ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep)); |
179 |
+ 1, 9, S `EOF]; |
180 |
+ |
181 |
+ expect "&#x110000;" |
182 |
+ [ 1, 1, E (`Bad_token ("&#x110000;", reference, "out of range")); |
183 |
+- 1, 1, S (`Char Uutf.u_rep); |
184 |
++ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep)); |
185 |
+ 1, 11, S `EOF]; |
186 |
+ |
187 |
+ expect "&#0;" |
188 |
+ [ 1, 1, E (`Bad_token ("&#0;", reference, "out of range")); |
189 |
+- 1, 1, S (`Char Uutf.u_rep); |
190 |
++ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep)); |
191 |
+ 1, 5, S `EOF]; |
192 |
+ |
193 |
+ expect "" |
194 |
+@@ -264,7 +264,7 @@ let tests = [ |
195 |
+ expect ~state:`RCDATA "f\x00</foo>" |
196 |
+ ([ 1, 1, S (`Char 0x66); |
197 |
+ 1, 2, E (`Bad_token ("U+0000", "content", "null")); |
198 |
+- 1, 2, S (`Char Uutf.u_rep)] @ |
199 |
++ 1, 2, S (`Char (Uchar.to_int Uutf.u_rep))] @ |
200 |
+ (char_sequence ~start:3 "</foo>")); |
201 |
+ |
202 |
+ expect ~state:`RCDATA "<title>f</title >" |
203 |
+@@ -302,7 +302,7 @@ let tests = [ |
204 |
+ expect ~state:`RAWTEXT "f\x00</foo>" |
205 |
+ ([ 1, 1, S (`Char 0x66); |
206 |
+ 1, 2, E (`Bad_token ("U+0000", "content", "null")); |
207 |
+- 1, 2, S (`Char Uutf.u_rep)] @ |
208 |
++ 1, 2, S (`Char (Uchar.to_int Uutf.u_rep))] @ |
209 |
+ (char_sequence ~start:3 "</foo>"))); |
210 |
+ |
211 |
+ ("html.tokenizer.script-data" >:: fun _ -> |
212 |
+@@ -330,7 +330,7 @@ let tests = [ |
213 |
+ expect ~state:`Script_data "f<!--o\x00o" |
214 |
+ ((char_sequence ~no_eof:true "f<!--o") @ |
215 |
+ [1, 7, E (`Bad_token ("U+0000", "script", "null")); |
216 |
+- 1, 7, S (`Char Uutf.u_rep); |
217 |
++ 1, 7, S (`Char (Uchar.to_int Uutf.u_rep)); |
218 |
+ 1, 8, S (`Char 0x6F); |
219 |
+ 1, 9, E (`Unexpected_eoi "script"); |
220 |
+ 1, 9, S `EOF]); |
221 |
+@@ -363,7 +363,7 @@ let tests = [ |
222 |
+ expect ~state:`Script_data "f<!--a-\x00-" |
223 |
+ ((char_sequence ~no_eof:true "f<!--a-") @ |
224 |
+ [ 1, 8, E (`Bad_token ("U+0000", "script", "null")); |
225 |
+- 1, 8, S (`Char Uutf.u_rep); |
226 |
++ 1, 8, S (`Char (Uchar.to_int Uutf.u_rep)); |
227 |
+ 1, 9, S (`Char 0x02D); |
228 |
+ 1, 10, E (`Unexpected_eoi "script"); |
229 |
+ 1, 10, S `EOF]); |
230 |
+@@ -371,7 +371,7 @@ let tests = [ |
231 |
+ expect ~state:`Script_data "f<!--a--\x00--" |
232 |
+ ((char_sequence ~no_eof:true "f<!--a--") @ |
233 |
+ [ 1, 9, E (`Bad_token ("U+0000", "script", "null")); |
234 |
+- 1, 9, S (`Char Uutf.u_rep); |
235 |
++ 1, 9, S (`Char (Uchar.to_int Uutf.u_rep)); |
236 |
+ 1, 10, S (`Char 0x02D); |
237 |
+ 1, 11, S (`Char 0x02D); |
238 |
+ 1, 12, E (`Unexpected_eoi "script"); |
239 |
+@@ -380,14 +380,14 @@ let tests = [ |
240 |
+ expect ~state:`Script_data "f<!--<script>\x00" |
241 |
+ ((char_sequence ~no_eof:true "f<!--<script>") @ |
242 |
+ [ 1, 14, E (`Bad_token ("U+0000", "script", "null")); |
243 |
+- 1, 14, S (`Char Uutf.u_rep); |
244 |
++ 1, 14, S (`Char (Uchar.to_int Uutf.u_rep)); |
245 |
+ 1, 15, E (`Unexpected_eoi "script"); |
246 |
+ 1, 15, S `EOF]); |
247 |
+ |
248 |
+ expect ~state:`Script_data "f<!--<script>-\x00-" |
249 |
+ ((char_sequence ~no_eof:true "f<!--<script>-") @ |
250 |
+ [ 1, 15, E (`Bad_token ("U+0000", "script", "null")); |
251 |
+- 1, 15, S (`Char Uutf.u_rep); |
252 |
++ 1, 15, S (`Char (Uchar.to_int Uutf.u_rep)); |
253 |
+ 1, 16, S (`Char 0x2D); |
254 |
+ 1, 17, E (`Unexpected_eoi "script"); |
255 |
+ 1, 17, S `EOF]); |
256 |
+@@ -395,7 +395,7 @@ let tests = [ |
257 |
+ expect ~state:`Script_data "f<!--<script>--\x00--" |
258 |
+ ((char_sequence ~no_eof:true "f<!--<script>--") @ |
259 |
+ [ 1, 16, E (`Bad_token ("U+0000", "script", "null")); |
260 |
+- 1, 16, S (`Char Uutf.u_rep); |
261 |
++ 1, 16, S (`Char (Uchar.to_int Uutf.u_rep)); |
262 |
+ 1, 17, S (`Char 0x2D); |
263 |
+ 1, 18, S (`Char 0x2D); |
264 |
+ 1, 19, E (`Unexpected_eoi "script"); |
265 |
+@@ -413,7 +413,7 @@ let tests = [ |
266 |
+ expect ~state:`Script_data "f\x00</foo>" |
267 |
+ ([ 1, 1, S (`Char 0x66); |
268 |
+ 1, 2, E (`Bad_token ("U+0000", "content", "null")); |
269 |
+- 1, 2, S (`Char Uutf.u_rep)] @ |
270 |
++ 1, 2, S (`Char (Uchar.to_int Uutf.u_rep))] @ |
271 |
+ (char_sequence ~start:3 "</foo>"))); |
272 |
+ |
273 |
+ ("html.tokenizer.plaintext" >:: fun _ -> |
274 |
+@@ -424,7 +424,7 @@ let tests = [ |
275 |
+ expect ~state:`PLAINTEXT "f\x00</foo>" |
276 |
+ ([ 1, 1, S (`Char 0x66); |
277 |
+ 1, 2, E (`Bad_token ("U+0000", "content", "null")); |
278 |
+- 1, 2, S (`Char Uutf.u_rep)] @ |
279 |
++ 1, 2, S (`Char (Uchar.to_int Uutf.u_rep))] @ |
280 |
+ (char_sequence ~start:3 "</foo>"))); |
281 |
+ |
282 |
+ ("html.tokenizer.comment" >:: fun _ -> |
283 |
+Index: markup.ml-0.7.2/test/test_input.ml |
284 |
+=================================================================== |
285 |
+--- markup.ml-0.7.2.orig/test/test_input.ml |
286 |
++++ markup.ml-0.7.2/test/test_input.ml |
287 |
+@@ -71,7 +71,7 @@ let tests = [ |
288 |
+ end); |
289 |
+ |
290 |
+ ("input.bom" >:: fun _ -> |
291 |
+- [0xFEFF; 0x66] |
292 |
++ [Uchar.of_int 0xFEFF; Uchar.of_int 0x66] |
293 |
+ |> of_list |
294 |
+ |> preprocess is_valid_xml_char Error.ignore_errors |
295 |
+ |> fst |
296 |
|
297 |
diff --git a/dev-ml/markup/files/uutf.patch b/dev-ml/markup/files/uutf.patch |
298 |
new file mode 100644 |
299 |
index 00000000..f561084 |
300 |
--- /dev/null |
301 |
+++ b/dev-ml/markup/files/uutf.patch |
302 |
@@ -0,0 +1,1085 @@ |
303 |
+Index: markup.ml-0.7.2/src/common.ml |
304 |
+=================================================================== |
305 |
+--- markup.ml-0.7.2.orig/src/common.ml |
306 |
++++ markup.ml-0.7.2/src/common.ml |
307 |
+@@ -134,7 +134,7 @@ let is_printable = is_in_range 0x0020 0x |
308 |
+ let char c = |
309 |
+ if is_printable c then begin |
310 |
+ let buffer = Buffer.create 4 in |
311 |
+- add_utf_8 buffer c; |
312 |
++ add_utf_8 buffer (Uchar.of_int c); |
313 |
+ Buffer.contents buffer |
314 |
+ end |
315 |
+ else |
316 |
+Index: markup.ml-0.7.2/src/detect.ml |
317 |
+=================================================================== |
318 |
+--- markup.ml-0.7.2.orig/src/detect.ml |
319 |
++++ markup.ml-0.7.2/src/detect.ml |
320 |
+@@ -222,7 +222,7 @@ let meta_tag_prescan = |
321 |
+ let rec iterate () = |
322 |
+ next source throw (fun () -> k "") (function |
323 |
+ | c when c = quote -> k (Buffer.contents buffer) |
324 |
+- | c -> add_utf_8 buffer (Char.code (Char.lowercase c)); iterate ()) |
325 |
++ | c -> add_utf_8 buffer (Uchar.of_int (Char.code (Char.lowercase c))); iterate ()) |
326 |
+ in |
327 |
+ iterate () |
328 |
+ in |
329 |
+@@ -236,7 +236,7 @@ let meta_tag_prescan = |
330 |
+ push source c; |
331 |
+ k (Buffer.contents buffer) |
332 |
+ | c -> |
333 |
+- add_utf_8 buffer (Char.code (Char.lowercase c)); |
334 |
++ add_utf_8 buffer (Uchar.of_int (Char.code (Char.lowercase c))); |
335 |
+ iterate ()) |
336 |
+ in |
337 |
+ iterate () |
338 |
+@@ -315,7 +315,7 @@ let meta_tag_prescan = |
339 |
+ k (Buffer.contents buffer) |
340 |
+ |
341 |
+ | Some c -> |
342 |
+- add_utf_8 buffer (Char.code (Char.lowercase c)); |
343 |
++ add_utf_8 buffer (Uchar.of_int (Char.code (Char.lowercase c))); |
344 |
+ iterate () |
345 |
+ end |
346 |
+ in |
347 |
+Index: markup.ml-0.7.2/src/encoding.ml |
348 |
+=================================================================== |
349 |
+--- markup.ml-0.7.2.orig/src/encoding.ml |
350 |
++++ markup.ml-0.7.2/src/encoding.ml |
351 |
+@@ -4,7 +4,7 @@ |
352 |
+ open Common |
353 |
+ open Kstream |
354 |
+ |
355 |
+-type t = ?report:Error.parse_handler -> char Kstream.t -> int Kstream.t |
356 |
++type t = ?report:Error.parse_handler -> char Kstream.t -> Uchar.t Kstream.t |
357 |
+ |
358 |
+ let wrap f = fun ?(report = Error.ignore_errors) s -> f report s |
359 |
+ |
360 |
+@@ -24,8 +24,8 @@ let _uutf_decoder encoding name = |
361 |
+ k Uutf.u_rep) |
362 |
+ | `Await -> |
363 |
+ next bytes throw |
364 |
+- (fun () -> Uutf.Manual.src decoder "" 0 0; run ()) |
365 |
+- (fun c -> Uutf.Manual.src decoder (String.make 1 c) 0 1; run ()) |
366 |
++ (fun () -> Uutf.Manual.src decoder Bytes.empty 0 0; run ()) |
367 |
++ (fun c -> Uutf.Manual.src decoder (Bytes.make 1 c) 0 1; run ()) |
368 |
+ in |
369 |
+ run ()) |
370 |
+ |> make) |
371 |
+@@ -87,7 +87,7 @@ let _ucs_4_decoder arrange name = |
372 |
+ let skip = |
373 |
+ if !first then begin |
374 |
+ first := false; |
375 |
+- scalar = Uutf.u_bom |
376 |
++ scalar = Uchar.to_int Uutf.u_bom |
377 |
+ end |
378 |
+ else |
379 |
+ false |
380 |
+@@ -96,9 +96,9 @@ let _ucs_4_decoder arrange name = |
381 |
+ if skip then run () |
382 |
+ else |
383 |
+ if scalar = 0x000A then |
384 |
+- newline k scalar |
385 |
++ newline k (Uchar.of_int scalar) |
386 |
+ else |
387 |
+- char k scalar |
388 |
++ char k (Uchar.of_int scalar) |
389 |
+ |
390 |
+ | [] -> empty () |
391 |
+ |
392 |
+@@ -130,7 +130,7 @@ let code_page table = |
393 |
+ |
394 |
+ (fun _ bytes -> |
395 |
+ (fun throw empty k -> |
396 |
+- next bytes throw empty (fun c -> k table.(Char.code c))) |
397 |
++ next bytes throw empty (fun c -> k (Uchar.of_int table.(Char.code c)))) |
398 |
+ |> make) |
399 |
+ |> wrap |
400 |
+ |
401 |
+Index: markup.ml-0.7.2/src/html_parser.ml |
402 |
+=================================================================== |
403 |
+--- markup.ml-0.7.2.orig/src/html_parser.ml |
404 |
++++ markup.ml-0.7.2/src/html_parser.ml |
405 |
+@@ -1022,7 +1022,7 @@ let parse requested_context report (toke |
406 |
+ let frameset_ok = ref true in |
407 |
+ let head_seen = ref false in |
408 |
+ |
409 |
+- let add_character = Text.add text in |
410 |
++ let add_character = (fun x y -> Text.add text x (Uchar.of_int y)) in |
411 |
+ |
412 |
+ set_foreign (fun () -> |
413 |
+ Stack.current_element_is_foreign context open_elements); |
414 |
+@@ -2717,7 +2717,7 @@ let parse requested_context report (toke |
415 |
+ | l, `Char 0 -> |
416 |
+ report l (`Bad_token ("U+0000", "foreign content", "null")) !throw |
417 |
+ (fun () -> |
418 |
+- add_character l Uutf.u_rep; |
419 |
++ add_character l (Uchar.to_int Uutf.u_rep); |
420 |
+ mode ()) |
421 |
+ |
422 |
+ | l, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020 as c) -> |
423 |
+Index: markup.ml-0.7.2/src/html_tokenizer.ml |
424 |
+=================================================================== |
425 |
+--- markup.ml-0.7.2.orig/src/html_tokenizer.ml |
426 |
++++ markup.ml-0.7.2/src/html_tokenizer.ml |
427 |
+@@ -252,7 +252,7 @@ let tokenize report (input, get_location |
428 |
+ report location |
429 |
+ (`Bad_token (prefix ^ text ^ semicolon, "character reference", |
430 |
+ "Windows-1252 character")) !throw (fun () -> |
431 |
+- k (Some (`One n))) |
432 |
++ k (Some (`One (Uchar.of_int n)))) |
433 |
+ |
434 |
+ else |
435 |
+ match n with |
436 |
+@@ -268,9 +268,9 @@ let tokenize report (input, get_location |
437 |
+ (`Bad_token (prefix ^ text ^ semicolon, |
438 |
+ "character reference", |
439 |
+ "invalid HTML character")) !throw (fun () -> |
440 |
+- k (Some (`One n))) |
441 |
++ k (Some (`One (Uchar.of_int n)))) |
442 |
+ |
443 |
+- | n -> k (Some (`One n)) |
444 |
++ | n -> k (Some (`One (Uchar.of_int n))) |
445 |
+ end |
446 |
+ end |
447 |
+ in |
448 |
+@@ -366,6 +366,10 @@ let tokenize report (input, get_location |
449 |
+ | _ -> unterminated ()) |
450 |
+ in |
451 |
+ |
452 |
++ let ma = function |
453 |
++ a, `One x -> (a, `One (Uchar.of_int x)) |
454 |
++ | a, `Two (x,y) -> (a, `Two (Uchar.of_int x, Uchar.of_int y)) in |
455 |
++ |
456 |
+ let rec match_named best matched replace candidate = |
457 |
+ next_option input !throw (function |
458 |
+ | None -> finish best matched replace |
459 |
+@@ -377,8 +381,8 @@ let tokenize report (input, get_location |
460 |
+ | `None -> finish best matched (v::replace) |
461 |
+ | `Continue -> match_named best matched (v::replace) candidate |
462 |
+ | `Match_and_continue m -> |
463 |
+- match_named (Some m) (v::(replace @ matched)) [] candidate |
464 |
+- | `Match m -> finish (Some m) (v::matched) []) |
465 |
++ match_named (Some (ma m)) (v::(replace @ matched)) [] candidate |
466 |
++ | `Match m -> finish (Some (ma m)) (v::matched) []) |
467 |
+ in |
468 |
+ match_named None [] [] "") |
469 |
+ |
470 |
+@@ -409,11 +413,11 @@ let tokenize report (input, get_location |
471 |
+ emit (l, `Char 0x0026) state |
472 |
+ |
473 |
+ | Some (`One c) -> |
474 |
+- emit (l, `Char c) state |
475 |
++ emit (l, `Char (Uchar.to_int c)) state |
476 |
+ |
477 |
+ | Some (`Two (c, c')) -> |
478 |
+- emit (l, `Char c) (fun () -> |
479 |
+- emit (l, `Char c') state) |
480 |
++ emit (l, `Char (Uchar.to_int c)) (fun () -> |
481 |
++ emit (l, `Char (Uchar.to_int c')) state) |
482 |
+ end |
483 |
+ |
484 |
+ (* 8.2.4.3. *) |
485 |
+@@ -427,7 +431,7 @@ let tokenize report (input, get_location |
486 |
+ |
487 |
+ | Some (l, 0) -> |
488 |
+ report l (`Bad_token ("U+0000", "content", "null")) !throw (fun () -> |
489 |
+- emit (l, `Char Uutf.u_rep) rcdata_state) |
490 |
++ emit (l, `Char (Uchar.to_int Uutf.u_rep)) rcdata_state) |
491 |
+ |
492 |
+ | None -> |
493 |
+ emit_eof () |
494 |
+@@ -444,7 +448,7 @@ let tokenize report (input, get_location |
495 |
+ |
496 |
+ | Some (l, 0) -> |
497 |
+ report l (`Bad_token ("U+0000", "content", "null")) !throw (fun () -> |
498 |
+- emit (l, `Char Uutf.u_rep) rawtext_state) |
499 |
++ emit (l, `Char (Uchar.to_int Uutf.u_rep)) rawtext_state) |
500 |
+ |
501 |
+ | None -> |
502 |
+ emit_eof () |
503 |
+@@ -461,7 +465,7 @@ let tokenize report (input, get_location |
504 |
+ |
505 |
+ | Some (l, 0) -> |
506 |
+ report l (`Bad_token ("U+0000", "content", "null")) !throw (fun () -> |
507 |
+- emit_character l Uutf.u_rep script_data_state) |
508 |
++ emit_character l (Uchar.to_int Uutf.u_rep) script_data_state) |
509 |
+ |
510 |
+ | None -> |
511 |
+ emit_eof () |
512 |
+@@ -475,7 +479,7 @@ let tokenize report (input, get_location |
513 |
+ next_option input !throw begin function |
514 |
+ | Some (l, 0) -> |
515 |
+ report l (`Bad_token ("U+0000", "content", "null")) !throw (fun () -> |
516 |
+- emit (l, `Char Uutf.u_rep) plaintext_state) |
517 |
++ emit (l, `Char (Uchar.to_int Uutf.u_rep)) plaintext_state) |
518 |
+ |
519 |
+ | None -> |
520 |
+ emit_eof () |
521 |
+@@ -501,7 +505,7 @@ let tokenize report (input, get_location |
522 |
+ end_tag_open_state l' tag |
523 |
+ |
524 |
+ | Some (_, c) when is_alphabetic c -> |
525 |
+- add_utf_8 tag._tag_name (to_lowercase c); |
526 |
++ add_utf_8 tag._tag_name (Uchar.of_int (to_lowercase c)); |
527 |
+ tag_name_state l' tag |
528 |
+ |
529 |
+ | Some (_, 0x003F) -> |
530 |
+@@ -529,7 +533,7 @@ let tokenize report (input, get_location |
531 |
+ |
532 |
+ next_option input !throw begin function |
533 |
+ | Some (_, c) when is_alphabetic c -> |
534 |
+- add_utf_8 tag._tag_name (to_lowercase c); |
535 |
++ add_utf_8 tag._tag_name (Uchar.of_int (to_lowercase c)); |
536 |
+ tag_name_state l' tag |
537 |
+ |
538 |
+ | Some (_, 0x003E) -> |
539 |
+@@ -569,7 +573,7 @@ let tokenize report (input, get_location |
540 |
+ report (get_location ()) (`Unexpected_eoi "tag") !throw data_state |
541 |
+ |
542 |
+ | Some (_, c) -> |
543 |
+- add_utf_8 tag._tag_name (to_lowercase c); |
544 |
++ add_utf_8 tag._tag_name (Uchar.of_int (to_lowercase c)); |
545 |
+ tag_name_state l' tag |
546 |
+ end |
547 |
+ |
548 |
+@@ -589,7 +593,7 @@ let tokenize report (input, get_location |
549 |
+ next_option input !throw begin function |
550 |
+ | Some (_, c as v) when is_alphabetic c -> |
551 |
+ let name_buffer = Buffer.create 32 in |
552 |
+- add_utf_8 name_buffer (to_lowercase c); |
553 |
++ add_utf_8 name_buffer (Uchar.of_int (to_lowercase c)); |
554 |
+ text_end_tag_name_state state l' (v::cs) name_buffer |
555 |
+ |
556 |
+ | maybe_v -> |
557 |
+@@ -618,7 +622,7 @@ let tokenize report (input, get_location |
558 |
+ emit_tag l' (create_tag ()) |
559 |
+ |
560 |
+ | Some ((_, c) as v) when is_alphabetic c -> |
561 |
+- add_utf_8 name_buffer (to_lowercase c); |
562 |
++ add_utf_8 name_buffer (Uchar.of_int (to_lowercase c)); |
563 |
+ text_end_tag_name_state state l' (v::cs) name_buffer |
564 |
+ |
565 |
+ | maybe_v -> |
566 |
+@@ -676,7 +680,7 @@ let tokenize report (input, get_location |
567 |
+ |
568 |
+ | Some (l, 0) -> |
569 |
+ report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () -> |
570 |
+- emit_character l Uutf.u_rep (fun () -> |
571 |
++ emit_character l (Uchar.to_int Uutf.u_rep) (fun () -> |
572 |
+ script_data_escaped_state l')) |
573 |
+ |
574 |
+ | None -> |
575 |
+@@ -699,7 +703,7 @@ let tokenize report (input, get_location |
576 |
+ |
577 |
+ | Some (l, 0) -> |
578 |
+ report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () -> |
579 |
+- emit_character l Uutf.u_rep (fun () -> |
580 |
++ emit_character l (Uchar.to_int Uutf.u_rep) (fun () -> |
581 |
+ script_data_escaped_state l')) |
582 |
+ |
583 |
+ | None -> |
584 |
+@@ -725,7 +729,7 @@ let tokenize report (input, get_location |
585 |
+ |
586 |
+ | Some (l, 0) -> |
587 |
+ report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () -> |
588 |
+- emit_character l Uutf.u_rep (fun () -> |
589 |
++ emit_character l (Uchar.to_int Uutf.u_rep) (fun () -> |
590 |
+ script_data_escaped_state l')) |
591 |
+ |
592 |
+ | None -> |
593 |
+@@ -745,7 +749,7 @@ let tokenize report (input, get_location |
594 |
+ |
595 |
+ | Some (_, c as v) when is_alphabetic c -> |
596 |
+ let tag_buffer = Buffer.create 32 in |
597 |
+- add_utf_8 tag_buffer (to_lowercase c); |
598 |
++ add_utf_8 tag_buffer (Uchar.of_int (to_lowercase c)); |
599 |
+ emit_characters (List.rev (v::cs)) (fun () -> |
600 |
+ script_data_double_escape_start_state l' tag_buffer) |
601 |
+ |
602 |
+@@ -765,7 +769,7 @@ let tokenize report (input, get_location |
603 |
+ else script_data_escaped_state l') |
604 |
+ |
605 |
+ | Some (l, c) when is_alphabetic c -> |
606 |
+- add_utf_8 tag_buffer (to_lowercase c); |
607 |
++ add_utf_8 tag_buffer (Uchar.of_int (to_lowercase c)); |
608 |
+ emit_character l c (fun () -> |
609 |
+ script_data_double_escape_start_state l' tag_buffer) |
610 |
+ |
611 |
+@@ -787,7 +791,7 @@ let tokenize report (input, get_location |
612 |
+ |
613 |
+ | Some (l, 0) -> |
614 |
+ report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () -> |
615 |
+- emit_character l Uutf.u_rep (fun () -> |
616 |
++ emit_character l (Uchar.to_int Uutf.u_rep) (fun () -> |
617 |
+ script_data_double_escaped_state l')) |
618 |
+ |
619 |
+ | None -> |
620 |
+@@ -811,7 +815,7 @@ let tokenize report (input, get_location |
621 |
+ |
622 |
+ | Some (l, 0) -> |
623 |
+ report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () -> |
624 |
+- emit_character l Uutf.u_rep (fun () -> |
625 |
++ emit_character l (Uchar.to_int Uutf.u_rep) (fun () -> |
626 |
+ script_data_double_escaped_state l')) |
627 |
+ |
628 |
+ | None -> |
629 |
+@@ -838,7 +842,7 @@ let tokenize report (input, get_location |
630 |
+ |
631 |
+ | Some (l, 0) -> |
632 |
+ report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () -> |
633 |
+- emit_character l Uutf.u_rep (fun () -> |
634 |
++ emit_character l (Uchar.to_int Uutf.u_rep) (fun () -> |
635 |
+ script_data_double_escaped_state l')) |
636 |
+ |
637 |
+ | None -> |
638 |
+@@ -872,7 +876,7 @@ let tokenize report (input, get_location |
639 |
+ else script_data_double_escaped_state l') |
640 |
+ |
641 |
+ | Some (l, c) when is_alphabetic c -> |
642 |
+- add_utf_8 tag_buffer (to_lowercase c); |
643 |
++ add_utf_8 tag_buffer (Uchar.of_int (to_lowercase c)); |
644 |
+ emit_character l c (fun () -> |
645 |
+ script_data_double_escape_end_state l' tag_buffer) |
646 |
+ |
647 |
+@@ -910,10 +914,10 @@ let tokenize report (input, get_location |
648 |
+ | Some (l, (0x0022 | 0x0027 | 0x003C | 0x003D as c)) -> |
649 |
+ report l (`Bad_token (char c, "attribute name", |
650 |
+ "invalid start character")) !throw (fun () -> |
651 |
+- start_attribute c) |
652 |
++ start_attribute (Uchar.of_int c)) |
653 |
+ |
654 |
+ | Some (_, c) -> |
655 |
+- start_attribute (to_lowercase c) |
656 |
++ start_attribute (Uchar.of_int (to_lowercase c)) |
657 |
+ end |
658 |
+ |
659 |
+ (* 8.2.4.35. *) |
660 |
+@@ -942,14 +946,14 @@ let tokenize report (input, get_location |
661 |
+ | Some (l, (0x0022 | 0x0027 | 0x003C as c)) -> |
662 |
+ report l (`Bad_token (char c, "attribute name", |
663 |
+ "invalid name character")) !throw (fun () -> |
664 |
+- add_utf_8 name_buffer c; |
665 |
++ add_utf_8 name_buffer (Uchar.of_int c); |
666 |
+ attribute_name_state l' tag name_buffer) |
667 |
+ |
668 |
+ | None -> |
669 |
+ report (get_location ()) (`Unexpected_eoi "tag") !throw data_state |
670 |
+ |
671 |
+ | Some (_, c) -> |
672 |
+- add_utf_8 name_buffer (to_lowercase c); |
673 |
++ add_utf_8 name_buffer (Uchar.of_int (to_lowercase c)); |
674 |
+ attribute_name_state l' tag name_buffer |
675 |
+ end |
676 |
+ |
677 |
+@@ -985,13 +989,13 @@ let tokenize report (input, get_location |
678 |
+ | Some (l, (0x0022 | 0x0027 | 0x003C as c)) -> |
679 |
+ report l (`Bad_token (char c, "attribute name", |
680 |
+ "invalid start character")) !throw (fun () -> |
681 |
+- start_next_attribute c) |
682 |
++ start_next_attribute (Uchar.of_int c)) |
683 |
+ |
684 |
+ | None -> |
685 |
+ report (get_location ()) (`Unexpected_eoi "tag") !throw data_state |
686 |
+ |
687 |
+ | Some (_, c) -> |
688 |
+- start_next_attribute (to_lowercase c) |
689 |
++ start_next_attribute (Uchar.of_int (to_lowercase c)) |
690 |
+ end |
691 |
+ |
692 |
+ (* 8.2.4.37. *) |
693 |
+@@ -1030,13 +1034,13 @@ let tokenize report (input, get_location |
694 |
+ | Some (l, (0x003C | 0x003D | 0x0060 as c)) -> |
695 |
+ report l (`Bad_token (char c, "attribute value", |
696 |
+ "invalid start character")) !throw (fun () -> |
697 |
+- start_value attribute_value_unquoted_state (Some c)) |
698 |
++ start_value attribute_value_unquoted_state (Some (Uchar.of_int c))) |
699 |
+ |
700 |
+ | None -> |
701 |
+ report (get_location ()) (`Unexpected_eoi "tag") !throw data_state |
702 |
+ |
703 |
+ | Some (_, c) -> |
704 |
+- start_value attribute_value_unquoted_state (Some c) |
705 |
++ start_value attribute_value_unquoted_state (Some (Uchar.of_int c)) |
706 |
+ end |
707 |
+ |
708 |
+ (* 8.2.4.38 and 8.2.4.39. *) |
709 |
+@@ -1062,7 +1066,7 @@ let tokenize report (input, get_location |
710 |
+ data_state |
711 |
+ |
712 |
+ | Some (_, c) -> |
713 |
+- add_utf_8 value_buffer c; |
714 |
++ add_utf_8 value_buffer (Uchar.of_int c); |
715 |
+ attribute_value_quoted_state quote l' tag name value_buffer |
716 |
+ end |
717 |
+ |
718 |
+@@ -1092,14 +1096,14 @@ let tokenize report (input, get_location |
719 |
+ | Some (l, (0x0022 | 0x0027 | 0x003C | 0x003D | 0x0060 as c)) -> |
720 |
+ report l (`Bad_token (char c, "attribute value", |
721 |
+ "invalid character")) !throw (fun () -> |
722 |
+- add_utf_8 value_buffer c; |
723 |
++ add_utf_8 value_buffer (Uchar.of_int c); |
724 |
+ attribute_value_unquoted_state l' tag name value_buffer) |
725 |
+ |
726 |
+ | None -> |
727 |
+ report (get_location ()) (`Unexpected_eoi "tag") !throw data_state |
728 |
+ |
729 |
+ | Some (_, c) -> |
730 |
+- add_utf_8 value_buffer c; |
731 |
++ add_utf_8 value_buffer (Uchar.of_int c); |
732 |
+ attribute_value_unquoted_state l' tag name value_buffer |
733 |
+ end |
734 |
+ |
735 |
+@@ -1107,7 +1111,7 @@ let tokenize report (input, get_location |
736 |
+ and character_reference_in_attribute allowed l value_buffer k = |
737 |
+ consume_character_reference true (Some allowed) l begin function |
738 |
+ | None -> |
739 |
+- add_utf_8 value_buffer 0x0026; |
740 |
++ add_utf_8 value_buffer (Uchar.of_int 0x0026); |
741 |
+ k () |
742 |
+ |
743 |
+ | Some (`One c) -> |
744 |
+@@ -1176,7 +1180,7 @@ let tokenize report (input, get_location |
745 |
+ emit_comment l' buffer |
746 |
+ |
747 |
+ | Some (_, c) -> |
748 |
+- add_utf_8 buffer c; |
749 |
++ add_utf_8 buffer (Uchar.of_int c); |
750 |
+ consume () |
751 |
+ end |
752 |
+ in |
753 |
+@@ -1239,7 +1243,7 @@ let tokenize report (input, get_location |
754 |
+ emit_comment l' buffer) |
755 |
+ |
756 |
+ | Some (_, c) -> |
757 |
+- add_utf_8 buffer c; |
758 |
++ add_utf_8 buffer (Uchar.of_int c); |
759 |
+ comment_state l' buffer |
760 |
+ end |
761 |
+ |
762 |
+@@ -1266,7 +1270,7 @@ let tokenize report (input, get_location |
763 |
+ |
764 |
+ | Some (_, c) -> |
765 |
+ Buffer.add_char buffer '-'; |
766 |
+- add_utf_8 buffer c; |
767 |
++ add_utf_8 buffer (Uchar.of_int c); |
768 |
+ comment_state l' buffer |
769 |
+ end |
770 |
+ |
771 |
+@@ -1286,7 +1290,7 @@ let tokenize report (input, get_location |
772 |
+ emit_comment l' buffer) |
773 |
+ |
774 |
+ | Some (_, c) -> |
775 |
+- add_utf_8 buffer c; |
776 |
++ add_utf_8 buffer (Uchar.of_int c); |
777 |
+ comment_state l' buffer |
778 |
+ end |
779 |
+ |
780 |
+@@ -1308,7 +1312,7 @@ let tokenize report (input, get_location |
781 |
+ |
782 |
+ | Some (_, c) -> |
783 |
+ Buffer.add_char buffer '-'; |
784 |
+- add_utf_8 buffer c; |
785 |
++ add_utf_8 buffer (Uchar.of_int c); |
786 |
+ comment_state l' buffer |
787 |
+ end |
788 |
+ |
789 |
+@@ -1343,7 +1347,7 @@ let tokenize report (input, get_location |
790 |
+ report l (`Bad_token ("--" ^ (char c), "comment", |
791 |
+ "'--' should be in '-->'")) !throw (fun () -> |
792 |
+ Buffer.add_string buffer "--"; |
793 |
+- add_utf_8 buffer c; |
794 |
++ add_utf_8 buffer (Uchar.of_int c); |
795 |
+ comment_state l' buffer) |
796 |
+ end |
797 |
+ |
798 |
+@@ -1369,7 +1373,7 @@ let tokenize report (input, get_location |
799 |
+ |
800 |
+ | Some (_, c) -> |
801 |
+ Buffer.add_string buffer "--!"; |
802 |
+- add_utf_8 buffer c; |
803 |
++ add_utf_8 buffer (Uchar.of_int c); |
804 |
+ comment_state l' buffer |
805 |
+ end |
806 |
+ |
807 |
+@@ -1420,7 +1424,7 @@ let tokenize report (input, get_location |
808 |
+ |
809 |
+ | Some (_, c) -> |
810 |
+ doctype._doctype_name <- |
811 |
+- add_doctype_char doctype._doctype_name (to_lowercase c); |
812 |
++ add_doctype_char doctype._doctype_name (Uchar.of_int (to_lowercase c)); |
813 |
+ doctype_name_state l' doctype |
814 |
+ end |
815 |
+ |
816 |
+@@ -1445,7 +1449,7 @@ let tokenize report (input, get_location |
817 |
+ |
818 |
+ | Some (_, c) -> |
819 |
+ doctype._doctype_name <- |
820 |
+- add_doctype_char doctype._doctype_name (to_lowercase c); |
821 |
++ add_doctype_char doctype._doctype_name (Uchar.of_int (to_lowercase c)); |
822 |
+ doctype_name_state l' doctype |
823 |
+ end |
824 |
+ |
825 |
+@@ -1574,7 +1578,7 @@ let tokenize report (input, get_location |
826 |
+ emit_doctype ~quirks:true l' doctype) |
827 |
+ |
828 |
+ | Some (_, c) -> |
829 |
+- add doctype c; |
830 |
++ add doctype (Uchar.of_int c); |
831 |
+ doctype_identifier_quoted_state add quote next_state l' doctype |
832 |
+ end |
833 |
+ |
834 |
+Index: markup.ml-0.7.2/src/html_writer.ml |
835 |
+=================================================================== |
836 |
+--- markup.ml-0.7.2.orig/src/html_writer.ml |
837 |
++++ markup.ml-0.7.2/src/html_writer.ml |
838 |
+@@ -8,7 +8,7 @@ let _escape_attribute s = |
839 |
+ Uutf.String.fold_utf_8 (fun () _ -> function |
840 |
+ | `Malformed _ -> () |
841 |
+ | `Uchar c -> |
842 |
+- match c with |
843 |
++ match (Uchar.to_int c) with |
844 |
+ | 0x0026 -> Buffer.add_string buffer "&" |
845 |
+ | 0x00A0 -> Buffer.add_string buffer " " |
846 |
+ | 0x0022 -> Buffer.add_string buffer """ |
847 |
+@@ -21,7 +21,7 @@ let _escape_text s = |
848 |
+ Uutf.String.fold_utf_8 (fun () _ -> function |
849 |
+ | `Malformed _ -> () |
850 |
+ | `Uchar c -> |
851 |
+- match c with |
852 |
++ match (Uchar.to_int c) with |
853 |
+ | 0x0026 -> Buffer.add_string buffer "&" |
854 |
+ | 0x00A0 -> Buffer.add_string buffer " " |
855 |
+ | 0x003C -> Buffer.add_string buffer "<" |
856 |
+Index: markup.ml-0.7.2/src/input.ml |
857 |
+=================================================================== |
858 |
+--- markup.ml-0.7.2.orig/src/input.ml |
859 |
++++ markup.ml-0.7.2/src/input.ml |
860 |
+@@ -27,13 +27,13 @@ let preprocess is_valid_char report sour |
861 |
+ in |
862 |
+ |
863 |
+ let rec iterate () = |
864 |
+- next source throw empty (function |
865 |
++ next source throw empty (fun x -> match Uchar.to_int x with |
866 |
+ | 0xFEFF when !first_char -> first_char := false; iterate () |
867 |
+ |
868 |
+ | 0x0D -> |
869 |
+- next source throw newline (function |
870 |
++ next source throw newline (fun y -> match Uchar.to_int y with |
871 |
+ | 0x0A -> newline () |
872 |
+- | c -> push source c; newline ()) |
873 |
++ | c -> push source (Uchar.of_int c); newline ()) |
874 |
+ |
875 |
+ | 0x0A -> newline () |
876 |
+ |
877 |
+Index: markup.ml-0.7.2/src/input.mli |
878 |
+=================================================================== |
879 |
+--- markup.ml-0.7.2.orig/src/input.mli |
880 |
++++ markup.ml-0.7.2/src/input.mli |
881 |
+@@ -4,5 +4,5 @@ |
882 |
+ open Common |
883 |
+ |
884 |
+ val preprocess : |
885 |
+- (int -> bool) -> Error.parse_handler -> int Kstream.t -> |
886 |
++ (int -> bool) -> Error.parse_handler -> Uchar.t Kstream.t -> |
887 |
+ (location * int) Kstream.t * (unit -> location) |
888 |
+Index: markup.ml-0.7.2/src/markup.ml |
889 |
+=================================================================== |
890 |
+--- markup.ml-0.7.2.orig/src/markup.ml |
891 |
++++ markup.ml-0.7.2/src/markup.ml |
892 |
+@@ -187,7 +187,7 @@ sig |
893 |
+ |
894 |
+ val decode : |
895 |
+ ?report:(location -> Error.t -> unit io) -> t -> |
896 |
+- (char, _) stream -> (int, async) stream |
897 |
++ (char, _) stream -> (Uchar.t, async) stream |
898 |
+ end |
899 |
+ |
900 |
+ val parse_xml : |
901 |
+Index: markup.ml-0.7.2/src/markup.mli |
902 |
+=================================================================== |
903 |
+--- markup.ml-0.7.2.orig/src/markup.mli |
904 |
++++ markup.ml-0.7.2/src/markup.mli |
905 |
+@@ -194,7 +194,7 @@ sig |
906 |
+ |
907 |
+ val decode : |
908 |
+ ?report:(location -> Error.t -> unit) -> t -> |
909 |
+- (char, 's) stream -> (int, 's) stream |
910 |
++ (char, 's) stream -> (Uchar.t, 's) stream |
911 |
+ (** Applies a decoder to a byte stream. Illegal input byte sequences result in |
912 |
+ calls to the error handler [~report] with error kind [`Decoding_error]. |
913 |
+ The illegal bytes are then skipped, and zero or more U+FFFD replacement |
914 |
+@@ -764,7 +764,7 @@ sig |
915 |
+ |
916 |
+ val decode : |
917 |
+ ?report:(location -> Error.t -> unit io) -> Encoding.t -> |
918 |
+- (char, _) stream -> (int, async) stream |
919 |
++ (char, _) stream -> (Uchar.t, async) stream |
920 |
+ end |
921 |
+ |
922 |
+ (** {2 XML} *) |
923 |
+@@ -838,7 +838,7 @@ val kstream : ('a, _) stream -> 'a Kstre |
924 |
+ val of_kstream : 'a Kstream.t -> ('a, _) stream |
925 |
+ |
926 |
+ val preprocess_input_stream : |
927 |
+- (int, 's) stream -> (location * int, 's) stream * (unit -> location) |
928 |
++ (Uchar.t, 's) stream -> (location * int, 's) stream * (unit -> location) |
929 |
+ |
930 |
+ (**/**) |
931 |
+ |
932 |
+Index: markup.ml-0.7.2/src/utility.ml |
933 |
+=================================================================== |
934 |
+--- markup.ml-0.7.2.orig/src/utility.ml |
935 |
++++ markup.ml-0.7.2/src/utility.ml |
936 |
+@@ -346,11 +346,11 @@ let xhtml_entity name = |
937 |
+ |
938 |
+ match lookup 0 with |
939 |
+ | `One c -> |
940 |
+- add_utf_8 buffer c; |
941 |
++ add_utf_8 buffer (Uchar.of_int c); |
942 |
+ Some (Buffer.contents buffer) |
943 |
+ | `Two (c, c') -> |
944 |
+- add_utf_8 buffer c; |
945 |
+- add_utf_8 buffer c'; |
946 |
++ add_utf_8 buffer (Uchar.of_int c); |
947 |
++ add_utf_8 buffer (Uchar.of_int c'); |
948 |
+ Some (Buffer.contents buffer) |
949 |
+ |
950 |
+ with Exit -> None |
951 |
+Index: markup.ml-0.7.2/src/xml_tokenizer.ml |
952 |
+=================================================================== |
953 |
+--- markup.ml-0.7.2.orig/src/xml_tokenizer.ml |
954 |
++++ markup.ml-0.7.2/src/xml_tokenizer.ml |
955 |
+@@ -101,7 +101,7 @@ let tokenize report resolve_reference (i |
956 |
+ end |
957 |
+ |
958 |
+ | _, c when filter c -> |
959 |
+- add_utf_8 buffer c; |
960 |
++ add_utf_8 buffer (Uchar.of_int c); |
961 |
+ read () |
962 |
+ |
963 |
+ | l, c -> |
964 |
+@@ -133,7 +133,7 @@ let tokenize report resolve_reference (i |
965 |
+ |
966 |
+ | _, c when is_name_start_char c -> |
967 |
+ let buffer = Buffer.create 32 in |
968 |
+- add_utf_8 buffer c; |
969 |
++ add_utf_8 buffer (Uchar.of_int c); |
970 |
+ let rec read () = |
971 |
+ next input !throw unexpected_eoi begin function |
972 |
+ | _, 0x003B -> |
973 |
+@@ -146,7 +146,7 @@ let tokenize report resolve_reference (i |
974 |
+ end |
975 |
+ |
976 |
+ | _, c when is_name_char c -> |
977 |
+- add_utf_8 buffer c; |
978 |
++ add_utf_8 buffer (Uchar.of_int c); |
979 |
+ read () |
980 |
+ |
981 |
+ | l, c -> |
982 |
+@@ -218,7 +218,7 @@ let tokenize report resolve_reference (i |
983 |
+ report_if (not @@ is_name_start_char c) l (fun () -> |
984 |
+ `Bad_token (char c, "attribute", "invalid start character")) |
985 |
+ !throw (fun () -> |
986 |
+- add_utf_8 name_buffer c; |
987 |
++ add_utf_8 name_buffer (Uchar.of_int c); |
988 |
+ name_state ()) |
989 |
+ end |
990 |
+ |
991 |
+@@ -235,7 +235,7 @@ let tokenize report resolve_reference (i |
992 |
+ report_if (not @@ is_name_start_char c) l (fun () -> |
993 |
+ `Bad_token (char c, "attribute", "invalid name character")) |
994 |
+ !throw (fun () -> |
995 |
+- add_utf_8 name_buffer c; |
996 |
++ add_utf_8 name_buffer (Uchar.of_int c); |
997 |
+ name_state ()) |
998 |
+ end |
999 |
+ |
1000 |
+@@ -275,14 +275,14 @@ let tokenize report resolve_reference (i |
1001 |
+ report l |
1002 |
+ (`Bad_token ("&", "attribute", "replace with '&'")) |
1003 |
+ !throw (fun () -> |
1004 |
+- add_utf_8 value_buffer 0x0026; |
1005 |
++ add_utf_8 value_buffer (Uchar.of_int 0x0026); |
1006 |
+ state ()) |
1007 |
+ end |
1008 |
+ |
1009 |
+ and handle_lt l state = |
1010 |
+ report l (`Bad_token ("<", "attribute", "replace with '<'")) !throw |
1011 |
+ (fun () -> |
1012 |
+- add_utf_8 value_buffer 0x003C; |
1013 |
++ add_utf_8 value_buffer (Uchar.of_int 0x003C); |
1014 |
+ state ()) |
1015 |
+ |
1016 |
+ and quoted_value_state quote = |
1017 |
+@@ -300,7 +300,7 @@ let tokenize report resolve_reference (i |
1018 |
+ quoted_value_state quote) |
1019 |
+ |
1020 |
+ | _, c -> |
1021 |
+- add_utf_8 value_buffer c; |
1022 |
++ add_utf_8 value_buffer (Uchar.of_int c); |
1023 |
+ quoted_value_state quote |
1024 |
+ end |
1025 |
+ |
1026 |
+@@ -317,7 +317,7 @@ let tokenize report resolve_reference (i |
1027 |
+ handle_lt l unquoted_value_state |
1028 |
+ |
1029 |
+ | _, c -> |
1030 |
+- add_utf_8 value_buffer c; |
1031 |
++ add_utf_8 value_buffer (Uchar.of_int c); |
1032 |
+ unquoted_value_state () |
1033 |
+ end |
1034 |
+ |
1035 |
+@@ -372,7 +372,7 @@ let tokenize report resolve_reference (i |
1036 |
+ report_if (not @@ is_name_start_char c) l (fun () -> |
1037 |
+ `Bad_token (char c, pi, "invalid start character")) !throw |
1038 |
+ (fun () -> |
1039 |
+- add_utf_8 target_buffer c; |
1040 |
++ add_utf_8 target_buffer (Uchar.of_int c); |
1041 |
+ target_state ()) |
1042 |
+ end |
1043 |
+ |
1044 |
+@@ -388,13 +388,13 @@ let tokenize report resolve_reference (i |
1045 |
+ report_if (not @@ is_name_char c) l (fun () -> |
1046 |
+ `Bad_token (char c, pi, "invalid name character")) !throw |
1047 |
+ (fun () -> |
1048 |
+- add_utf_8 target_buffer c; |
1049 |
++ add_utf_8 target_buffer (Uchar.of_int c); |
1050 |
+ target_state ()) |
1051 |
+ end |
1052 |
+ |
1053 |
+ and text_state () = |
1054 |
+ next' pi finish_pi (fun (_, c) -> |
1055 |
+- add_utf_8 text_buffer c; |
1056 |
++ add_utf_8 text_buffer (Uchar.of_int c); |
1057 |
+ text_state ()) |
1058 |
+ |
1059 |
+ and xml_declaration_state () = |
1060 |
+@@ -572,7 +572,7 @@ let tokenize report resolve_reference (i |
1061 |
+ and initial_state () = |
1062 |
+ next input !throw (fun () -> emit_eoi ()) begin function |
1063 |
+ | l, (0x005D as c) -> |
1064 |
+- add_character l c; |
1065 |
++ add_character l (Uchar.of_int c); |
1066 |
+ one_bracket_state l |
1067 |
+ |
1068 |
+ | l, 0x003C -> |
1069 |
+@@ -583,7 +583,7 @@ let tokenize report resolve_reference (i |
1070 |
+ | None -> |
1071 |
+ report l (`Bad_token (char c, "text", "replace with '&'")) |
1072 |
+ !throw (fun () -> |
1073 |
+- add_character l c; |
1074 |
++ add_character l (Uchar.of_int c); |
1075 |
+ initial_state ()) |
1076 |
+ |
1077 |
+ | Some s -> |
1078 |
+@@ -591,14 +591,14 @@ let tokenize report resolve_reference (i |
1079 |
+ initial_state ()) |
1080 |
+ |
1081 |
+ | l, c -> |
1082 |
+- add_character l c; |
1083 |
++ add_character l (Uchar.of_int c); |
1084 |
+ initial_state () |
1085 |
+ end |
1086 |
+ |
1087 |
+ and one_bracket_state l' = |
1088 |
+ next_option input !throw begin function |
1089 |
+ | Some (l, (0x005D as c)) -> |
1090 |
+- add_character l c; |
1091 |
++ add_character l (Uchar.of_int c); |
1092 |
+ two_brackets_state l' l |
1093 |
+ |
1094 |
+ | v -> |
1095 |
+@@ -611,11 +611,11 @@ let tokenize report resolve_reference (i |
1096 |
+ | Some (l, (0x003E as c)) -> |
1097 |
+ report l' (`Bad_token ("]]>", "text", "must end a CDATA section")) |
1098 |
+ !throw (fun () -> |
1099 |
+- add_character l c; |
1100 |
++ add_character l (Uchar.of_int c); |
1101 |
+ initial_state ()) |
1102 |
+ |
1103 |
+ | Some (l, (0x005D as c)) -> |
1104 |
+- add_character l c; |
1105 |
++ add_character l (Uchar.of_int c); |
1106 |
+ two_brackets_state l'' l |
1107 |
+ |
1108 |
+ | v -> |
1109 |
+@@ -626,7 +626,7 @@ let tokenize report resolve_reference (i |
1110 |
+ and begin_markup_state l' = |
1111 |
+ let recover v = |
1112 |
+ lt_in_text l' (fun () -> |
1113 |
+- add_character l' 0x003C; |
1114 |
++ add_character l' (Uchar.of_int 0x003C); |
1115 |
+ push_option input v; |
1116 |
+ initial_state ()) |
1117 |
+ in |
1118 |
+@@ -648,7 +648,7 @@ let tokenize report resolve_reference (i |
1119 |
+ |
1120 |
+ | _, c when is_name_start_char c -> |
1121 |
+ let tag_name_buffer = Buffer.create 32 in |
1122 |
+- add_utf_8 tag_name_buffer c; |
1123 |
++ add_utf_8 tag_name_buffer (Uchar.of_int c); |
1124 |
+ start_tag_state l' tag_name_buffer |
1125 |
+ |
1126 |
+ | l, c as v -> |
1127 |
+@@ -660,7 +660,7 @@ let tokenize report resolve_reference (i |
1128 |
+ and start_tag_state l' buffer = |
1129 |
+ let recover v = |
1130 |
+ lt_in_text l' (fun () -> |
1131 |
+- add_character l' 0x003C; |
1132 |
++ add_character l' (Uchar.of_int 0x003C); |
1133 |
+ add_string l' (Buffer.contents buffer); |
1134 |
+ push_option input v; |
1135 |
+ initial_state ()) |
1136 |
+@@ -680,7 +680,7 @@ let tokenize report resolve_reference (i |
1137 |
+ attributes_state l' (Buffer.contents buffer) [] |
1138 |
+ |
1139 |
+ | _, c when is_name_char c -> |
1140 |
+- add_utf_8 buffer c; |
1141 |
++ add_utf_8 buffer (Uchar.of_int c); |
1142 |
+ start_tag_state l' buffer |
1143 |
+ |
1144 |
+ | l, c as v -> |
1145 |
+@@ -731,8 +731,8 @@ let tokenize report resolve_reference (i |
1146 |
+ and end_tag_state l' = |
1147 |
+ let recover v = |
1148 |
+ lt_in_text l' (fun () -> |
1149 |
+- add_character l' 0x003C; |
1150 |
+- add_character l' 0x002F; |
1151 |
++ add_character l' (Uchar.of_int 0x003C); |
1152 |
++ add_character l' (Uchar.of_int 0x002F); |
1153 |
+ push_option input v; |
1154 |
+ initial_state ()) |
1155 |
+ in |
1156 |
+@@ -743,7 +743,7 @@ let tokenize report resolve_reference (i |
1157 |
+ begin function |
1158 |
+ | _, c when is_name_start_char c -> |
1159 |
+ let name_buffer = Buffer.create 32 in |
1160 |
+- add_utf_8 name_buffer c; |
1161 |
++ add_utf_8 name_buffer (Uchar.of_int c); |
1162 |
+ end_tag_name_state l' name_buffer |
1163 |
+ |
1164 |
+ | l, c as v -> |
1165 |
+@@ -755,8 +755,8 @@ let tokenize report resolve_reference (i |
1166 |
+ and end_tag_name_state l' buffer = |
1167 |
+ let recover v = |
1168 |
+ lt_in_text l' (fun () -> |
1169 |
+- add_character l' 0x003C; |
1170 |
+- add_character l' 0x002F; |
1171 |
++ add_character l' (Uchar.of_int 0x003C); |
1172 |
++ add_character l' (Uchar.of_int 0x002F); |
1173 |
+ add_string l' (Buffer.contents buffer); |
1174 |
+ push_option input v; |
1175 |
+ initial_state ()) |
1176 |
+@@ -773,7 +773,7 @@ let tokenize report resolve_reference (i |
1177 |
+ end_tag_whitespace_state false l' (Buffer.contents buffer) |
1178 |
+ |
1179 |
+ | _, c when is_name_char c -> |
1180 |
+- add_utf_8 buffer c; |
1181 |
++ add_utf_8 buffer (Uchar.of_int c); |
1182 |
+ end_tag_name_state l' buffer |
1183 |
+ |
1184 |
+ | l, c as v -> |
1185 |
+@@ -821,8 +821,8 @@ let tokenize report resolve_reference (i |
1186 |
+ |
1187 |
+ | v -> |
1188 |
+ bad_comment_start "<!" l' (fun () -> |
1189 |
+- add_character l' 0x003C; |
1190 |
+- add_character l' 0x0021; |
1191 |
++ add_character l' (Uchar.of_int 0x003C); |
1192 |
++ add_character l' (Uchar.of_int 0x0021); |
1193 |
+ push_option input v; |
1194 |
+ initial_state ()) |
1195 |
+ end |
1196 |
+@@ -834,9 +834,9 @@ let tokenize report resolve_reference (i |
1197 |
+ |
1198 |
+ | v -> |
1199 |
+ bad_comment_start "<!-" l' (fun () -> |
1200 |
+- add_character l' 0x003C; |
1201 |
+- add_character l' 0x0021; |
1202 |
+- add_character l' 0x002D; |
1203 |
++ add_character l' (Uchar.of_int 0x003C); |
1204 |
++ add_character l' (Uchar.of_int 0x0021); |
1205 |
++ add_character l' (Uchar.of_int 0x002D); |
1206 |
+ push_option input v; |
1207 |
+ initial_state ()) |
1208 |
+ end |
1209 |
+@@ -852,7 +852,7 @@ let tokenize report resolve_reference (i |
1210 |
+ comment_one_dash_state l' l buffer |
1211 |
+ |
1212 |
+ | _, c -> |
1213 |
+- add_utf_8 buffer c; |
1214 |
++ add_utf_8 buffer (Uchar.of_int c); |
1215 |
+ comment_state l' buffer |
1216 |
+ end |
1217 |
+ |
1218 |
+@@ -863,8 +863,8 @@ let tokenize report resolve_reference (i |
1219 |
+ comment_two_dashes_state false l' l'' buffer |
1220 |
+ |
1221 |
+ | _, c -> |
1222 |
+- add_utf_8 buffer 0x002D; |
1223 |
+- add_utf_8 buffer c; |
1224 |
++ add_utf_8 buffer (Uchar.of_int 0x002D); |
1225 |
++ add_utf_8 buffer (Uchar.of_int c); |
1226 |
+ comment_state l' buffer |
1227 |
+ end |
1228 |
+ |
1229 |
+@@ -883,14 +883,14 @@ let tokenize report resolve_reference (i |
1230 |
+ |
1231 |
+ | _, 0x002D -> |
1232 |
+ recover (fun () -> |
1233 |
+- add_utf_8 buffer 0x002D; |
1234 |
++ add_utf_8 buffer (Uchar.of_int 0x002D); |
1235 |
+ comment_two_dashes_state true l' l'' buffer) |
1236 |
+ |
1237 |
+ | _, c -> |
1238 |
+ recover (fun () -> |
1239 |
+- add_utf_8 buffer 0x002D; |
1240 |
+- add_utf_8 buffer 0x002D; |
1241 |
+- add_utf_8 buffer c; |
1242 |
++ add_utf_8 buffer (Uchar.of_int 0x002D); |
1243 |
++ add_utf_8 buffer (Uchar.of_int 0x002D); |
1244 |
++ add_utf_8 buffer (Uchar.of_int c); |
1245 |
+ comment_state l' buffer) |
1246 |
+ end |
1247 |
+ |
1248 |
+@@ -905,9 +905,9 @@ let tokenize report resolve_reference (i |
1249 |
+ !throw (fun () -> |
1250 |
+ lt_in_text l' (fun () -> |
1251 |
+ push_list input cs; |
1252 |
+- add_character l' 0x003C; |
1253 |
+- add_character l' 0x0021; |
1254 |
+- add_character l' 0x005B; |
1255 |
++ add_character l' (Uchar.of_int 0x003C); |
1256 |
++ add_character l' (Uchar.of_int 0x0021); |
1257 |
++ add_character l' (Uchar.of_int 0x005B); |
1258 |
+ initial_state ())) |
1259 |
+ end |
1260 |
+ |
1261 |
+@@ -918,7 +918,7 @@ let tokenize report resolve_reference (i |
1262 |
+ cdata_one_bracket_state l' l |
1263 |
+ |
1264 |
+ | l, c -> |
1265 |
+- add_character l c; |
1266 |
++ add_character l (Uchar.of_int c); |
1267 |
+ cdata_state l' |
1268 |
+ end |
1269 |
+ |
1270 |
+@@ -929,8 +929,8 @@ let tokenize report resolve_reference (i |
1271 |
+ cdata_two_brackets_state l' l'' l |
1272 |
+ |
1273 |
+ | l, c -> |
1274 |
+- add_character l'' 0x005D; |
1275 |
+- add_character l c; |
1276 |
++ add_character l'' (Uchar.of_int 0x005D); |
1277 |
++ add_character l (Uchar.of_int c); |
1278 |
+ cdata_state l' |
1279 |
+ end |
1280 |
+ |
1281 |
+@@ -941,13 +941,13 @@ let tokenize report resolve_reference (i |
1282 |
+ initial_state () |
1283 |
+ |
1284 |
+ | l, 0x005D -> |
1285 |
+- add_character l'' 0x005D; |
1286 |
++ add_character l'' (Uchar.of_int 0x005D); |
1287 |
+ cdata_two_brackets_state l' l''' l |
1288 |
+ |
1289 |
+ | l, c -> |
1290 |
+- add_character l'' 0x005D; |
1291 |
+- add_character l''' 0x005D; |
1292 |
+- add_character l c; |
1293 |
++ add_character l'' (Uchar.of_int 0x005D); |
1294 |
++ add_character l''' (Uchar.of_int 0x005D); |
1295 |
++ add_character l (Uchar.of_int c); |
1296 |
+ cdata_state l' |
1297 |
+ end |
1298 |
+ |
1299 |
+@@ -963,9 +963,9 @@ let tokenize report resolve_reference (i |
1300 |
+ !throw (fun () -> |
1301 |
+ lt_in_text l' (fun () -> |
1302 |
+ push_list input cs; |
1303 |
+- add_character l' 0x003C; |
1304 |
+- add_character l' 0x0021; |
1305 |
+- add_character l' 0x0044; |
1306 |
++ add_character l' (Uchar.of_int 0x003C); |
1307 |
++ add_character l' (Uchar.of_int 0x0021); |
1308 |
++ add_character l' (Uchar.of_int 0x0044); |
1309 |
+ initial_state ())) |
1310 |
+ end |
1311 |
+ |
1312 |
+@@ -980,15 +980,15 @@ let tokenize report resolve_reference (i |
1313 |
+ emit_doctype l' buffer initial_state |
1314 |
+ |
1315 |
+ | _, (0x0022 | 0x0027 as c) -> |
1316 |
+- add_utf_8 buffer c; |
1317 |
++ add_utf_8 buffer (Uchar.of_int c); |
1318 |
+ doctype_quoted_state (fun () -> doctype_state l' buffer) c l' buffer |
1319 |
+ |
1320 |
+ | _, (0x003C as c) -> |
1321 |
+- add_utf_8 buffer c; |
1322 |
++ add_utf_8 buffer (Uchar.of_int c); |
1323 |
+ doctype_item_state (fun () -> doctype_state l' buffer) l' buffer |
1324 |
+ |
1325 |
+ | _, c -> |
1326 |
+- add_utf_8 buffer c; |
1327 |
++ add_utf_8 buffer (Uchar.of_int c); |
1328 |
+ doctype_state l' buffer |
1329 |
+ end |
1330 |
+ |
1331 |
+@@ -996,11 +996,11 @@ let tokenize report resolve_reference (i |
1332 |
+ next input !throw (fun () -> unterminated_doctype l' buffer) |
1333 |
+ begin function |
1334 |
+ | _, c when c = quote -> |
1335 |
+- add_utf_8 buffer c; |
1336 |
++ add_utf_8 buffer (Uchar.of_int c); |
1337 |
+ state () |
1338 |
+ |
1339 |
+ | _, c -> |
1340 |
+- add_utf_8 buffer c; |
1341 |
++ add_utf_8 buffer (Uchar.of_int c); |
1342 |
+ doctype_quoted_state state quote l' buffer |
1343 |
+ end |
1344 |
+ |
1345 |
+@@ -1008,18 +1008,18 @@ let tokenize report resolve_reference (i |
1346 |
+ next input !throw (fun () -> unterminated_doctype l' buffer) |
1347 |
+ begin function |
1348 |
+ | _, (0x0021 as c) -> |
1349 |
+- add_utf_8 buffer c; |
1350 |
++ add_utf_8 buffer (Uchar.of_int c); |
1351 |
+ doctype_declaration_state state l' buffer |
1352 |
+ |
1353 |
+ | l, (0x003F as c) -> |
1354 |
+- add_utf_8 buffer c; |
1355 |
+- let undo = tap (fun (_, c) -> add_utf_8 buffer c) input in |
1356 |
++ add_utf_8 buffer (Uchar.of_int c); |
1357 |
++ let undo = tap (fun (_, c) -> add_utf_8 buffer (Uchar.of_int c)) input in |
1358 |
+ parse_declaration_or_processing_instruction l (fun _ -> |
1359 |
+ undo (); |
1360 |
+ state ()) |
1361 |
+ |
1362 |
+ | _, c -> |
1363 |
+- add_utf_8 buffer c; |
1364 |
++ add_utf_8 buffer (Uchar.of_int c); |
1365 |
+ state () |
1366 |
+ end |
1367 |
+ |
1368 |
+@@ -1027,16 +1027,16 @@ let tokenize report resolve_reference (i |
1369 |
+ next input !throw (fun () -> unterminated_doctype l' buffer) |
1370 |
+ begin function |
1371 |
+ | _, (0x003E as c) -> |
1372 |
+- add_utf_8 buffer c; |
1373 |
++ add_utf_8 buffer (Uchar.of_int c); |
1374 |
+ state () |
1375 |
+ |
1376 |
+ | _, (0x0022 | 0x0027 as c) -> |
1377 |
+- add_utf_8 buffer c; |
1378 |
++ add_utf_8 buffer (Uchar.of_int c); |
1379 |
+ doctype_quoted_state |
1380 |
+ (fun () -> doctype_declaration_state state l' buffer) c l' buffer |
1381 |
+ |
1382 |
+ | _, c -> |
1383 |
+- add_utf_8 buffer c; |
1384 |
++ add_utf_8 buffer (Uchar.of_int c); |
1385 |
+ doctype_declaration_state state l' buffer |
1386 |
+ end |
1387 |
+ |
1388 |
|
1389 |
diff --git a/dev-ml/markup/markup-0.7.2.ebuild b/dev-ml/markup/markup-0.7.2-r1.ebuild |
1390 |
similarity index 82% |
1391 |
rename from dev-ml/markup/markup-0.7.2.ebuild |
1392 |
rename to dev-ml/markup/markup-0.7.2-r1.ebuild |
1393 |
index 235c575..f70ac55 100644 |
1394 |
--- a/dev-ml/markup/markup-0.7.2.ebuild |
1395 |
+++ b/dev-ml/markup/markup-0.7.2-r1.ebuild |
1396 |
@@ -4,21 +4,21 @@ |
1397 |
|
1398 |
EAPI=5 |
1399 |
|
1400 |
-inherit findlib |
1401 |
+inherit findlib eutils |
1402 |
|
1403 |
DESCRIPTION="Error-recovering streaming HTML5 and XML parsers" |
1404 |
HOMEPAGE="https://github.com/aantron/markup.ml" |
1405 |
SRC_URI="https://github.com/aantron/markup.ml/archive/${PV}.tar.gz -> ${P}.tar.gz" |
1406 |
|
1407 |
LICENSE="BSD" |
1408 |
-SLOT="0/${PV}" |
1409 |
+SLOT="0/${PV}p1" |
1410 |
KEYWORDS="~amd64" |
1411 |
IUSE="doc test" |
1412 |
|
1413 |
DEPEND=" |
1414 |
dev-lang/ocaml:=[ocamlopt] |
1415 |
dev-ml/lwt:=[ocamlopt] |
1416 |
- dev-ml/uutf:=[ocamlopt] |
1417 |
+ >=dev-ml/uutf-1.0:=[ocamlopt] |
1418 |
" |
1419 |
RDEPEND="${DEPEND}" |
1420 |
DEPEND="${DEPEND} |
1421 |
@@ -26,6 +26,11 @@ DEPEND="${DEPEND} |
1422 |
dev-ml/ocamlbuild" |
1423 |
S="${WORKDIR}/${PN}.ml-${PV}" |
1424 |
|
1425 |
+src_prepare() { |
1426 |
+ epatch "${FILESDIR}/uutf.patch" \ |
1427 |
+ "${FILESDIR}/test.patch" |
1428 |
+} |
1429 |
+ |
1430 |
src_compile() { |
1431 |
emake |
1432 |
use doc && emake docs |