1 |
commit: 02edd946d92a10dd27a13f2ece1868483d4c2880 |
2 |
Author: Alexis Ballier <aballier <AT> gentoo <DOT> org> |
3 |
AuthorDate: Thu Dec 1 17:54:24 2016 +0000 |
4 |
Commit: Alexis Ballier <aballier <AT> gentoo <DOT> org> |
5 |
CommitDate: Thu Dec 1 17:54:52 2016 +0000 |
6 |
URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=02edd946 |
7 |
|
8 |
dev-ml/markup: remove old |
9 |
|
10 |
Package-Manager: portage-2.3.2 |
11 |
|
12 |
dev-ml/markup/Manifest | 1 - |
13 |
dev-ml/markup/files/test.patch | 273 --------- |
14 |
dev-ml/markup/files/uutf.patch | 1085 ---------------------------------- |
15 |
dev-ml/markup/markup-0.7.2-r1.ebuild | 44 -- |
16 |
4 files changed, 1403 deletions(-) |
17 |
|
18 |
diff --git a/dev-ml/markup/Manifest b/dev-ml/markup/Manifest |
19 |
index 1819e63..29247e7 100644 |
20 |
--- a/dev-ml/markup/Manifest |
21 |
+++ b/dev-ml/markup/Manifest |
22 |
@@ -1,2 +1 @@ |
23 |
-DIST markup-0.7.2.tar.gz 275010 SHA256 630a737ab6113e17999aacfd55f73b6671211d7980be86f0c711c0b385887c34 SHA512 72a87f54692a0b751c23e6b52bc4ecaa68334b0c6c067793cbf5b011b7d06ce7563f9aa2daeef3553ab48bb6cb9e592587b5a4f37279eaef7b45e19e5b372f73 WHIRLPOOL 679a01c5d197eadf1a8b74247e276405c182acff4c7781b577fbad9fcdc33be164ff81222e79c4e0e5193d1295ee4896ddda547cce1712bfb5ebda050f5bf5ac |
24 |
DIST markup-0.7.3.tar.gz 275094 SHA256 e1eb3562e0d26ccc33aa5dbe802e4210dbd7c30a8e69b6098b825afb11bb6af1 SHA512 e4577e438241d58c728507c88f14b7f029dbc4aa6b9c5dbf78f03b6c083a430026158c3146a88c14c9cd90a242b1bb4ed838b150bb89433fb6a6f673e5d2bb66 WHIRLPOOL ad967738706d4c017f266ecdef7b0772ce0bc17f9bc7dda228ffabc9cccccc88cde69337e063577d1fcda1e93cf4a7f18bbbf09709ee82f0a4b8382f5e339d2e |
25 |
|
26 |
diff --git a/dev-ml/markup/files/test.patch b/dev-ml/markup/files/test.patch |
27 |
deleted file mode 100644 |
28 |
index f2a5257..00000000 |
29 |
--- a/dev-ml/markup/files/test.patch |
30 |
+++ /dev/null |
31 |
@@ -1,273 +0,0 @@ |
32 |
-Index: markup.ml-0.7.2/test/test_encoding.ml |
33 |
-=================================================================== |
34 |
---- markup.ml-0.7.2.orig/test/test_encoding.ml |
35 |
-+++ markup.ml-0.7.2/test/test_encoding.ml |
36 |
-@@ -15,9 +15,9 @@ let test_ucs_4 (f : Encoding.t) name s1 |
37 |
- expect_error (1, 2) (`Decoding_error (bad_bytes, name)) |
38 |
- begin fun report -> |
39 |
- let chars = s1 |> string |> f ~report in |
40 |
-- next_option chars ok (assert_equal (Some (Char.code 'f'))); |
41 |
-+ next_option chars ok (assert_equal (Some ((Uchar.of_int (Char.code 'f'))))); |
42 |
- next_option chars ok (assert_equal (Some Uutf.u_rep)); |
43 |
-- next_option chars ok (assert_equal (Some (Char.code 'o'))); |
44 |
-+ next_option chars ok (assert_equal (Some ((Uchar.of_int (Char.code 'o'))))); |
45 |
- next_option chars ok (assert_equal None); |
46 |
- next_option chars ok (assert_equal None) |
47 |
- end; |
48 |
-@@ -25,9 +25,9 @@ let test_ucs_4 (f : Encoding.t) name s1 |
49 |
- expect_error (2, 2) (`Decoding_error ("\x00\x00\x00", name)) |
50 |
- begin fun report -> |
51 |
- let chars = s2 |> string |> f ~report in |
52 |
-- next_option chars ok (assert_equal (Some (Char.code 'f'))); |
53 |
-- next_option chars ok (assert_equal (Some 0x000A)); |
54 |
-- next_option chars ok (assert_equal (Some (Char.code 'o'))); |
55 |
-+ next_option chars ok (assert_equal (Some (Uchar.of_int (Char.code 'f')))); |
56 |
-+ next_option chars ok (assert_equal (Some (Uchar.of_int 0x000A))); |
57 |
-+ next_option chars ok (assert_equal (Some (Uchar.of_int (Char.code 'o')))); |
58 |
- next_option chars ok (assert_equal (Some Uutf.u_rep)); |
59 |
- next_option chars ok (assert_equal None); |
60 |
- next_option chars ok (assert_equal None) |
61 |
-@@ -38,12 +38,12 @@ let tests = [ |
62 |
- let s = "\xef\xbb\xbffoo\xf0\x9f\x90\x99bar\xa0more" in |
63 |
- expect_error (1, 8) (`Decoding_error ("\xa0", "utf-8")) begin fun report -> |
64 |
- let chars = s |> string |> utf_8 ~report in |
65 |
-- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'])); |
66 |
-- next_option chars ok (assert_equal (Some 0x1F419)); |
67 |
-- next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r'])); |
68 |
-+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o'])); |
69 |
-+ next_option chars ok (assert_equal (Some (Uchar.of_int 0x1F419))); |
70 |
-+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['b'; 'a'; 'r'])); |
71 |
- next_option chars ok (assert_equal (Some Uutf.u_rep)); |
72 |
- next_n 4 chars ok |
73 |
-- (assert_equal (List.map Char.code ['m'; 'o'; 'r'; 'e'])); |
74 |
-+ (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['m'; 'o'; 'r'; 'e'])); |
75 |
- next_option chars ok (assert_equal None); |
76 |
- next_option chars ok (assert_equal None) |
77 |
- end); |
78 |
-@@ -53,11 +53,11 @@ let tests = [ |
79 |
- expect_error (1, 6) (`Decoding_error ("\xdc\x19", "utf-16be")) |
80 |
- begin fun report -> |
81 |
- let chars = s |> string |> utf_16be ~report in |
82 |
-- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'])); |
83 |
-- next_option chars ok (assert_equal (Some 0x1F419)); |
84 |
-- next_option chars ok (assert_equal (Some (Char.code 'b'))); |
85 |
-+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o'])); |
86 |
-+ next_option chars ok (assert_equal (Some (Uchar.of_int 0x1F419))); |
87 |
-+ next_option chars ok (assert_equal (Some (Uchar.of_int (Char.code 'b')))); |
88 |
- next_option chars ok (assert_equal (Some Uutf.u_rep)); |
89 |
-- next_n 16 chars ok (assert_equal (List.map Char.code ['a'; 'r'])); |
90 |
-+ next_n 16 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['a'; 'r'])); |
91 |
- next_option chars ok (assert_equal None); |
92 |
- next_option chars ok (assert_equal None) |
93 |
- end); |
94 |
-@@ -67,11 +67,11 @@ let tests = [ |
95 |
- expect_error (1, 6) (`Decoding_error ("\x19\xdc", "utf-16le")) |
96 |
- begin fun report -> |
97 |
- let chars = s |> string |> utf_16le ~report in |
98 |
-- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'])); |
99 |
-- next_option chars ok (assert_equal (Some 0x1F419)); |
100 |
-- next_option chars ok (assert_equal (Some (Char.code 'b'))); |
101 |
-+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o'])); |
102 |
-+ next_option chars ok (assert_equal (Some (Uchar.of_int 0x1F419))); |
103 |
-+ next_option chars ok (assert_equal (Some (Uchar.of_int (Char.code 'b')))); |
104 |
- next_option chars ok (assert_equal (Some Uutf.u_rep)); |
105 |
-- next_n 16 chars ok (assert_equal (List.map Char.code ['a'; 'r'])); |
106 |
-+ next_n 16 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['a'; 'r'])); |
107 |
- next_option chars ok (assert_equal None); |
108 |
- next_option chars ok (assert_equal None) |
109 |
- end); |
110 |
-@@ -79,7 +79,7 @@ let tests = [ |
111 |
- ("encoding.iso_8859_1" >:: fun _ -> |
112 |
- let chars = string "foo\xa0" |> iso_8859_1 in |
113 |
- next_n 4 chars |
114 |
-- ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'; '\xa0'])); |
115 |
-+ ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o'; '\xa0'])); |
116 |
- next_option chars ok (assert_equal None); |
117 |
- next_option chars ok (assert_equal None)); |
118 |
- |
119 |
-@@ -88,26 +88,26 @@ let tests = [ |
120 |
- expect_error (1, 4) (`Decoding_error ("\xa0", "us-ascii")) |
121 |
- begin fun report -> |
122 |
- let chars = s |> string |> us_ascii ~report in |
123 |
-- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'])); |
124 |
-+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o'])); |
125 |
- next_option chars ok (assert_equal (Some Uutf.u_rep)); |
126 |
-- next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r'])); |
127 |
-+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['b'; 'a'; 'r'])); |
128 |
- next_option chars ok (assert_equal None); |
129 |
- next_option chars ok (assert_equal None) |
130 |
- end); |
131 |
- |
132 |
- ("encoding.windows_1251" >:: fun _ -> |
133 |
- let chars = string "foo\xe0\xe1\xe2bar" |> windows_1251 in |
134 |
-- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'])); |
135 |
-- next_n 3 chars ok (assert_equal [0x0430; 0x0431; 0x0432]); |
136 |
-- next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r'])); |
137 |
-+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o'])); |
138 |
-+ next_n 3 chars ok (assert_equal [Uchar.of_int 0x0430; Uchar.of_int 0x0431; Uchar.of_int 0x0432]); |
139 |
-+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['b'; 'a'; 'r'])); |
140 |
- next_option chars ok (assert_equal None); |
141 |
- next_option chars ok (assert_equal None)); |
142 |
- |
143 |
- ("encoding.windows_1252" >:: fun _ -> |
144 |
- let chars = string "foo\x80\x83bar" |> windows_1252 in |
145 |
-- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'])); |
146 |
-- next_n 2 chars ok (assert_equal [0x20AC; 0x0192]); |
147 |
-- next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r'])); |
148 |
-+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o'])); |
149 |
-+ next_n 2 chars ok (assert_equal [Uchar.of_int 0x20AC; Uchar.of_int 0x0192]); |
150 |
-+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['b'; 'a'; 'r'])); |
151 |
- next_option chars ok (assert_equal None); |
152 |
- next_option chars ok (assert_equal None)); |
153 |
- |
154 |
-@@ -137,7 +137,7 @@ let tests = [ |
155 |
- |
156 |
- ("encoding.ebcdic" >:: fun _ -> |
157 |
- let chars = string "\x86\x96\x96" |> ebcdic in |
158 |
-- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'])); |
159 |
-+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o'])); |
160 |
- next_option chars ok (assert_equal None); |
161 |
- next_option chars ok (assert_equal None)); |
162 |
- ] |
163 |
-Index: markup.ml-0.7.2/test/test_html_tokenizer.ml |
164 |
-=================================================================== |
165 |
---- markup.ml-0.7.2.orig/test/test_html_tokenizer.ml |
166 |
-+++ markup.ml-0.7.2/test/test_html_tokenizer.ml |
167 |
-@@ -134,7 +134,7 @@ let tests = [ |
168 |
- expect "�" |
169 |
- [ 1, 1, E (`Bad_token ("�", |
170 |
- reference, "out of range")); |
171 |
-- 1, 1, S (`Char Uutf.u_rep); |
172 |
-+ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep)); |
173 |
- 1, 35, S `EOF]; |
174 |
- |
175 |
- expect "�" |
176 |
-@@ -142,22 +142,22 @@ let tests = [ |
177 |
- reference, "missing ';' at end")); |
178 |
- 1, 1, E (`Bad_token ("�", |
179 |
- reference, "out of range")); |
180 |
-- 1, 1, S (`Char Uutf.u_rep); |
181 |
-+ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep)); |
182 |
- 1, 34, S `EOF]; |
183 |
- |
184 |
- expect "�" |
185 |
- [ 1, 1, E (`Bad_token ("�", reference, "out of range")); |
186 |
-- 1, 1, S (`Char Uutf.u_rep); |
187 |
-+ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep)); |
188 |
- 1, 9, S `EOF]; |
189 |
- |
190 |
- expect "�" |
191 |
- [ 1, 1, E (`Bad_token ("�", reference, "out of range")); |
192 |
-- 1, 1, S (`Char Uutf.u_rep); |
193 |
-+ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep)); |
194 |
- 1, 11, S `EOF]; |
195 |
- |
196 |
- expect "�" |
197 |
- [ 1, 1, E (`Bad_token ("�", reference, "out of range")); |
198 |
-- 1, 1, S (`Char Uutf.u_rep); |
199 |
-+ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep)); |
200 |
- 1, 5, S `EOF]; |
201 |
- |
202 |
- expect "" |
203 |
-@@ -264,7 +264,7 @@ let tests = [ |
204 |
- expect ~state:`RCDATA "f\x00</foo>" |
205 |
- ([ 1, 1, S (`Char 0x66); |
206 |
- 1, 2, E (`Bad_token ("U+0000", "content", "null")); |
207 |
-- 1, 2, S (`Char Uutf.u_rep)] @ |
208 |
-+ 1, 2, S (`Char (Uchar.to_int Uutf.u_rep))] @ |
209 |
- (char_sequence ~start:3 "</foo>")); |
210 |
- |
211 |
- expect ~state:`RCDATA "<title>f</title >" |
212 |
-@@ -302,7 +302,7 @@ let tests = [ |
213 |
- expect ~state:`RAWTEXT "f\x00</foo>" |
214 |
- ([ 1, 1, S (`Char 0x66); |
215 |
- 1, 2, E (`Bad_token ("U+0000", "content", "null")); |
216 |
-- 1, 2, S (`Char Uutf.u_rep)] @ |
217 |
-+ 1, 2, S (`Char (Uchar.to_int Uutf.u_rep))] @ |
218 |
- (char_sequence ~start:3 "</foo>"))); |
219 |
- |
220 |
- ("html.tokenizer.script-data" >:: fun _ -> |
221 |
-@@ -330,7 +330,7 @@ let tests = [ |
222 |
- expect ~state:`Script_data "f<!--o\x00o" |
223 |
- ((char_sequence ~no_eof:true "f<!--o") @ |
224 |
- [1, 7, E (`Bad_token ("U+0000", "script", "null")); |
225 |
-- 1, 7, S (`Char Uutf.u_rep); |
226 |
-+ 1, 7, S (`Char (Uchar.to_int Uutf.u_rep)); |
227 |
- 1, 8, S (`Char 0x6F); |
228 |
- 1, 9, E (`Unexpected_eoi "script"); |
229 |
- 1, 9, S `EOF]); |
230 |
-@@ -363,7 +363,7 @@ let tests = [ |
231 |
- expect ~state:`Script_data "f<!--a-\x00-" |
232 |
- ((char_sequence ~no_eof:true "f<!--a-") @ |
233 |
- [ 1, 8, E (`Bad_token ("U+0000", "script", "null")); |
234 |
-- 1, 8, S (`Char Uutf.u_rep); |
235 |
-+ 1, 8, S (`Char (Uchar.to_int Uutf.u_rep)); |
236 |
- 1, 9, S (`Char 0x02D); |
237 |
- 1, 10, E (`Unexpected_eoi "script"); |
238 |
- 1, 10, S `EOF]); |
239 |
-@@ -371,7 +371,7 @@ let tests = [ |
240 |
- expect ~state:`Script_data "f<!--a--\x00--" |
241 |
- ((char_sequence ~no_eof:true "f<!--a--") @ |
242 |
- [ 1, 9, E (`Bad_token ("U+0000", "script", "null")); |
243 |
-- 1, 9, S (`Char Uutf.u_rep); |
244 |
-+ 1, 9, S (`Char (Uchar.to_int Uutf.u_rep)); |
245 |
- 1, 10, S (`Char 0x02D); |
246 |
- 1, 11, S (`Char 0x02D); |
247 |
- 1, 12, E (`Unexpected_eoi "script"); |
248 |
-@@ -380,14 +380,14 @@ let tests = [ |
249 |
- expect ~state:`Script_data "f<!--<script>\x00" |
250 |
- ((char_sequence ~no_eof:true "f<!--<script>") @ |
251 |
- [ 1, 14, E (`Bad_token ("U+0000", "script", "null")); |
252 |
-- 1, 14, S (`Char Uutf.u_rep); |
253 |
-+ 1, 14, S (`Char (Uchar.to_int Uutf.u_rep)); |
254 |
- 1, 15, E (`Unexpected_eoi "script"); |
255 |
- 1, 15, S `EOF]); |
256 |
- |
257 |
- expect ~state:`Script_data "f<!--<script>-\x00-" |
258 |
- ((char_sequence ~no_eof:true "f<!--<script>-") @ |
259 |
- [ 1, 15, E (`Bad_token ("U+0000", "script", "null")); |
260 |
-- 1, 15, S (`Char Uutf.u_rep); |
261 |
-+ 1, 15, S (`Char (Uchar.to_int Uutf.u_rep)); |
262 |
- 1, 16, S (`Char 0x2D); |
263 |
- 1, 17, E (`Unexpected_eoi "script"); |
264 |
- 1, 17, S `EOF]); |
265 |
-@@ -395,7 +395,7 @@ let tests = [ |
266 |
- expect ~state:`Script_data "f<!--<script>--\x00--" |
267 |
- ((char_sequence ~no_eof:true "f<!--<script>--") @ |
268 |
- [ 1, 16, E (`Bad_token ("U+0000", "script", "null")); |
269 |
-- 1, 16, S (`Char Uutf.u_rep); |
270 |
-+ 1, 16, S (`Char (Uchar.to_int Uutf.u_rep)); |
271 |
- 1, 17, S (`Char 0x2D); |
272 |
- 1, 18, S (`Char 0x2D); |
273 |
- 1, 19, E (`Unexpected_eoi "script"); |
274 |
-@@ -413,7 +413,7 @@ let tests = [ |
275 |
- expect ~state:`Script_data "f\x00</foo>" |
276 |
- ([ 1, 1, S (`Char 0x66); |
277 |
- 1, 2, E (`Bad_token ("U+0000", "content", "null")); |
278 |
-- 1, 2, S (`Char Uutf.u_rep)] @ |
279 |
-+ 1, 2, S (`Char (Uchar.to_int Uutf.u_rep))] @ |
280 |
- (char_sequence ~start:3 "</foo>"))); |
281 |
- |
282 |
- ("html.tokenizer.plaintext" >:: fun _ -> |
283 |
-@@ -424,7 +424,7 @@ let tests = [ |
284 |
- expect ~state:`PLAINTEXT "f\x00</foo>" |
285 |
- ([ 1, 1, S (`Char 0x66); |
286 |
- 1, 2, E (`Bad_token ("U+0000", "content", "null")); |
287 |
-- 1, 2, S (`Char Uutf.u_rep)] @ |
288 |
-+ 1, 2, S (`Char (Uchar.to_int Uutf.u_rep))] @ |
289 |
- (char_sequence ~start:3 "</foo>"))); |
290 |
- |
291 |
- ("html.tokenizer.comment" >:: fun _ -> |
292 |
-Index: markup.ml-0.7.2/test/test_input.ml |
293 |
-=================================================================== |
294 |
---- markup.ml-0.7.2.orig/test/test_input.ml |
295 |
-+++ markup.ml-0.7.2/test/test_input.ml |
296 |
-@@ -71,7 +71,7 @@ let tests = [ |
297 |
- end); |
298 |
- |
299 |
- ("input.bom" >:: fun _ -> |
300 |
-- [0xFEFF; 0x66] |
301 |
-+ [Uchar.of_int 0xFEFF; Uchar.of_int 0x66] |
302 |
- |> of_list |
303 |
- |> preprocess is_valid_xml_char Error.ignore_errors |
304 |
- |> fst |
305 |
|
306 |
diff --git a/dev-ml/markup/files/uutf.patch b/dev-ml/markup/files/uutf.patch |
307 |
deleted file mode 100644 |
308 |
index f561084..00000000 |
309 |
--- a/dev-ml/markup/files/uutf.patch |
310 |
+++ /dev/null |
311 |
@@ -1,1085 +0,0 @@ |
312 |
-Index: markup.ml-0.7.2/src/common.ml |
313 |
-=================================================================== |
314 |
---- markup.ml-0.7.2.orig/src/common.ml |
315 |
-+++ markup.ml-0.7.2/src/common.ml |
316 |
-@@ -134,7 +134,7 @@ let is_printable = is_in_range 0x0020 0x |
317 |
- let char c = |
318 |
- if is_printable c then begin |
319 |
- let buffer = Buffer.create 4 in |
320 |
-- add_utf_8 buffer c; |
321 |
-+ add_utf_8 buffer (Uchar.of_int c); |
322 |
- Buffer.contents buffer |
323 |
- end |
324 |
- else |
325 |
-Index: markup.ml-0.7.2/src/detect.ml |
326 |
-=================================================================== |
327 |
---- markup.ml-0.7.2.orig/src/detect.ml |
328 |
-+++ markup.ml-0.7.2/src/detect.ml |
329 |
-@@ -222,7 +222,7 @@ let meta_tag_prescan = |
330 |
- let rec iterate () = |
331 |
- next source throw (fun () -> k "") (function |
332 |
- | c when c = quote -> k (Buffer.contents buffer) |
333 |
-- | c -> add_utf_8 buffer (Char.code (Char.lowercase c)); iterate ()) |
334 |
-+ | c -> add_utf_8 buffer (Uchar.of_int (Char.code (Char.lowercase c))); iterate ()) |
335 |
- in |
336 |
- iterate () |
337 |
- in |
338 |
-@@ -236,7 +236,7 @@ let meta_tag_prescan = |
339 |
- push source c; |
340 |
- k (Buffer.contents buffer) |
341 |
- | c -> |
342 |
-- add_utf_8 buffer (Char.code (Char.lowercase c)); |
343 |
-+ add_utf_8 buffer (Uchar.of_int (Char.code (Char.lowercase c))); |
344 |
- iterate ()) |
345 |
- in |
346 |
- iterate () |
347 |
-@@ -315,7 +315,7 @@ let meta_tag_prescan = |
348 |
- k (Buffer.contents buffer) |
349 |
- |
350 |
- | Some c -> |
351 |
-- add_utf_8 buffer (Char.code (Char.lowercase c)); |
352 |
-+ add_utf_8 buffer (Uchar.of_int (Char.code (Char.lowercase c))); |
353 |
- iterate () |
354 |
- end |
355 |
- in |
356 |
-Index: markup.ml-0.7.2/src/encoding.ml |
357 |
-=================================================================== |
358 |
---- markup.ml-0.7.2.orig/src/encoding.ml |
359 |
-+++ markup.ml-0.7.2/src/encoding.ml |
360 |
-@@ -4,7 +4,7 @@ |
361 |
- open Common |
362 |
- open Kstream |
363 |
- |
364 |
--type t = ?report:Error.parse_handler -> char Kstream.t -> int Kstream.t |
365 |
-+type t = ?report:Error.parse_handler -> char Kstream.t -> Uchar.t Kstream.t |
366 |
- |
367 |
- let wrap f = fun ?(report = Error.ignore_errors) s -> f report s |
368 |
- |
369 |
-@@ -24,8 +24,8 @@ let _uutf_decoder encoding name = |
370 |
- k Uutf.u_rep) |
371 |
- | `Await -> |
372 |
- next bytes throw |
373 |
-- (fun () -> Uutf.Manual.src decoder "" 0 0; run ()) |
374 |
-- (fun c -> Uutf.Manual.src decoder (String.make 1 c) 0 1; run ()) |
375 |
-+ (fun () -> Uutf.Manual.src decoder Bytes.empty 0 0; run ()) |
376 |
-+ (fun c -> Uutf.Manual.src decoder (Bytes.make 1 c) 0 1; run ()) |
377 |
- in |
378 |
- run ()) |
379 |
- |> make) |
380 |
-@@ -87,7 +87,7 @@ let _ucs_4_decoder arrange name = |
381 |
- let skip = |
382 |
- if !first then begin |
383 |
- first := false; |
384 |
-- scalar = Uutf.u_bom |
385 |
-+ scalar = Uchar.to_int Uutf.u_bom |
386 |
- end |
387 |
- else |
388 |
- false |
389 |
-@@ -96,9 +96,9 @@ let _ucs_4_decoder arrange name = |
390 |
- if skip then run () |
391 |
- else |
392 |
- if scalar = 0x000A then |
393 |
-- newline k scalar |
394 |
-+ newline k (Uchar.of_int scalar) |
395 |
- else |
396 |
-- char k scalar |
397 |
-+ char k (Uchar.of_int scalar) |
398 |
- |
399 |
- | [] -> empty () |
400 |
- |
401 |
-@@ -130,7 +130,7 @@ let code_page table = |
402 |
- |
403 |
- (fun _ bytes -> |
404 |
- (fun throw empty k -> |
405 |
-- next bytes throw empty (fun c -> k table.(Char.code c))) |
406 |
-+ next bytes throw empty (fun c -> k (Uchar.of_int table.(Char.code c)))) |
407 |
- |> make) |
408 |
- |> wrap |
409 |
- |
410 |
-Index: markup.ml-0.7.2/src/html_parser.ml |
411 |
-=================================================================== |
412 |
---- markup.ml-0.7.2.orig/src/html_parser.ml |
413 |
-+++ markup.ml-0.7.2/src/html_parser.ml |
414 |
-@@ -1022,7 +1022,7 @@ let parse requested_context report (toke |
415 |
- let frameset_ok = ref true in |
416 |
- let head_seen = ref false in |
417 |
- |
418 |
-- let add_character = Text.add text in |
419 |
-+ let add_character = (fun x y -> Text.add text x (Uchar.of_int y)) in |
420 |
- |
421 |
- set_foreign (fun () -> |
422 |
- Stack.current_element_is_foreign context open_elements); |
423 |
-@@ -2717,7 +2717,7 @@ let parse requested_context report (toke |
424 |
- | l, `Char 0 -> |
425 |
- report l (`Bad_token ("U+0000", "foreign content", "null")) !throw |
426 |
- (fun () -> |
427 |
-- add_character l Uutf.u_rep; |
428 |
-+ add_character l (Uchar.to_int Uutf.u_rep); |
429 |
- mode ()) |
430 |
- |
431 |
- | l, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020 as c) -> |
432 |
-Index: markup.ml-0.7.2/src/html_tokenizer.ml |
433 |
-=================================================================== |
434 |
---- markup.ml-0.7.2.orig/src/html_tokenizer.ml |
435 |
-+++ markup.ml-0.7.2/src/html_tokenizer.ml |
436 |
-@@ -252,7 +252,7 @@ let tokenize report (input, get_location |
437 |
- report location |
438 |
- (`Bad_token (prefix ^ text ^ semicolon, "character reference", |
439 |
- "Windows-1252 character")) !throw (fun () -> |
440 |
-- k (Some (`One n))) |
441 |
-+ k (Some (`One (Uchar.of_int n)))) |
442 |
- |
443 |
- else |
444 |
- match n with |
445 |
-@@ -268,9 +268,9 @@ let tokenize report (input, get_location |
446 |
- (`Bad_token (prefix ^ text ^ semicolon, |
447 |
- "character reference", |
448 |
- "invalid HTML character")) !throw (fun () -> |
449 |
-- k (Some (`One n))) |
450 |
-+ k (Some (`One (Uchar.of_int n)))) |
451 |
- |
452 |
-- | n -> k (Some (`One n)) |
453 |
-+ | n -> k (Some (`One (Uchar.of_int n))) |
454 |
- end |
455 |
- end |
456 |
- in |
457 |
-@@ -366,6 +366,10 @@ let tokenize report (input, get_location |
458 |
- | _ -> unterminated ()) |
459 |
- in |
460 |
- |
461 |
-+ let ma = function |
462 |
-+ a, `One x -> (a, `One (Uchar.of_int x)) |
463 |
-+ | a, `Two (x,y) -> (a, `Two (Uchar.of_int x, Uchar.of_int y)) in |
464 |
-+ |
465 |
- let rec match_named best matched replace candidate = |
466 |
- next_option input !throw (function |
467 |
- | None -> finish best matched replace |
468 |
-@@ -377,8 +381,8 @@ let tokenize report (input, get_location |
469 |
- | `None -> finish best matched (v::replace) |
470 |
- | `Continue -> match_named best matched (v::replace) candidate |
471 |
- | `Match_and_continue m -> |
472 |
-- match_named (Some m) (v::(replace @ matched)) [] candidate |
473 |
-- | `Match m -> finish (Some m) (v::matched) []) |
474 |
-+ match_named (Some (ma m)) (v::(replace @ matched)) [] candidate |
475 |
-+ | `Match m -> finish (Some (ma m)) (v::matched) []) |
476 |
- in |
477 |
- match_named None [] [] "") |
478 |
- |
479 |
-@@ -409,11 +413,11 @@ let tokenize report (input, get_location |
480 |
- emit (l, `Char 0x0026) state |
481 |
- |
482 |
- | Some (`One c) -> |
483 |
-- emit (l, `Char c) state |
484 |
-+ emit (l, `Char (Uchar.to_int c)) state |
485 |
- |
486 |
- | Some (`Two (c, c')) -> |
487 |
-- emit (l, `Char c) (fun () -> |
488 |
-- emit (l, `Char c') state) |
489 |
-+ emit (l, `Char (Uchar.to_int c)) (fun () -> |
490 |
-+ emit (l, `Char (Uchar.to_int c')) state) |
491 |
- end |
492 |
- |
493 |
- (* 8.2.4.3. *) |
494 |
-@@ -427,7 +431,7 @@ let tokenize report (input, get_location |
495 |
- |
496 |
- | Some (l, 0) -> |
497 |
- report l (`Bad_token ("U+0000", "content", "null")) !throw (fun () -> |
498 |
-- emit (l, `Char Uutf.u_rep) rcdata_state) |
499 |
-+ emit (l, `Char (Uchar.to_int Uutf.u_rep)) rcdata_state) |
500 |
- |
501 |
- | None -> |
502 |
- emit_eof () |
503 |
-@@ -444,7 +448,7 @@ let tokenize report (input, get_location |
504 |
- |
505 |
- | Some (l, 0) -> |
506 |
- report l (`Bad_token ("U+0000", "content", "null")) !throw (fun () -> |
507 |
-- emit (l, `Char Uutf.u_rep) rawtext_state) |
508 |
-+ emit (l, `Char (Uchar.to_int Uutf.u_rep)) rawtext_state) |
509 |
- |
510 |
- | None -> |
511 |
- emit_eof () |
512 |
-@@ -461,7 +465,7 @@ let tokenize report (input, get_location |
513 |
- |
514 |
- | Some (l, 0) -> |
515 |
- report l (`Bad_token ("U+0000", "content", "null")) !throw (fun () -> |
516 |
-- emit_character l Uutf.u_rep script_data_state) |
517 |
-+ emit_character l (Uchar.to_int Uutf.u_rep) script_data_state) |
518 |
- |
519 |
- | None -> |
520 |
- emit_eof () |
521 |
-@@ -475,7 +479,7 @@ let tokenize report (input, get_location |
522 |
- next_option input !throw begin function |
523 |
- | Some (l, 0) -> |
524 |
- report l (`Bad_token ("U+0000", "content", "null")) !throw (fun () -> |
525 |
-- emit (l, `Char Uutf.u_rep) plaintext_state) |
526 |
-+ emit (l, `Char (Uchar.to_int Uutf.u_rep)) plaintext_state) |
527 |
- |
528 |
- | None -> |
529 |
- emit_eof () |
530 |
-@@ -501,7 +505,7 @@ let tokenize report (input, get_location |
531 |
- end_tag_open_state l' tag |
532 |
- |
533 |
- | Some (_, c) when is_alphabetic c -> |
534 |
-- add_utf_8 tag._tag_name (to_lowercase c); |
535 |
-+ add_utf_8 tag._tag_name (Uchar.of_int (to_lowercase c)); |
536 |
- tag_name_state l' tag |
537 |
- |
538 |
- | Some (_, 0x003F) -> |
539 |
-@@ -529,7 +533,7 @@ let tokenize report (input, get_location |
540 |
- |
541 |
- next_option input !throw begin function |
542 |
- | Some (_, c) when is_alphabetic c -> |
543 |
-- add_utf_8 tag._tag_name (to_lowercase c); |
544 |
-+ add_utf_8 tag._tag_name (Uchar.of_int (to_lowercase c)); |
545 |
- tag_name_state l' tag |
546 |
- |
547 |
- | Some (_, 0x003E) -> |
548 |
-@@ -569,7 +573,7 @@ let tokenize report (input, get_location |
549 |
- report (get_location ()) (`Unexpected_eoi "tag") !throw data_state |
550 |
- |
551 |
- | Some (_, c) -> |
552 |
-- add_utf_8 tag._tag_name (to_lowercase c); |
553 |
-+ add_utf_8 tag._tag_name (Uchar.of_int (to_lowercase c)); |
554 |
- tag_name_state l' tag |
555 |
- end |
556 |
- |
557 |
-@@ -589,7 +593,7 @@ let tokenize report (input, get_location |
558 |
- next_option input !throw begin function |
559 |
- | Some (_, c as v) when is_alphabetic c -> |
560 |
- let name_buffer = Buffer.create 32 in |
561 |
-- add_utf_8 name_buffer (to_lowercase c); |
562 |
-+ add_utf_8 name_buffer (Uchar.of_int (to_lowercase c)); |
563 |
- text_end_tag_name_state state l' (v::cs) name_buffer |
564 |
- |
565 |
- | maybe_v -> |
566 |
-@@ -618,7 +622,7 @@ let tokenize report (input, get_location |
567 |
- emit_tag l' (create_tag ()) |
568 |
- |
569 |
- | Some ((_, c) as v) when is_alphabetic c -> |
570 |
-- add_utf_8 name_buffer (to_lowercase c); |
571 |
-+ add_utf_8 name_buffer (Uchar.of_int (to_lowercase c)); |
572 |
- text_end_tag_name_state state l' (v::cs) name_buffer |
573 |
- |
574 |
- | maybe_v -> |
575 |
-@@ -676,7 +680,7 @@ let tokenize report (input, get_location |
576 |
- |
577 |
- | Some (l, 0) -> |
578 |
- report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () -> |
579 |
-- emit_character l Uutf.u_rep (fun () -> |
580 |
-+ emit_character l (Uchar.to_int Uutf.u_rep) (fun () -> |
581 |
- script_data_escaped_state l')) |
582 |
- |
583 |
- | None -> |
584 |
-@@ -699,7 +703,7 @@ let tokenize report (input, get_location |
585 |
- |
586 |
- | Some (l, 0) -> |
587 |
- report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () -> |
588 |
-- emit_character l Uutf.u_rep (fun () -> |
589 |
-+ emit_character l (Uchar.to_int Uutf.u_rep) (fun () -> |
590 |
- script_data_escaped_state l')) |
591 |
- |
592 |
- | None -> |
593 |
-@@ -725,7 +729,7 @@ let tokenize report (input, get_location |
594 |
- |
595 |
- | Some (l, 0) -> |
596 |
- report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () -> |
597 |
-- emit_character l Uutf.u_rep (fun () -> |
598 |
-+ emit_character l (Uchar.to_int Uutf.u_rep) (fun () -> |
599 |
- script_data_escaped_state l')) |
600 |
- |
601 |
- | None -> |
602 |
-@@ -745,7 +749,7 @@ let tokenize report (input, get_location |
603 |
- |
604 |
- | Some (_, c as v) when is_alphabetic c -> |
605 |
- let tag_buffer = Buffer.create 32 in |
606 |
-- add_utf_8 tag_buffer (to_lowercase c); |
607 |
-+ add_utf_8 tag_buffer (Uchar.of_int (to_lowercase c)); |
608 |
- emit_characters (List.rev (v::cs)) (fun () -> |
609 |
- script_data_double_escape_start_state l' tag_buffer) |
610 |
- |
611 |
-@@ -765,7 +769,7 @@ let tokenize report (input, get_location |
612 |
- else script_data_escaped_state l') |
613 |
- |
614 |
- | Some (l, c) when is_alphabetic c -> |
615 |
-- add_utf_8 tag_buffer (to_lowercase c); |
616 |
-+ add_utf_8 tag_buffer (Uchar.of_int (to_lowercase c)); |
617 |
- emit_character l c (fun () -> |
618 |
- script_data_double_escape_start_state l' tag_buffer) |
619 |
- |
620 |
-@@ -787,7 +791,7 @@ let tokenize report (input, get_location |
621 |
- |
622 |
- | Some (l, 0) -> |
623 |
- report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () -> |
624 |
-- emit_character l Uutf.u_rep (fun () -> |
625 |
-+ emit_character l (Uchar.to_int Uutf.u_rep) (fun () -> |
626 |
- script_data_double_escaped_state l')) |
627 |
- |
628 |
- | None -> |
629 |
-@@ -811,7 +815,7 @@ let tokenize report (input, get_location |
630 |
- |
631 |
- | Some (l, 0) -> |
632 |
- report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () -> |
633 |
-- emit_character l Uutf.u_rep (fun () -> |
634 |
-+ emit_character l (Uchar.to_int Uutf.u_rep) (fun () -> |
635 |
- script_data_double_escaped_state l')) |
636 |
- |
637 |
- | None -> |
638 |
-@@ -838,7 +842,7 @@ let tokenize report (input, get_location |
639 |
- |
640 |
- | Some (l, 0) -> |
641 |
- report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () -> |
642 |
-- emit_character l Uutf.u_rep (fun () -> |
643 |
-+ emit_character l (Uchar.to_int Uutf.u_rep) (fun () -> |
644 |
- script_data_double_escaped_state l')) |
645 |
- |
646 |
- | None -> |
647 |
-@@ -872,7 +876,7 @@ let tokenize report (input, get_location |
648 |
- else script_data_double_escaped_state l') |
649 |
- |
650 |
- | Some (l, c) when is_alphabetic c -> |
651 |
-- add_utf_8 tag_buffer (to_lowercase c); |
652 |
-+ add_utf_8 tag_buffer (Uchar.of_int (to_lowercase c)); |
653 |
- emit_character l c (fun () -> |
654 |
- script_data_double_escape_end_state l' tag_buffer) |
655 |
- |
656 |
-@@ -910,10 +914,10 @@ let tokenize report (input, get_location |
657 |
- | Some (l, (0x0022 | 0x0027 | 0x003C | 0x003D as c)) -> |
658 |
- report l (`Bad_token (char c, "attribute name", |
659 |
- "invalid start character")) !throw (fun () -> |
660 |
-- start_attribute c) |
661 |
-+ start_attribute (Uchar.of_int c)) |
662 |
- |
663 |
- | Some (_, c) -> |
664 |
-- start_attribute (to_lowercase c) |
665 |
-+ start_attribute (Uchar.of_int (to_lowercase c)) |
666 |
- end |
667 |
- |
668 |
- (* 8.2.4.35. *) |
669 |
-@@ -942,14 +946,14 @@ let tokenize report (input, get_location |
670 |
- | Some (l, (0x0022 | 0x0027 | 0x003C as c)) -> |
671 |
- report l (`Bad_token (char c, "attribute name", |
672 |
- "invalid name character")) !throw (fun () -> |
673 |
-- add_utf_8 name_buffer c; |
674 |
-+ add_utf_8 name_buffer (Uchar.of_int c); |
675 |
- attribute_name_state l' tag name_buffer) |
676 |
- |
677 |
- | None -> |
678 |
- report (get_location ()) (`Unexpected_eoi "tag") !throw data_state |
679 |
- |
680 |
- | Some (_, c) -> |
681 |
-- add_utf_8 name_buffer (to_lowercase c); |
682 |
-+ add_utf_8 name_buffer (Uchar.of_int (to_lowercase c)); |
683 |
- attribute_name_state l' tag name_buffer |
684 |
- end |
685 |
- |
686 |
-@@ -985,13 +989,13 @@ let tokenize report (input, get_location |
687 |
- | Some (l, (0x0022 | 0x0027 | 0x003C as c)) -> |
688 |
- report l (`Bad_token (char c, "attribute name", |
689 |
- "invalid start character")) !throw (fun () -> |
690 |
-- start_next_attribute c) |
691 |
-+ start_next_attribute (Uchar.of_int c)) |
692 |
- |
693 |
- | None -> |
694 |
- report (get_location ()) (`Unexpected_eoi "tag") !throw data_state |
695 |
- |
696 |
- | Some (_, c) -> |
697 |
-- start_next_attribute (to_lowercase c) |
698 |
-+ start_next_attribute (Uchar.of_int (to_lowercase c)) |
699 |
- end |
700 |
- |
701 |
- (* 8.2.4.37. *) |
702 |
-@@ -1030,13 +1034,13 @@ let tokenize report (input, get_location |
703 |
- | Some (l, (0x003C | 0x003D | 0x0060 as c)) -> |
704 |
- report l (`Bad_token (char c, "attribute value", |
705 |
- "invalid start character")) !throw (fun () -> |
706 |
-- start_value attribute_value_unquoted_state (Some c)) |
707 |
-+ start_value attribute_value_unquoted_state (Some (Uchar.of_int c))) |
708 |
- |
709 |
- | None -> |
710 |
- report (get_location ()) (`Unexpected_eoi "tag") !throw data_state |
711 |
- |
712 |
- | Some (_, c) -> |
713 |
-- start_value attribute_value_unquoted_state (Some c) |
714 |
-+ start_value attribute_value_unquoted_state (Some (Uchar.of_int c)) |
715 |
- end |
716 |
- |
717 |
- (* 8.2.4.38 and 8.2.4.39. *) |
718 |
-@@ -1062,7 +1066,7 @@ let tokenize report (input, get_location |
719 |
- data_state |
720 |
- |
721 |
- | Some (_, c) -> |
722 |
-- add_utf_8 value_buffer c; |
723 |
-+ add_utf_8 value_buffer (Uchar.of_int c); |
724 |
- attribute_value_quoted_state quote l' tag name value_buffer |
725 |
- end |
726 |
- |
727 |
-@@ -1092,14 +1096,14 @@ let tokenize report (input, get_location |
728 |
- | Some (l, (0x0022 | 0x0027 | 0x003C | 0x003D | 0x0060 as c)) -> |
729 |
- report l (`Bad_token (char c, "attribute value", |
730 |
- "invalid character")) !throw (fun () -> |
731 |
-- add_utf_8 value_buffer c; |
732 |
-+ add_utf_8 value_buffer (Uchar.of_int c); |
733 |
- attribute_value_unquoted_state l' tag name value_buffer) |
734 |
- |
735 |
- | None -> |
736 |
- report (get_location ()) (`Unexpected_eoi "tag") !throw data_state |
737 |
- |
738 |
- | Some (_, c) -> |
739 |
-- add_utf_8 value_buffer c; |
740 |
-+ add_utf_8 value_buffer (Uchar.of_int c); |
741 |
- attribute_value_unquoted_state l' tag name value_buffer |
742 |
- end |
743 |
- |
744 |
-@@ -1107,7 +1111,7 @@ let tokenize report (input, get_location |
745 |
- and character_reference_in_attribute allowed l value_buffer k = |
746 |
- consume_character_reference true (Some allowed) l begin function |
747 |
- | None -> |
748 |
-- add_utf_8 value_buffer 0x0026; |
749 |
-+ add_utf_8 value_buffer (Uchar.of_int 0x0026); |
750 |
- k () |
751 |
- |
752 |
- | Some (`One c) -> |
753 |
-@@ -1176,7 +1180,7 @@ let tokenize report (input, get_location |
754 |
- emit_comment l' buffer |
755 |
- |
756 |
- | Some (_, c) -> |
757 |
-- add_utf_8 buffer c; |
758 |
-+ add_utf_8 buffer (Uchar.of_int c); |
759 |
- consume () |
760 |
- end |
761 |
- in |
762 |
-@@ -1239,7 +1243,7 @@ let tokenize report (input, get_location |
763 |
- emit_comment l' buffer) |
764 |
- |
765 |
- | Some (_, c) -> |
766 |
-- add_utf_8 buffer c; |
767 |
-+ add_utf_8 buffer (Uchar.of_int c); |
768 |
- comment_state l' buffer |
769 |
- end |
770 |
- |
771 |
-@@ -1266,7 +1270,7 @@ let tokenize report (input, get_location |
772 |
- |
773 |
- | Some (_, c) -> |
774 |
- Buffer.add_char buffer '-'; |
775 |
-- add_utf_8 buffer c; |
776 |
-+ add_utf_8 buffer (Uchar.of_int c); |
777 |
- comment_state l' buffer |
778 |
- end |
779 |
- |
780 |
-@@ -1286,7 +1290,7 @@ let tokenize report (input, get_location |
781 |
- emit_comment l' buffer) |
782 |
- |
783 |
- | Some (_, c) -> |
784 |
-- add_utf_8 buffer c; |
785 |
-+ add_utf_8 buffer (Uchar.of_int c); |
786 |
- comment_state l' buffer |
787 |
- end |
788 |
- |
789 |
-@@ -1308,7 +1312,7 @@ let tokenize report (input, get_location |
790 |
- |
791 |
- | Some (_, c) -> |
792 |
- Buffer.add_char buffer '-'; |
793 |
-- add_utf_8 buffer c; |
794 |
-+ add_utf_8 buffer (Uchar.of_int c); |
795 |
- comment_state l' buffer |
796 |
- end |
797 |
- |
798 |
-@@ -1343,7 +1347,7 @@ let tokenize report (input, get_location |
799 |
- report l (`Bad_token ("--" ^ (char c), "comment", |
800 |
- "'--' should be in '-->'")) !throw (fun () -> |
801 |
- Buffer.add_string buffer "--"; |
802 |
-- add_utf_8 buffer c; |
803 |
-+ add_utf_8 buffer (Uchar.of_int c); |
804 |
- comment_state l' buffer) |
805 |
- end |
806 |
- |
807 |
-@@ -1369,7 +1373,7 @@ let tokenize report (input, get_location |
808 |
- |
809 |
- | Some (_, c) -> |
810 |
- Buffer.add_string buffer "--!"; |
811 |
-- add_utf_8 buffer c; |
812 |
-+ add_utf_8 buffer (Uchar.of_int c); |
813 |
- comment_state l' buffer |
814 |
- end |
815 |
- |
816 |
-@@ -1420,7 +1424,7 @@ let tokenize report (input, get_location |
817 |
- |
818 |
- | Some (_, c) -> |
819 |
- doctype._doctype_name <- |
820 |
-- add_doctype_char doctype._doctype_name (to_lowercase c); |
821 |
-+ add_doctype_char doctype._doctype_name (Uchar.of_int (to_lowercase c)); |
822 |
- doctype_name_state l' doctype |
823 |
- end |
824 |
- |
825 |
-@@ -1445,7 +1449,7 @@ let tokenize report (input, get_location |
826 |
- |
827 |
- | Some (_, c) -> |
828 |
- doctype._doctype_name <- |
829 |
-- add_doctype_char doctype._doctype_name (to_lowercase c); |
830 |
-+ add_doctype_char doctype._doctype_name (Uchar.of_int (to_lowercase c)); |
831 |
- doctype_name_state l' doctype |
832 |
- end |
833 |
- |
834 |
-@@ -1574,7 +1578,7 @@ let tokenize report (input, get_location |
835 |
- emit_doctype ~quirks:true l' doctype) |
836 |
- |
837 |
- | Some (_, c) -> |
838 |
-- add doctype c; |
839 |
-+ add doctype (Uchar.of_int c); |
840 |
- doctype_identifier_quoted_state add quote next_state l' doctype |
841 |
- end |
842 |
- |
843 |
-Index: markup.ml-0.7.2/src/html_writer.ml |
844 |
-=================================================================== |
845 |
---- markup.ml-0.7.2.orig/src/html_writer.ml |
846 |
-+++ markup.ml-0.7.2/src/html_writer.ml |
847 |
-@@ -8,7 +8,7 @@ let _escape_attribute s = |
848 |
- Uutf.String.fold_utf_8 (fun () _ -> function |
849 |
- | `Malformed _ -> () |
850 |
- | `Uchar c -> |
851 |
-- match c with |
852 |
-+ match (Uchar.to_int c) with |
853 |
- | 0x0026 -> Buffer.add_string buffer "&amp;" |
854 |
- | 0x00A0 -> Buffer.add_string buffer "&nbsp;" |
855 |
- | 0x0022 -> Buffer.add_string buffer "&quot;" |
856 |
-@@ -21,7 +21,7 @@ let _escape_text s = |
857 |
- Uutf.String.fold_utf_8 (fun () _ -> function |
858 |
- | `Malformed _ -> () |
859 |
- | `Uchar c -> |
860 |
-- match c with |
861 |
-+ match (Uchar.to_int c) with |
862 |
- | 0x0026 -> Buffer.add_string buffer "&amp;" |
863 |
- | 0x00A0 -> Buffer.add_string buffer "&nbsp;" |
864 |
- | 0x003C -> Buffer.add_string buffer "&lt;" |
865 |
-Index: markup.ml-0.7.2/src/input.ml |
866 |
-=================================================================== |
867 |
---- markup.ml-0.7.2.orig/src/input.ml |
868 |
-+++ markup.ml-0.7.2/src/input.ml |
869 |
-@@ -27,13 +27,13 @@ let preprocess is_valid_char report sour |
870 |
- in |
871 |
- |
872 |
- let rec iterate () = |
873 |
-- next source throw empty (function |
874 |
-+ next source throw empty (fun x -> match Uchar.to_int x with |
875 |
- | 0xFEFF when !first_char -> first_char := false; iterate () |
876 |
- |
877 |
- | 0x0D -> |
878 |
-- next source throw newline (function |
879 |
-+ next source throw newline (fun y -> match Uchar.to_int y with |
880 |
- | 0x0A -> newline () |
881 |
-- | c -> push source c; newline ()) |
882 |
-+ | c -> push source (Uchar.of_int c); newline ()) |
883 |
- |
884 |
- | 0x0A -> newline () |
885 |
- |
886 |
-Index: markup.ml-0.7.2/src/input.mli |
887 |
-=================================================================== |
888 |
---- markup.ml-0.7.2.orig/src/input.mli |
889 |
-+++ markup.ml-0.7.2/src/input.mli |
890 |
-@@ -4,5 +4,5 @@ |
891 |
- open Common |
892 |
- |
893 |
- val preprocess : |
894 |
-- (int -> bool) -> Error.parse_handler -> int Kstream.t -> |
895 |
-+ (int -> bool) -> Error.parse_handler -> Uchar.t Kstream.t -> |
896 |
- (location * int) Kstream.t * (unit -> location) |
897 |
-Index: markup.ml-0.7.2/src/markup.ml |
898 |
-=================================================================== |
899 |
---- markup.ml-0.7.2.orig/src/markup.ml |
900 |
-+++ markup.ml-0.7.2/src/markup.ml |
901 |
-@@ -187,7 +187,7 @@ sig |
902 |
- |
903 |
- val decode : |
904 |
- ?report:(location -> Error.t -> unit io) -> t -> |
905 |
-- (char, _) stream -> (int, async) stream |
906 |
-+ (char, _) stream -> (Uchar.t, async) stream |
907 |
- end |
908 |
- |
909 |
- val parse_xml : |
910 |
-Index: markup.ml-0.7.2/src/markup.mli |
911 |
-=================================================================== |
912 |
---- markup.ml-0.7.2.orig/src/markup.mli |
913 |
-+++ markup.ml-0.7.2/src/markup.mli |
914 |
-@@ -194,7 +194,7 @@ sig |
915 |
- |
916 |
- val decode : |
917 |
- ?report:(location -> Error.t -> unit) -> t -> |
918 |
-- (char, 's) stream -> (int, 's) stream |
919 |
-+ (char, 's) stream -> (Uchar.t, 's) stream |
920 |
- (** Applies a decoder to a byte stream. Illegal input byte sequences result in |
921 |
- calls to the error handler [~report] with error kind [`Decoding_error]. |
922 |
- The illegal bytes are then skipped, and zero or more U+FFFD replacement |
923 |
-@@ -764,7 +764,7 @@ sig |
924 |
- |
925 |
- val decode : |
926 |
- ?report:(location -> Error.t -> unit io) -> Encoding.t -> |
927 |
-- (char, _) stream -> (int, async) stream |
928 |
-+ (char, _) stream -> (Uchar.t, async) stream |
929 |
- end |
930 |
- |
931 |
- (** {2 XML} *) |
932 |
-@@ -838,7 +838,7 @@ val kstream : ('a, _) stream -> 'a Kstre |
933 |
- val of_kstream : 'a Kstream.t -> ('a, _) stream |
934 |
- |
935 |
- val preprocess_input_stream : |
936 |
-- (int, 's) stream -> (location * int, 's) stream * (unit -> location) |
937 |
-+ (Uchar.t, 's) stream -> (location * int, 's) stream * (unit -> location) |
938 |
- |
939 |
- (**/**) |
940 |
- |
941 |
-Index: markup.ml-0.7.2/src/utility.ml |
942 |
-=================================================================== |
943 |
---- markup.ml-0.7.2.orig/src/utility.ml |
944 |
-+++ markup.ml-0.7.2/src/utility.ml |
945 |
-@@ -346,11 +346,11 @@ let xhtml_entity name = |
946 |
- |
947 |
- match lookup 0 with |
948 |
- | `One c -> |
949 |
-- add_utf_8 buffer c; |
950 |
-+ add_utf_8 buffer (Uchar.of_int c); |
951 |
- Some (Buffer.contents buffer) |
952 |
- | `Two (c, c') -> |
953 |
-- add_utf_8 buffer c; |
954 |
-- add_utf_8 buffer c'; |
955 |
-+ add_utf_8 buffer (Uchar.of_int c); |
956 |
-+ add_utf_8 buffer (Uchar.of_int c'); |
957 |
- Some (Buffer.contents buffer) |
958 |
- |
959 |
- with Exit -> None |
960 |
-Index: markup.ml-0.7.2/src/xml_tokenizer.ml |
961 |
-=================================================================== |
962 |
---- markup.ml-0.7.2.orig/src/xml_tokenizer.ml |
963 |
-+++ markup.ml-0.7.2/src/xml_tokenizer.ml |
964 |
-@@ -101,7 +101,7 @@ let tokenize report resolve_reference (i |
965 |
- end |
966 |
- |
967 |
- | _, c when filter c -> |
968 |
-- add_utf_8 buffer c; |
969 |
-+ add_utf_8 buffer (Uchar.of_int c); |
970 |
- read () |
971 |
- |
972 |
- | l, c -> |
973 |
-@@ -133,7 +133,7 @@ let tokenize report resolve_reference (i |
974 |
- |
975 |
- | _, c when is_name_start_char c -> |
976 |
- let buffer = Buffer.create 32 in |
977 |
-- add_utf_8 buffer c; |
978 |
-+ add_utf_8 buffer (Uchar.of_int c); |
979 |
- let rec read () = |
980 |
- next input !throw unexpected_eoi begin function |
981 |
- | _, 0x003B -> |
982 |
-@@ -146,7 +146,7 @@ let tokenize report resolve_reference (i |
983 |
- end |
984 |
- |
985 |
- | _, c when is_name_char c -> |
986 |
-- add_utf_8 buffer c; |
987 |
-+ add_utf_8 buffer (Uchar.of_int c); |
988 |
- read () |
989 |
- |
990 |
- | l, c -> |
991 |
-@@ -218,7 +218,7 @@ let tokenize report resolve_reference (i |
992 |
- report_if (not @@ is_name_start_char c) l (fun () -> |
993 |
- `Bad_token (char c, "attribute", "invalid start character")) |
994 |
- !throw (fun () -> |
995 |
-- add_utf_8 name_buffer c; |
996 |
-+ add_utf_8 name_buffer (Uchar.of_int c); |
997 |
- name_state ()) |
998 |
- end |
999 |
- |
1000 |
-@@ -235,7 +235,7 @@ let tokenize report resolve_reference (i |
1001 |
- report_if (not @@ is_name_start_char c) l (fun () -> |
1002 |
- `Bad_token (char c, "attribute", "invalid name character")) |
1003 |
- !throw (fun () -> |
1004 |
-- add_utf_8 name_buffer c; |
1005 |
-+ add_utf_8 name_buffer (Uchar.of_int c); |
1006 |
- name_state ()) |
1007 |
- end |
1008 |
- |
1009 |
-@@ -275,14 +275,14 @@ let tokenize report resolve_reference (i |
1010 |
- report l |
1011 |
- (`Bad_token ("&", "attribute", "replace with '&amp;'")) |
1012 |
- !throw (fun () -> |
1013 |
-- add_utf_8 value_buffer 0x0026; |
1014 |
-+ add_utf_8 value_buffer (Uchar.of_int 0x0026); |
1015 |
- state ()) |
1016 |
- end |
1017 |
- |
1018 |
- and handle_lt l state = |
1019 |
- report l (`Bad_token ("<", "attribute", "replace with '&lt;'")) !throw |
1020 |
- (fun () -> |
1021 |
-- add_utf_8 value_buffer 0x003C; |
1022 |
-+ add_utf_8 value_buffer (Uchar.of_int 0x003C); |
1023 |
- state ()) |
1024 |
- |
1025 |
- and quoted_value_state quote = |
1026 |
-@@ -300,7 +300,7 @@ let tokenize report resolve_reference (i |
1027 |
- quoted_value_state quote) |
1028 |
- |
1029 |
- | _, c -> |
1030 |
-- add_utf_8 value_buffer c; |
1031 |
-+ add_utf_8 value_buffer (Uchar.of_int c); |
1032 |
- quoted_value_state quote |
1033 |
- end |
1034 |
- |
1035 |
-@@ -317,7 +317,7 @@ let tokenize report resolve_reference (i |
1036 |
- handle_lt l unquoted_value_state |
1037 |
- |
1038 |
- | _, c -> |
1039 |
-- add_utf_8 value_buffer c; |
1040 |
-+ add_utf_8 value_buffer (Uchar.of_int c); |
1041 |
- unquoted_value_state () |
1042 |
- end |
1043 |
- |
1044 |
-@@ -372,7 +372,7 @@ let tokenize report resolve_reference (i |
1045 |
- report_if (not @@ is_name_start_char c) l (fun () -> |
1046 |
- `Bad_token (char c, pi, "invalid start character")) !throw |
1047 |
- (fun () -> |
1048 |
-- add_utf_8 target_buffer c; |
1049 |
-+ add_utf_8 target_buffer (Uchar.of_int c); |
1050 |
- target_state ()) |
1051 |
- end |
1052 |
- |
1053 |
-@@ -388,13 +388,13 @@ let tokenize report resolve_reference (i |
1054 |
- report_if (not @@ is_name_char c) l (fun () -> |
1055 |
- `Bad_token (char c, pi, "invalid name character")) !throw |
1056 |
- (fun () -> |
1057 |
-- add_utf_8 target_buffer c; |
1058 |
-+ add_utf_8 target_buffer (Uchar.of_int c); |
1059 |
- target_state ()) |
1060 |
- end |
1061 |
- |
1062 |
- and text_state () = |
1063 |
- next' pi finish_pi (fun (_, c) -> |
1064 |
-- add_utf_8 text_buffer c; |
1065 |
-+ add_utf_8 text_buffer (Uchar.of_int c); |
1066 |
- text_state ()) |
1067 |
- |
1068 |
- and xml_declaration_state () = |
1069 |
-@@ -572,7 +572,7 @@ let tokenize report resolve_reference (i |
1070 |
- and initial_state () = |
1071 |
- next input !throw (fun () -> emit_eoi ()) begin function |
1072 |
- | l, (0x005D as c) -> |
1073 |
-- add_character l c; |
1074 |
-+ add_character l (Uchar.of_int c); |
1075 |
- one_bracket_state l |
1076 |
- |
1077 |
- | l, 0x003C -> |
1078 |
-@@ -583,7 +583,7 @@ let tokenize report resolve_reference (i |
1079 |
- | None -> |
1080 |
- report l (`Bad_token (char c, "text", "replace with '&amp;'")) |
1081 |
- !throw (fun () -> |
1082 |
-- add_character l c; |
1083 |
-+ add_character l (Uchar.of_int c); |
1084 |
- initial_state ()) |
1085 |
- |
1086 |
- | Some s -> |
1087 |
-@@ -591,14 +591,14 @@ let tokenize report resolve_reference (i |
1088 |
- initial_state ()) |
1089 |
- |
1090 |
- | l, c -> |
1091 |
-- add_character l c; |
1092 |
-+ add_character l (Uchar.of_int c); |
1093 |
- initial_state () |
1094 |
- end |
1095 |
- |
1096 |
- and one_bracket_state l' = |
1097 |
- next_option input !throw begin function |
1098 |
- | Some (l, (0x005D as c)) -> |
1099 |
-- add_character l c; |
1100 |
-+ add_character l (Uchar.of_int c); |
1101 |
- two_brackets_state l' l |
1102 |
- |
1103 |
- | v -> |
1104 |
-@@ -611,11 +611,11 @@ let tokenize report resolve_reference (i |
1105 |
- | Some (l, (0x003E as c)) -> |
1106 |
- report l' (`Bad_token ("]]>", "text", "must end a CDATA section")) |
1107 |
- !throw (fun () -> |
1108 |
-- add_character l c; |
1109 |
-+ add_character l (Uchar.of_int c); |
1110 |
- initial_state ()) |
1111 |
- |
1112 |
- | Some (l, (0x005D as c)) -> |
1113 |
-- add_character l c; |
1114 |
-+ add_character l (Uchar.of_int c); |
1115 |
- two_brackets_state l'' l |
1116 |
- |
1117 |
- | v -> |
1118 |
-@@ -626,7 +626,7 @@ let tokenize report resolve_reference (i |
1119 |
- and begin_markup_state l' = |
1120 |
- let recover v = |
1121 |
- lt_in_text l' (fun () -> |
1122 |
-- add_character l' 0x003C; |
1123 |
-+ add_character l' (Uchar.of_int 0x003C); |
1124 |
- push_option input v; |
1125 |
- initial_state ()) |
1126 |
- in |
1127 |
-@@ -648,7 +648,7 @@ let tokenize report resolve_reference (i |
1128 |
- |
1129 |
- | _, c when is_name_start_char c -> |
1130 |
- let tag_name_buffer = Buffer.create 32 in |
1131 |
-- add_utf_8 tag_name_buffer c; |
1132 |
-+ add_utf_8 tag_name_buffer (Uchar.of_int c); |
1133 |
- start_tag_state l' tag_name_buffer |
1134 |
- |
1135 |
- | l, c as v -> |
1136 |
-@@ -660,7 +660,7 @@ let tokenize report resolve_reference (i |
1137 |
- and start_tag_state l' buffer = |
1138 |
- let recover v = |
1139 |
- lt_in_text l' (fun () -> |
1140 |
-- add_character l' 0x003C; |
1141 |
-+ add_character l' (Uchar.of_int 0x003C); |
1142 |
- add_string l' (Buffer.contents buffer); |
1143 |
- push_option input v; |
1144 |
- initial_state ()) |
1145 |
-@@ -680,7 +680,7 @@ let tokenize report resolve_reference (i |
1146 |
- attributes_state l' (Buffer.contents buffer) [] |
1147 |
- |
1148 |
- | _, c when is_name_char c -> |
1149 |
-- add_utf_8 buffer c; |
1150 |
-+ add_utf_8 buffer (Uchar.of_int c); |
1151 |
- start_tag_state l' buffer |
1152 |
- |
1153 |
- | l, c as v -> |
1154 |
-@@ -731,8 +731,8 @@ let tokenize report resolve_reference (i |
1155 |
- and end_tag_state l' = |
1156 |
- let recover v = |
1157 |
- lt_in_text l' (fun () -> |
1158 |
-- add_character l' 0x003C; |
1159 |
-- add_character l' 0x002F; |
1160 |
-+ add_character l' (Uchar.of_int 0x003C); |
1161 |
-+ add_character l' (Uchar.of_int 0x002F); |
1162 |
- push_option input v; |
1163 |
- initial_state ()) |
1164 |
- in |
1165 |
-@@ -743,7 +743,7 @@ let tokenize report resolve_reference (i |
1166 |
- begin function |
1167 |
- | _, c when is_name_start_char c -> |
1168 |
- let name_buffer = Buffer.create 32 in |
1169 |
-- add_utf_8 name_buffer c; |
1170 |
-+ add_utf_8 name_buffer (Uchar.of_int c); |
1171 |
- end_tag_name_state l' name_buffer |
1172 |
- |
1173 |
- | l, c as v -> |
1174 |
-@@ -755,8 +755,8 @@ let tokenize report resolve_reference (i |
1175 |
- and end_tag_name_state l' buffer = |
1176 |
- let recover v = |
1177 |
- lt_in_text l' (fun () -> |
1178 |
-- add_character l' 0x003C; |
1179 |
-- add_character l' 0x002F; |
1180 |
-+ add_character l' (Uchar.of_int 0x003C); |
1181 |
-+ add_character l' (Uchar.of_int 0x002F); |
1182 |
- add_string l' (Buffer.contents buffer); |
1183 |
- push_option input v; |
1184 |
- initial_state ()) |
1185 |
-@@ -773,7 +773,7 @@ let tokenize report resolve_reference (i |
1186 |
- end_tag_whitespace_state false l' (Buffer.contents buffer) |
1187 |
- |
1188 |
- | _, c when is_name_char c -> |
1189 |
-- add_utf_8 buffer c; |
1190 |
-+ add_utf_8 buffer (Uchar.of_int c); |
1191 |
- end_tag_name_state l' buffer |
1192 |
- |
1193 |
- | l, c as v -> |
1194 |
-@@ -821,8 +821,8 @@ let tokenize report resolve_reference (i |
1195 |
- |
1196 |
- | v -> |
1197 |
- bad_comment_start "<!" l' (fun () -> |
1198 |
-- add_character l' 0x003C; |
1199 |
-- add_character l' 0x0021; |
1200 |
-+ add_character l' (Uchar.of_int 0x003C); |
1201 |
-+ add_character l' (Uchar.of_int 0x0021); |
1202 |
- push_option input v; |
1203 |
- initial_state ()) |
1204 |
- end |
1205 |
-@@ -834,9 +834,9 @@ let tokenize report resolve_reference (i |
1206 |
- |
1207 |
- | v -> |
1208 |
- bad_comment_start "<!-" l' (fun () -> |
1209 |
-- add_character l' 0x003C; |
1210 |
-- add_character l' 0x0021; |
1211 |
-- add_character l' 0x002D; |
1212 |
-+ add_character l' (Uchar.of_int 0x003C); |
1213 |
-+ add_character l' (Uchar.of_int 0x0021); |
1214 |
-+ add_character l' (Uchar.of_int 0x002D); |
1215 |
- push_option input v; |
1216 |
- initial_state ()) |
1217 |
- end |
1218 |
-@@ -852,7 +852,7 @@ let tokenize report resolve_reference (i |
1219 |
- comment_one_dash_state l' l buffer |
1220 |
- |
1221 |
- | _, c -> |
1222 |
-- add_utf_8 buffer c; |
1223 |
-+ add_utf_8 buffer (Uchar.of_int c); |
1224 |
- comment_state l' buffer |
1225 |
- end |
1226 |
- |
1227 |
-@@ -863,8 +863,8 @@ let tokenize report resolve_reference (i |
1228 |
- comment_two_dashes_state false l' l'' buffer |
1229 |
- |
1230 |
- | _, c -> |
1231 |
-- add_utf_8 buffer 0x002D; |
1232 |
-- add_utf_8 buffer c; |
1233 |
-+ add_utf_8 buffer (Uchar.of_int 0x002D); |
1234 |
-+ add_utf_8 buffer (Uchar.of_int c); |
1235 |
- comment_state l' buffer |
1236 |
- end |
1237 |
- |
1238 |
-@@ -883,14 +883,14 @@ let tokenize report resolve_reference (i |
1239 |
- |
1240 |
- | _, 0x002D -> |
1241 |
- recover (fun () -> |
1242 |
-- add_utf_8 buffer 0x002D; |
1243 |
-+ add_utf_8 buffer (Uchar.of_int 0x002D); |
1244 |
- comment_two_dashes_state true l' l'' buffer) |
1245 |
- |
1246 |
- | _, c -> |
1247 |
- recover (fun () -> |
1248 |
-- add_utf_8 buffer 0x002D; |
1249 |
-- add_utf_8 buffer 0x002D; |
1250 |
-- add_utf_8 buffer c; |
1251 |
-+ add_utf_8 buffer (Uchar.of_int 0x002D); |
1252 |
-+ add_utf_8 buffer (Uchar.of_int 0x002D); |
1253 |
-+ add_utf_8 buffer (Uchar.of_int c); |
1254 |
- comment_state l' buffer) |
1255 |
- end |
1256 |
- |
1257 |
-@@ -905,9 +905,9 @@ let tokenize report resolve_reference (i |
1258 |
- !throw (fun () -> |
1259 |
- lt_in_text l' (fun () -> |
1260 |
- push_list input cs; |
1261 |
-- add_character l' 0x003C; |
1262 |
-- add_character l' 0x0021; |
1263 |
-- add_character l' 0x005B; |
1264 |
-+ add_character l' (Uchar.of_int 0x003C); |
1265 |
-+ add_character l' (Uchar.of_int 0x0021); |
1266 |
-+ add_character l' (Uchar.of_int 0x005B); |
1267 |
- initial_state ())) |
1268 |
- end |
1269 |
- |
1270 |
-@@ -918,7 +918,7 @@ let tokenize report resolve_reference (i |
1271 |
- cdata_one_bracket_state l' l |
1272 |
- |
1273 |
- | l, c -> |
1274 |
-- add_character l c; |
1275 |
-+ add_character l (Uchar.of_int c); |
1276 |
- cdata_state l' |
1277 |
- end |
1278 |
- |
1279 |
-@@ -929,8 +929,8 @@ let tokenize report resolve_reference (i |
1280 |
- cdata_two_brackets_state l' l'' l |
1281 |
- |
1282 |
- | l, c -> |
1283 |
-- add_character l'' 0x005D; |
1284 |
-- add_character l c; |
1285 |
-+ add_character l'' (Uchar.of_int 0x005D); |
1286 |
-+ add_character l (Uchar.of_int c); |
1287 |
- cdata_state l' |
1288 |
- end |
1289 |
- |
1290 |
-@@ -941,13 +941,13 @@ let tokenize report resolve_reference (i |
1291 |
- initial_state () |
1292 |
- |
1293 |
- | l, 0x005D -> |
1294 |
-- add_character l'' 0x005D; |
1295 |
-+ add_character l'' (Uchar.of_int 0x005D); |
1296 |
- cdata_two_brackets_state l' l''' l |
1297 |
- |
1298 |
- | l, c -> |
1299 |
-- add_character l'' 0x005D; |
1300 |
-- add_character l''' 0x005D; |
1301 |
-- add_character l c; |
1302 |
-+ add_character l'' (Uchar.of_int 0x005D); |
1303 |
-+ add_character l''' (Uchar.of_int 0x005D); |
1304 |
-+ add_character l (Uchar.of_int c); |
1305 |
- cdata_state l' |
1306 |
- end |
1307 |
- |
1308 |
-@@ -963,9 +963,9 @@ let tokenize report resolve_reference (i |
1309 |
- !throw (fun () -> |
1310 |
- lt_in_text l' (fun () -> |
1311 |
- push_list input cs; |
1312 |
-- add_character l' 0x003C; |
1313 |
-- add_character l' 0x0021; |
1314 |
-- add_character l' 0x0044; |
1315 |
-+ add_character l' (Uchar.of_int 0x003C); |
1316 |
-+ add_character l' (Uchar.of_int 0x0021); |
1317 |
-+ add_character l' (Uchar.of_int 0x0044); |
1318 |
- initial_state ())) |
1319 |
- end |
1320 |
- |
1321 |
-@@ -980,15 +980,15 @@ let tokenize report resolve_reference (i |
1322 |
- emit_doctype l' buffer initial_state |
1323 |
- |
1324 |
- | _, (0x0022 | 0x0027 as c) -> |
1325 |
-- add_utf_8 buffer c; |
1326 |
-+ add_utf_8 buffer (Uchar.of_int c); |
1327 |
- doctype_quoted_state (fun () -> doctype_state l' buffer) c l' buffer |
1328 |
- |
1329 |
- | _, (0x003C as c) -> |
1330 |
-- add_utf_8 buffer c; |
1331 |
-+ add_utf_8 buffer (Uchar.of_int c); |
1332 |
- doctype_item_state (fun () -> doctype_state l' buffer) l' buffer |
1333 |
- |
1334 |
- | _, c -> |
1335 |
-- add_utf_8 buffer c; |
1336 |
-+ add_utf_8 buffer (Uchar.of_int c); |
1337 |
- doctype_state l' buffer |
1338 |
- end |
1339 |
- |
1340 |
-@@ -996,11 +996,11 @@ let tokenize report resolve_reference (i |
1341 |
- next input !throw (fun () -> unterminated_doctype l' buffer) |
1342 |
- begin function |
1343 |
- | _, c when c = quote -> |
1344 |
-- add_utf_8 buffer c; |
1345 |
-+ add_utf_8 buffer (Uchar.of_int c); |
1346 |
- state () |
1347 |
- |
1348 |
- | _, c -> |
1349 |
-- add_utf_8 buffer c; |
1350 |
-+ add_utf_8 buffer (Uchar.of_int c); |
1351 |
- doctype_quoted_state state quote l' buffer |
1352 |
- end |
1353 |
- |
1354 |
-@@ -1008,18 +1008,18 @@ let tokenize report resolve_reference (i |
1355 |
- next input !throw (fun () -> unterminated_doctype l' buffer) |
1356 |
- begin function |
1357 |
- | _, (0x0021 as c) -> |
1358 |
-- add_utf_8 buffer c; |
1359 |
-+ add_utf_8 buffer (Uchar.of_int c); |
1360 |
- doctype_declaration_state state l' buffer |
1361 |
- |
1362 |
- | l, (0x003F as c) -> |
1363 |
-- add_utf_8 buffer c; |
1364 |
-- let undo = tap (fun (_, c) -> add_utf_8 buffer c) input in |
1365 |
-+ add_utf_8 buffer (Uchar.of_int c); |
1366 |
-+ let undo = tap (fun (_, c) -> add_utf_8 buffer (Uchar.of_int c)) input in |
1367 |
- parse_declaration_or_processing_instruction l (fun _ -> |
1368 |
- undo (); |
1369 |
- state ()) |
1370 |
- |
1371 |
- | _, c -> |
1372 |
-- add_utf_8 buffer c; |
1373 |
-+ add_utf_8 buffer (Uchar.of_int c); |
1374 |
- state () |
1375 |
- end |
1376 |
- |
1377 |
-@@ -1027,16 +1027,16 @@ let tokenize report resolve_reference (i |
1378 |
- next input !throw (fun () -> unterminated_doctype l' buffer) |
1379 |
- begin function |
1380 |
- | _, (0x003E as c) -> |
1381 |
-- add_utf_8 buffer c; |
1382 |
-+ add_utf_8 buffer (Uchar.of_int c); |
1383 |
- state () |
1384 |
- |
1385 |
- | _, (0x0022 | 0x0027 as c) -> |
1386 |
-- add_utf_8 buffer c; |
1387 |
-+ add_utf_8 buffer (Uchar.of_int c); |
1388 |
- doctype_quoted_state |
1389 |
- (fun () -> doctype_declaration_state state l' buffer) c l' buffer |
1390 |
- |
1391 |
- | _, c -> |
1392 |
-- add_utf_8 buffer c; |
1393 |
-+ add_utf_8 buffer (Uchar.of_int c); |
1394 |
- doctype_declaration_state state l' buffer |
1395 |
- end |
1396 |
- |
1397 |
|
1398 |
diff --git a/dev-ml/markup/markup-0.7.2-r1.ebuild b/dev-ml/markup/markup-0.7.2-r1.ebuild |
1399 |
deleted file mode 100644 |
1400 |
index f70ac55..00000000 |
1401 |
--- a/dev-ml/markup/markup-0.7.2-r1.ebuild |
1402 |
+++ /dev/null |
1403 |
@@ -1,44 +0,0 @@ |
1404 |
-# Copyright 1999-2016 Gentoo Foundation |
1405 |
-# Distributed under the terms of the GNU General Public License v2 |
1406 |
-# $Id$ |
1407 |
- |
1408 |
-EAPI=5 |
1409 |
- |
1410 |
-inherit findlib eutils |
1411 |
- |
1412 |
-DESCRIPTION="Error-recovering streaming HTML5 and XML parsers" |
1413 |
-HOMEPAGE="https://github.com/aantron/markup.ml" |
1414 |
-SRC_URI="https://github.com/aantron/markup.ml/archive/${PV}.tar.gz -> ${P}.tar.gz" |
1415 |
- |
1416 |
-LICENSE="BSD" |
1417 |
-SLOT="0/${PV}p1" |
1418 |
-KEYWORDS="~amd64" |
1419 |
-IUSE="doc test" |
1420 |
- |
1421 |
-DEPEND=" |
1422 |
- dev-lang/ocaml:=[ocamlopt] |
1423 |
- dev-ml/lwt:=[ocamlopt] |
1424 |
- >=dev-ml/uutf-1.0:=[ocamlopt] |
1425 |
-" |
1426 |
-RDEPEND="${DEPEND}" |
1427 |
-DEPEND="${DEPEND} |
1428 |
- test? ( dev-ml/ounit ) |
1429 |
- dev-ml/ocamlbuild" |
1430 |
-S="${WORKDIR}/${PN}.ml-${PV}" |
1431 |
- |
1432 |
-src_prepare() { |
1433 |
- epatch "${FILESDIR}/uutf.patch" \ |
1434 |
- "${FILESDIR}/test.patch" |
1435 |
-} |
1436 |
- |
1437 |
-src_compile() { |
1438 |
- emake |
1439 |
- use doc && emake docs |
1440 |
-} |
1441 |
- |
1442 |
-src_install() { |
1443 |
- findlib_src_preinst |
1444 |
- emake ocamlfind-install |
1445 |
- dodoc README.md |
1446 |
- use doc && dohtml doc/html/* |
1447 |
-} |