Gentoo Archives: gentoo-commits

From: "Michał Górny" <mgorny@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] repo/gentoo:master commit in: dev-python/html5lib/, dev-python/html5lib/files/
Date: Thu, 04 Jan 2018 20:35:09
Message-Id: 1515098093.9f660cebb2308bb021a128fdd699ea70ec383c6a.mgorny@gentoo
1 commit: 9f660cebb2308bb021a128fdd699ea70ec383c6a
2 Author: Michał Górny <mgorny <AT> gentoo <DOT> org>
3 AuthorDate: Thu Jan 4 20:03:07 2018 +0000
4 Commit: Michał Górny <mgorny <AT> gentoo <DOT> org>
5 CommitDate: Thu Jan 4 20:34:53 2018 +0000
6 URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=9f660ceb
7
8 dev-python/html5lib: Clean old up
9
10 .../html5lib-0.9999999-lxml-3.5.0-backport.patch | 117 ---------------------
11 ...html5lib-0.9999999-python3.6-sanitizer-re.patch | 50 ---------
12 dev-python/html5lib/html5lib-0.9999999-r2.ebuild | 32 ------
13 3 files changed, 199 deletions(-)
14
15 diff --git a/dev-python/html5lib/files/html5lib-0.9999999-lxml-3.5.0-backport.patch b/dev-python/html5lib/files/html5lib-0.9999999-lxml-3.5.0-backport.patch
16 deleted file mode 100644
17 index fecfab9a4fb..00000000000
18 --- a/dev-python/html5lib/files/html5lib-0.9999999-lxml-3.5.0-backport.patch
19 +++ /dev/null
20 @@ -1,117 +0,0 @@
21 -From 46046c0f7125911ff8205f09a7574573bb953105 Mon Sep 17 00:00:00 2001
22 -From: Geoffrey Sneddon <geoffers@×××××.com>
23 -Date: Mon, 23 Nov 2015 15:17:07 +0000
24 -Subject: [PATCH 1/3] Make lxml tree-builder coerce comments to work with lxml
25 - 3.5.
26 -
27 ----
28 - html5lib/ihatexml.py | 2 ++
29 - html5lib/treebuilders/etree_lxml.py | 2 +-
30 - 2 files changed, 3 insertions(+), 1 deletion(-)
31 -
32 -diff --git a/html5lib/ihatexml.py b/html5lib/ihatexml.py
33 -index 0fc7930..b5b2e98 100644
34 ---- a/html5lib/ihatexml.py
35 -+++ b/html5lib/ihatexml.py
36 -@@ -225,6 +225,8 @@ def coerceComment(self, data):
37 - while "--" in data:
38 - warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
39 - data = data.replace("--", "- -")
40 -+ if data.endswith("-"):
41 -+ data += " "
42 - return data
43 -
44 - def coerceCharacters(self, data):
45 -diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
46 -index 35d08ef..17007e3 100644
47 ---- a/html5lib/treebuilders/etree_lxml.py
48 -+++ b/html5lib/treebuilders/etree_lxml.py
49 -@@ -189,7 +189,7 @@ class TreeBuilder(_base.TreeBuilder):
50 -
51 - def __init__(self, namespaceHTMLElements, fullTree=False):
52 - builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
53 -- infosetFilter = self.infosetFilter = ihatexml.InfosetFilter()
54 -+ infosetFilter = self.infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True)
55 - self.namespaceHTMLElements = namespaceHTMLElements
56 -
57 - class Attributes(dict):
58 -
59 -From 1c22e1ce93dd4acc81a66cfa03cf9720fbd741c7 Mon Sep 17 00:00:00 2001
60 -From: Geoffrey Sneddon <geoffers@×××××.com>
61 -Date: Mon, 23 Nov 2015 15:35:21 +0000
62 -Subject: [PATCH 2/3] fixup! Make lxml tree-builder coerce comments to work
63 - with lxml 3.5.
64 -
65 ----
66 - html5lib/ihatexml.py | 1 +
67 - html5lib/treebuilders/etree_lxml.py | 7 ++++---
68 - 2 files changed, 5 insertions(+), 3 deletions(-)
69 -
70 -diff --git a/html5lib/ihatexml.py b/html5lib/ihatexml.py
71 -index b5b2e98..5a81a12 100644
72 ---- a/html5lib/ihatexml.py
73 -+++ b/html5lib/ihatexml.py
74 -@@ -226,6 +226,7 @@ def coerceComment(self, data):
75 - warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
76 - data = data.replace("--", "- -")
77 - if data.endswith("-"):
78 -+ warnings.warn("Comments cannot contain end in a dash", DataLossWarning)
79 - data += " "
80 - return data
81 -
82 -diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
83 -index 17007e3..c6c981f 100644
84 ---- a/html5lib/treebuilders/etree_lxml.py
85 -+++ b/html5lib/treebuilders/etree_lxml.py
86 -@@ -54,7 +54,7 @@ def _getChildNodes(self):
87 - def testSerializer(element):
88 - rv = []
89 - finalText = None
90 -- infosetFilter = ihatexml.InfosetFilter()
91 -+ infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True)
92 -
93 - def serializeElement(element, indent=0):
94 - if not hasattr(element, "tag"):
95 -@@ -257,7 +257,7 @@ def _getData(self):
96 - data = property(_getData, _setData)
97 -
98 - self.elementClass = Element
99 -- self.commentClass = builder.Comment
100 -+ self.commentClass = Comment
101 - # self.fragmentClass = builder.DocumentFragment
102 - _base.TreeBuilder.__init__(self, namespaceHTMLElements)
103 -
104 -@@ -344,7 +344,8 @@ def insertRoot(self, token):
105 -
106 - # Append the initial comments:
107 - for comment_token in self.initial_comments:
108 -- root.addprevious(etree.Comment(comment_token["data"]))
109 -+ comment = self.commentClass(comment_token["data"])
110 -+ root.addprevious(comment._element)
111 -
112 - # Create the root document and add the ElementTree to it
113 - self.document = self.documentClass()
114 -
115 -From 235a6d7ac7e0a3e2b431766e051094c2d3110ba3 Mon Sep 17 00:00:00 2001
116 -From: Geoffrey Sneddon <geoffers@×××××.com>
117 -Date: Mon, 23 Nov 2015 15:42:12 +0000
118 -Subject: [PATCH 3/3] fixup! Make lxml tree-builder coerce comments to work
119 - with lxml 3.5.
120 -
121 ----
122 - html5lib/ihatexml.py | 2 +-
123 - 1 file changed, 1 insertion(+), 1 deletion(-)
124 -
125 -diff --git a/html5lib/ihatexml.py b/html5lib/ihatexml.py
126 -index 5a81a12..5da5d93 100644
127 ---- a/html5lib/ihatexml.py
128 -+++ b/html5lib/ihatexml.py
129 -@@ -226,7 +226,7 @@ def coerceComment(self, data):
130 - warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
131 - data = data.replace("--", "- -")
132 - if data.endswith("-"):
133 -- warnings.warn("Comments cannot contain end in a dash", DataLossWarning)
134 -+ warnings.warn("Comments cannot end in a dash", DataLossWarning)
135 - data += " "
136 - return data
137 -
138
139 diff --git a/dev-python/html5lib/files/html5lib-0.9999999-python3.6-sanitizer-re.patch b/dev-python/html5lib/files/html5lib-0.9999999-python3.6-sanitizer-re.patch
140 deleted file mode 100644
141 index 2fbef2ad0b9..00000000000
142 --- a/dev-python/html5lib/files/html5lib-0.9999999-python3.6-sanitizer-re.patch
143 +++ /dev/null
144 @@ -1,50 +0,0 @@
145 ---- a/html5lib/sanitizer.py
146 -+++ b/html5lib/sanitizer.py
147 -@@ -203,7 +203,7 @@
148 - for attr in self.attr_val_is_uri:
149 - if attr not in attrs:
150 - continue
151 -- val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
152 -+ val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '',
153 - unescape(attrs[attr])).lower()
154 - # remove replacement characters from unescaped characters
155 - val_unescaped = val_unescaped.replace("\ufffd", "")
156 -@@ -228,7 +228,7 @@
157 - ' ',
158 - unescape(attrs[attr]))
159 - if (token["name"] in self.svg_allow_local_href and
160 -- 'xlink:href' in attrs and re.search('^\s*[^#\s].*',
161 -+ 'xlink:href' in attrs and re.search(r'^\s*[^#\s].*',
162 - attrs['xlink:href'])):
163 - del attrs['xlink:href']
164 - if 'style' in attrs:
165 -@@ -257,16 +257,16 @@
166 -
167 - def sanitize_css(self, style):
168 - # disallow urls
169 -- style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
170 -+ style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
171 -
172 - # gauntlet
173 -- if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
174 -+ if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
175 - return ''
176 -- if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
177 -+ if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
178 - return ''
179 -
180 - clean = []
181 -- for prop, value in re.findall("([-\w]+)\s*:\s*([^:;]*)", style):
182 -+ for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
183 - if not value:
184 - continue
185 - if prop.lower() in self.allowed_css_properties:
186 -@@ -275,7 +275,7 @@
187 - 'padding']:
188 - for keyword in value.split():
189 - if keyword not in self.acceptable_css_keywords and \
190 -- not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):
191 -+ not re.match(r"^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):
192 - break
193 - else:
194 - clean.append(prop + ': ' + value + ';')
195
196 diff --git a/dev-python/html5lib/html5lib-0.9999999-r2.ebuild b/dev-python/html5lib/html5lib-0.9999999-r2.ebuild
197 deleted file mode 100644
198 index 5b23d984fc3..00000000000
199 --- a/dev-python/html5lib/html5lib-0.9999999-r2.ebuild
200 +++ /dev/null
201 @@ -1,32 +0,0 @@
202 -# Copyright 1999-2017 Gentoo Foundation
203 -# Distributed under the terms of the GNU General Public License v2
204 -
205 -EAPI=6
206 -
207 -PYTHON_COMPAT=( python2_7 python3_{4,5,6} pypy pypy3 )
208 -PYTHON_REQ_USE="xml(+)"
209 -
210 -inherit distutils-r1
211 -
212 -DESCRIPTION="HTML parser based on the HTML5 specification"
213 -HOMEPAGE="https://github.com/html5lib/html5lib-python/ https://html5lib.readthedocs.org"
214 -SRC_URI="mirror://pypi/${PN:0:1}/${PN}/${P}.tar.gz"
215 -
216 -LICENSE="MIT"
217 -SLOT="0"
218 -KEYWORDS="~alpha ~amd64 ~arm ~arm64 ~hppa ~ia64 ~mips ~ppc ~ppc64 ~s390 ~sh ~sparc ~x86 ~amd64-fbsd ~x86-fbsd ~amd64-linux ~x86-linux"
219 -IUSE="test"
220 -
221 -RDEPEND="dev-python/six[${PYTHON_USEDEP}]"
222 -DEPEND="${RDEPEND}
223 - dev-python/setuptools[${PYTHON_USEDEP}]
224 - test? ( dev-python/nose[${PYTHON_USEDEP}] )"
225 -
226 -PATCHES=(
227 - "${FILESDIR}"/${P}-lxml-3.5.0-backport.patch
228 - "${FILESDIR}"/${P}-python3.6-sanitizer-re.patch
229 -)
230 -
231 -python_test() {
232 - nosetests --verbosity=3 || die "Tests fail with ${EPYTHON}"
233 -}