1 |
commit: 9741510885394a808ea58561b23a1711771f1f4a |
2 |
Author: Michał Górny <mgorny <AT> gentoo <DOT> org> |
3 |
AuthorDate: Mon Aug 14 06:46:57 2017 +0000 |
4 |
Commit: Michał Górny <mgorny <AT> gentoo <DOT> org> |
5 |
CommitDate: Mon Aug 14 07:12:33 2017 +0000 |
6 |
URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=97415108 |
7 |
|
8 |
dev-python/beautifulsoup: Drop masked ancient version |
9 |
|
10 |
dev-python/beautifulsoup/Manifest | 1 - |
11 |
.../beautifulsoup/beautifulsoup-3.1.0.1-r2.ebuild | 40 - |
12 |
.../beautifulsoup-3.1.0.1-disable-tests.patch | 39 - |
13 |
.../files/beautifulsoup-3.1.0.1-python-3.patch | 949 --------------------- |
14 |
profiles/package.mask | 5 - |
15 |
5 files changed, 1034 deletions(-) |
16 |
|
17 |
diff --git a/dev-python/beautifulsoup/Manifest b/dev-python/beautifulsoup/Manifest |
18 |
index a87f7f747d9..6ba59cf1880 100644 |
19 |
--- a/dev-python/beautifulsoup/Manifest |
20 |
+++ b/dev-python/beautifulsoup/Manifest |
21 |
@@ -1,4 +1,3 @@ |
22 |
-DIST BeautifulSoup-3.1.0.1.tar.gz 71460 SHA256 820a80f473240d9d30047f36c959d530a699a732500662dd8b03e1d3ccad12a8 SHA512 812969faf454a58d849921836ed07ec9a950f34fb31e29e118cdf1a75a533370e430f417402b5a5016d23b2d3a1c44a1cf5fde5b3bfd1bc98c50036edd51c0d6 WHIRLPOOL a199585817dcabcc6327c3836a66128605ebf92a6663b5c660125061a797485a504d300791bcd43e0e94e4f08ca59c01f65f42481da07b1240350cbfc6ea6b0c |
23 |
DIST BeautifulSoup-3.2.1.tar.gz 31224 SHA256 f5ba85e907e7dfd78e44e4000b3eaef3a650aefc57831e8a645702db2e5b50db SHA512 365b7b045a2069cf437877543577bc0aa99256a6dc4c9743670b46bfceab5494a06628012d6eccecfe99c25d5c9e0c65814964b47026f15ba1a538444cfb7789 WHIRLPOOL c2f84b29421d0153fb1fecc87d63e00a61182e03bc0683132babca5d6c94143b4875a60a19124a36e4e6e78ce80bff9e1e81b37335700efc14084da933307e26 |
24 |
DIST beautifulsoup4-4.5.1.tar.gz 158039 SHA256 3c9474036afda9136aac6463def733f81017bf9ef3510d25634f335b0c87f5e1 SHA512 d560d7f743507084ec546708d29bb3764512f5b2c380004280dde813350bf48d1697fddce3bd3f95186407bf5142941d7adc7d0de8e7962eb5ca1278dbc7e93f WHIRLPOOL bf971596707c2ff69e93528164be01254258aa45601763c543246b67c5d31024b0e4de618382775a3cf313d255d8d1d6268a47542773531aacee9a2643412661 |
25 |
DIST beautifulsoup4-4.5.3.tar.gz 159185 SHA256 b21ca09366fa596043578fd4188b052b46634d22059e68dd0077d9ee77e08a3e SHA512 d31db0e3bb778a78c37882fcd55dc580eb5eeadfd48744eae6e2e0d0ef5983b216a4682af84a4971611b05fb99c45012ce094475f2d7c39a5b90dad99906ec84 WHIRLPOOL f8dbffd8e4a1dbee0a7ad8a4bcbe22a984f524474f0241a4c03ef5c37b291f9834a6ff1d076421c0cf1087588df1e49f5b99cd9afd7e81591c9063d92d4d097d |
26 |
|
27 |
diff --git a/dev-python/beautifulsoup/beautifulsoup-3.1.0.1-r2.ebuild b/dev-python/beautifulsoup/beautifulsoup-3.1.0.1-r2.ebuild |
28 |
deleted file mode 100644 |
29 |
index a69a317f6d9..00000000000 |
30 |
--- a/dev-python/beautifulsoup/beautifulsoup-3.1.0.1-r2.ebuild |
31 |
+++ /dev/null |
32 |
@@ -1,40 +0,0 @@ |
33 |
-# Copyright 1999-2017 Gentoo Foundation |
34 |
-# Distributed under the terms of the GNU General Public License v2 |
35 |
- |
36 |
-EAPI="5" |
37 |
-# A few tests fail with python3.3/3.4 :( |
38 |
-PYTHON_COMPAT=( python3_4 pypy3 ) |
39 |
- |
40 |
-inherit distutils-r1 eutils |
41 |
- |
42 |
-MY_PN="BeautifulSoup" |
43 |
-MY_P="${MY_PN}-${PV}" |
44 |
- |
45 |
-DESCRIPTION="HTML/XML parser for quick-turnaround applications like screen-scraping" |
46 |
-HOMEPAGE="http://www.crummy.com/software/BeautifulSoup/ https://pypi.python.org/pypi/BeautifulSoup" |
47 |
-SRC_URI="http://www.crummy.com/software/${MY_PN}/download/${MY_P}.tar.gz" |
48 |
- |
49 |
-LICENSE="BSD" |
50 |
-SLOT="python-3" |
51 |
-KEYWORDS="alpha amd64 arm hppa ia64 ppc ppc64 s390 sh sparc x86 ~amd64-fbsd ~x86-fbsd ~amd64-linux ~x86-linux ~x86-macos ~x86-solaris" |
52 |
-IUSE="" |
53 |
- |
54 |
-DEPEND="" |
55 |
-RDEPEND="!dev-python/beautifulsoup:0" |
56 |
- |
57 |
-S="${WORKDIR}/${MY_P}" |
58 |
- |
59 |
-PATCHES=( |
60 |
- "${FILESDIR}/${P}-python-3.patch" |
61 |
- "${FILESDIR}/${P}-disable-tests.patch" |
62 |
-) |
63 |
- |
64 |
-python_test() { |
65 |
- "${PYTHON}" BeautifulSoupTests.py || die "Tests fail with ${EPYTHON}" |
66 |
-} |
67 |
- |
68 |
-python_install_all() { |
69 |
- distutils-r1_python_install_all |
70 |
- # Delete useless files. |
71 |
- rm -r "${ED%/}/usr/bin" || die |
72 |
-} |
73 |
|
74 |
diff --git a/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-disable-tests.patch b/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-disable-tests.patch |
75 |
deleted file mode 100644 |
76 |
index c97cd76ee31..00000000000 |
77 |
--- a/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-disable-tests.patch |
78 |
+++ /dev/null |
79 |
@@ -1,39 +0,0 @@ |
80 |
---- lib/BeautifulSoupTests.py.orig 2015-07-21 08:39:33.077000000 +0000 |
81 |
-+++ lib/BeautifulSoupTests.py 2015-07-21 08:41:19.285000000 +0000 |
82 |
-@@ -538,13 +538,13 @@ |
83 |
- text = "<td nowrap>foo</td>" |
84 |
- self.assertSoupEquals(text, text) |
85 |
- |
86 |
-- def testCData(self): |
87 |
-- xml = "<root>foo<![CDATA[foobar]]>bar</root>" |
88 |
-- self.assertSoupEquals(xml, xml) |
89 |
-- r = re.compile("foo.*bar") |
90 |
-- soup = BeautifulSoup(xml) |
91 |
-- self.assertEquals(soup.find(text=r).string, "foobar") |
92 |
-- self.assertEquals(soup.find(text=r).__class__, CData) |
93 |
-+ #def testCData(self): |
94 |
-+ # xml = "<root>foo<![CDATA[foobar]]>bar</root>" |
95 |
-+ # self.assertSoupEquals(xml, xml) |
96 |
-+ # r = re.compile("foo.*bar") |
97 |
-+ # soup = BeautifulSoup(xml) |
98 |
-+ # self.assertEquals(soup.find(text=r).string, "foobar") |
99 |
-+ # self.assertEquals(soup.find(text=r).__class__, CData) |
100 |
- |
101 |
- def testComments(self): |
102 |
- xml = "foo<!--foobar-->baz" |
103 |
-@@ -607,11 +607,11 @@ |
104 |
- def testWhitespaceInDeclaration(self): |
105 |
- self.assertSoupEquals('<! DOCTYPE>', '<!DOCTYPE>') |
106 |
- |
107 |
-- def testJunkInDeclaration(self): |
108 |
-- self.assertSoupEquals('<! Foo = -8>a', '<!Foo = -8>a') |
109 |
-+ #def testJunkInDeclaration(self): |
110 |
-+ # self.assertSoupEquals('<! Foo = -8>a', '<!Foo = -8>a') |
111 |
- |
112 |
-- def testIncompleteDeclaration(self): |
113 |
-- self.assertSoupEquals('a<!b <p>c') |
114 |
-+ #def testIncompleteDeclaration(self): |
115 |
-+ # self.assertSoupEquals('a<!b <p>c') |
116 |
- |
117 |
- def testEntityReplacement(self): |
118 |
- self.assertSoupEquals('<b>hello there</b>') |
119 |
|
120 |
diff --git a/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-python-3.patch b/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-python-3.patch |
121 |
deleted file mode 100644 |
122 |
index adcbb43dd07..00000000000 |
123 |
--- a/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-python-3.patch |
124 |
+++ /dev/null |
125 |
@@ -1,949 +0,0 @@ |
126 |
---- BeautifulSoup.py |
127 |
-+++ BeautifulSoup.py |
128 |
-@@ -76,7 +76,7 @@ |
129 |
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT. |
130 |
- |
131 |
- """ |
132 |
--from __future__ import generators |
133 |
-+ |
134 |
- |
135 |
- __author__ = "Leonard Richardson (leonardr@××××××××.org)" |
136 |
- __version__ = "3.1.0.1" |
137 |
-@@ -84,12 +84,12 @@ |
138 |
- __license__ = "New-style BSD" |
139 |
- |
140 |
- import codecs |
141 |
--import markupbase |
142 |
-+import _markupbase |
143 |
- import types |
144 |
- import re |
145 |
--from HTMLParser import HTMLParser, HTMLParseError |
146 |
-+from html.parser import HTMLParser, HTMLParseError |
147 |
- try: |
148 |
-- from htmlentitydefs import name2codepoint |
149 |
-+ from html.entities import name2codepoint |
150 |
- except ImportError: |
151 |
- name2codepoint = {} |
152 |
- try: |
153 |
-@@ -98,18 +98,18 @@ |
154 |
- from sets import Set as set |
155 |
- |
156 |
- #These hacks make Beautiful Soup able to parse XML with namespaces |
157 |
--markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match |
158 |
-+_markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match |
159 |
- |
160 |
- DEFAULT_OUTPUT_ENCODING = "utf-8" |
161 |
- |
162 |
- # First, the classes that represent markup elements. |
163 |
- |
164 |
--def sob(unicode, encoding): |
165 |
-+def sob(str, encoding): |
166 |
- """Returns either the given Unicode string or its encoding.""" |
167 |
- if encoding is None: |
168 |
-- return unicode |
169 |
-+ return str |
170 |
- else: |
171 |
-- return unicode.encode(encoding) |
172 |
-+ return str.encode(encoding) |
173 |
- |
174 |
- class PageElement: |
175 |
- """Contains the navigational information for some part of the page |
176 |
-@@ -178,8 +178,8 @@ |
177 |
- return lastChild |
178 |
- |
179 |
- def insert(self, position, newChild): |
180 |
-- if (isinstance(newChild, basestring) |
181 |
-- or isinstance(newChild, unicode)) \ |
182 |
-+ if (isinstance(newChild, str) |
183 |
-+ or isinstance(newChild, str)) \ |
184 |
- and not isinstance(newChild, NavigableString): |
185 |
- newChild = NavigableString(newChild) |
186 |
- |
187 |
-@@ -334,7 +334,7 @@ |
188 |
- g = generator() |
189 |
- while True: |
190 |
- try: |
191 |
-- i = g.next() |
192 |
-+ i = g.__next__() |
193 |
- except StopIteration: |
194 |
- break |
195 |
- if i: |
196 |
-@@ -385,22 +385,22 @@ |
197 |
- def toEncoding(self, s, encoding=None): |
198 |
- """Encodes an object to a string in some encoding, or to Unicode. |
199 |
- .""" |
200 |
-- if isinstance(s, unicode): |
201 |
-+ if isinstance(s, str): |
202 |
- if encoding: |
203 |
- s = s.encode(encoding) |
204 |
- elif isinstance(s, str): |
205 |
- if encoding: |
206 |
- s = s.encode(encoding) |
207 |
- else: |
208 |
-- s = unicode(s) |
209 |
-+ s = str(s) |
210 |
- else: |
211 |
- if encoding: |
212 |
- s = self.toEncoding(str(s), encoding) |
213 |
- else: |
214 |
-- s = unicode(s) |
215 |
-+ s = str(s) |
216 |
- return s |
217 |
- |
218 |
--class NavigableString(unicode, PageElement): |
219 |
-+class NavigableString(str, PageElement): |
220 |
- |
221 |
- def __new__(cls, value): |
222 |
- """Create a new NavigableString. |
223 |
-@@ -410,12 +410,12 @@ |
224 |
- passed in to the superclass's __new__ or the superclass won't know |
225 |
- how to handle non-ASCII characters. |
226 |
- """ |
227 |
-- if isinstance(value, unicode): |
228 |
-- return unicode.__new__(cls, value) |
229 |
-- return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) |
230 |
-+ if isinstance(value, str): |
231 |
-+ return str.__new__(cls, value) |
232 |
-+ return str.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) |
233 |
- |
234 |
- def __getnewargs__(self): |
235 |
-- return (unicode(self),) |
236 |
-+ return (str(self),) |
237 |
- |
238 |
- def __getattr__(self, attr): |
239 |
- """text.string gives you text. This is for backwards |
240 |
-@@ -424,7 +424,7 @@ |
241 |
- if attr == 'string': |
242 |
- return self |
243 |
- else: |
244 |
-- raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) |
245 |
-+ raise AttributeError("'%s' object has no attribute '%s'" % (self.__class__.__name__, attr)) |
246 |
- |
247 |
- def encode(self, encoding=DEFAULT_OUTPUT_ENCODING): |
248 |
- return self.decode().encode(encoding) |
249 |
-@@ -435,23 +435,23 @@ |
250 |
- class CData(NavigableString): |
251 |
- |
252 |
- def decodeGivenEventualEncoding(self, eventualEncoding): |
253 |
-- return u'<![CDATA[' + self + u']]>' |
254 |
-+ return '<![CDATA[' + self + ']]>' |
255 |
- |
256 |
- class ProcessingInstruction(NavigableString): |
257 |
- |
258 |
- def decodeGivenEventualEncoding(self, eventualEncoding): |
259 |
- output = self |
260 |
-- if u'%SOUP-ENCODING%' in output: |
261 |
-+ if '%SOUP-ENCODING%' in output: |
262 |
- output = self.substituteEncoding(output, eventualEncoding) |
263 |
-- return u'<?' + output + u'?>' |
264 |
-+ return '<?' + output + '?>' |
265 |
- |
266 |
- class Comment(NavigableString): |
267 |
- def decodeGivenEventualEncoding(self, eventualEncoding): |
268 |
-- return u'<!--' + self + u'-->' |
269 |
-+ return '<!--' + self + '-->' |
270 |
- |
271 |
- class Declaration(NavigableString): |
272 |
- def decodeGivenEventualEncoding(self, eventualEncoding): |
273 |
-- return u'<!' + self + u'>' |
274 |
-+ return '<!' + self + '>' |
275 |
- |
276 |
- class Tag(PageElement): |
277 |
- |
278 |
-@@ -460,7 +460,7 @@ |
279 |
- def _invert(h): |
280 |
- "Cheap function to invert a hash." |
281 |
- i = {} |
282 |
-- for k,v in h.items(): |
283 |
-+ for k,v in list(h.items()): |
284 |
- i[v] = k |
285 |
- return i |
286 |
- |
287 |
-@@ -479,23 +479,23 @@ |
288 |
- escaped.""" |
289 |
- x = match.group(1) |
290 |
- if self.convertHTMLEntities and x in name2codepoint: |
291 |
-- return unichr(name2codepoint[x]) |
292 |
-+ return chr(name2codepoint[x]) |
293 |
- elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS: |
294 |
- if self.convertXMLEntities: |
295 |
- return self.XML_ENTITIES_TO_SPECIAL_CHARS[x] |
296 |
- else: |
297 |
-- return u'&%s;' % x |
298 |
-+ return '&%s;' % x |
299 |
- elif len(x) > 0 and x[0] == '#': |
300 |
- # Handle numeric entities |
301 |
- if len(x) > 1 and x[1] == 'x': |
302 |
-- return unichr(int(x[2:], 16)) |
303 |
-+ return chr(int(x[2:], 16)) |
304 |
- else: |
305 |
-- return unichr(int(x[1:])) |
306 |
-+ return chr(int(x[1:])) |
307 |
- |
308 |
- elif self.escapeUnrecognizedEntities: |
309 |
-- return u'&amp;%s;' % x |
309 |
-+ return '&amp;%s;' % x |
311 |
- else: |
312 |
-- return u'&%s;' % x |
313 |
-+ return '&%s;' % x |
314 |
- |
315 |
- def __init__(self, parser, name, attrs=None, parent=None, |
316 |
- previous=None): |
317 |
-@@ -524,7 +524,7 @@ |
318 |
- return kval |
319 |
- return (k, re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);", |
320 |
- self._convertEntities, val)) |
321 |
-- self.attrs = map(convert, self.attrs) |
322 |
-+ self.attrs = list(map(convert, self.attrs)) |
323 |
- |
324 |
- def get(self, key, default=None): |
325 |
- """Returns the value of the 'key' attribute for the tag, or |
326 |
-@@ -533,7 +533,7 @@ |
327 |
- return self._getAttrMap().get(key, default) |
328 |
- |
329 |
- def has_key(self, key): |
330 |
-- return self._getAttrMap().has_key(key) |
331 |
-+ return key in self._getAttrMap() |
332 |
- |
333 |
- def __getitem__(self, key): |
334 |
- """tag[key] returns the value of the 'key' attribute for the tag, |
335 |
-@@ -551,7 +551,7 @@ |
336 |
- def __contains__(self, x): |
337 |
- return x in self.contents |
338 |
- |
339 |
-- def __nonzero__(self): |
340 |
-+ def __bool__(self): |
341 |
- "A tag is non-None even if it has no contents." |
342 |
- return True |
343 |
- |
344 |
-@@ -577,14 +577,14 @@ |
345 |
- #We don't break because bad HTML can define the same |
346 |
- #attribute multiple times. |
347 |
- self._getAttrMap() |
348 |
-- if self.attrMap.has_key(key): |
349 |
-+ if key in self.attrMap: |
350 |
- del self.attrMap[key] |
351 |
- |
352 |
- def __call__(self, *args, **kwargs): |
353 |
- """Calling a tag like a function is the same as calling its |
354 |
- findAll() method. Eg. tag('a') returns a list of all the A tags |
355 |
- found within this tag.""" |
356 |
-- return apply(self.findAll, args, kwargs) |
357 |
-+ return self.findAll(*args, **kwargs) |
358 |
- |
359 |
- def __getattr__(self, tag): |
360 |
- #print "Getattr %s.%s" % (self.__class__, tag) |
361 |
-@@ -592,7 +592,7 @@ |
362 |
- return self.find(tag[:-3]) |
363 |
- elif tag.find('__') != 0: |
364 |
- return self.find(tag) |
365 |
-- raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag) |
366 |
-+ raise AttributeError("'%s' object has no attribute '%s'" % (self.__class__, tag)) |
367 |
- |
368 |
- def __eq__(self, other): |
369 |
- """Returns true iff this tag has the same name, the same attributes, |
370 |
-@@ -868,7 +868,7 @@ |
371 |
- if isinstance(markupName, Tag): |
372 |
- markup = markupName |
373 |
- markupAttrs = markup |
374 |
-- callFunctionWithTagData = callable(self.name) \ |
375 |
-+ callFunctionWithTagData = hasattr(self.name, '__call__') \ |
376 |
- and not isinstance(markupName, Tag) |
377 |
- |
378 |
- if (not self.name) \ |
379 |
-@@ -880,7 +880,7 @@ |
380 |
- else: |
381 |
- match = True |
382 |
- markupAttrMap = None |
383 |
-- for attr, matchAgainst in self.attrs.items(): |
384 |
-+ for attr, matchAgainst in list(self.attrs.items()): |
385 |
- if not markupAttrMap: |
386 |
- if hasattr(markupAttrs, 'get'): |
387 |
- markupAttrMap = markupAttrs |
388 |
-@@ -921,16 +921,16 @@ |
389 |
- if self._matches(markup, self.text): |
390 |
- found = markup |
391 |
- else: |
392 |
-- raise Exception, "I don't know how to match against a %s" \ |
393 |
-- % markup.__class__ |
394 |
-+ raise Exception("I don't know how to match against a %s" \ |
395 |
-+ % markup.__class__) |
396 |
- return found |
397 |
- |
398 |
- def _matches(self, markup, matchAgainst): |
399 |
- #print "Matching %s against %s" % (markup, matchAgainst) |
400 |
- result = False |
401 |
-- if matchAgainst == True and type(matchAgainst) == types.BooleanType: |
402 |
-+ if matchAgainst == True and type(matchAgainst) == bool: |
403 |
- result = markup != None |
404 |
-- elif callable(matchAgainst): |
405 |
-+ elif hasattr(matchAgainst, '__call__'): |
406 |
- result = matchAgainst(markup) |
407 |
- else: |
408 |
- #Custom match methods take the tag as an argument, but all |
409 |
-@@ -938,7 +938,7 @@ |
410 |
- if isinstance(markup, Tag): |
411 |
- markup = markup.name |
412 |
- if markup is not None and not isString(markup): |
413 |
-- markup = unicode(markup) |
414 |
-+ markup = str(markup) |
415 |
- #Now we know that chunk is either a string, or None. |
416 |
- if hasattr(matchAgainst, 'match'): |
417 |
- # It's a regexp object. |
418 |
-@@ -947,10 +947,10 @@ |
419 |
- and (markup is not None or not isString(matchAgainst))): |
420 |
- result = markup in matchAgainst |
421 |
- elif hasattr(matchAgainst, 'items'): |
422 |
-- result = markup.has_key(matchAgainst) |
423 |
-+ result = matchAgainst in markup |
424 |
- elif matchAgainst and isString(markup): |
425 |
-- if isinstance(markup, unicode): |
426 |
-- matchAgainst = unicode(matchAgainst) |
427 |
-+ if isinstance(markup, str): |
428 |
-+ matchAgainst = str(matchAgainst) |
429 |
- else: |
430 |
- matchAgainst = str(matchAgainst) |
431 |
- |
432 |
-@@ -971,13 +971,13 @@ |
433 |
- """Convenience method that works with all 2.x versions of Python |
434 |
- to determine whether or not something is listlike.""" |
435 |
- return ((hasattr(l, '__iter__') and not isString(l)) |
436 |
-- or (type(l) in (types.ListType, types.TupleType))) |
437 |
-+ or (type(l) in (list, tuple))) |
438 |
- |
439 |
- def isString(s): |
440 |
- """Convenience method that works with all 2.x versions of Python |
441 |
- to determine whether or not something is stringlike.""" |
442 |
- try: |
443 |
-- return isinstance(s, unicode) or isinstance(s, basestring) |
444 |
-+ return isinstance(s, str) or isinstance(s, str) |
445 |
- except NameError: |
446 |
- return isinstance(s, str) |
447 |
- |
448 |
-@@ -989,7 +989,7 @@ |
449 |
- for portion in args: |
450 |
- if hasattr(portion, 'items'): |
451 |
- #It's a map. Merge it. |
452 |
-- for k,v in portion.items(): |
453 |
-+ for k,v in list(portion.items()): |
454 |
- built[k] = v |
455 |
- elif isList(portion) and not isString(portion): |
456 |
- #It's a list. Map each item to the default. |
457 |
-@@ -1034,7 +1034,7 @@ |
458 |
- object, possibly one with a %SOUP-ENCODING% slot into which an |
459 |
- encoding will be plugged later.""" |
460 |
- if text[:3] == "xml": |
461 |
-- text = u"xml version='1.0' encoding='%SOUP-ENCODING%'" |
462 |
-+ text = "xml version='1.0' encoding='%SOUP-ENCODING%'" |
463 |
- self._toStringSubclass(text, ProcessingInstruction) |
464 |
- |
465 |
- def handle_comment(self, text): |
466 |
-@@ -1044,7 +1044,7 @@ |
467 |
- def handle_charref(self, ref): |
468 |
- "Handle character references as data." |
469 |
- if self.soup.convertEntities: |
470 |
-- data = unichr(int(ref)) |
471 |
-+ data = chr(int(ref)) |
472 |
- else: |
473 |
- data = '&#%s;' % ref |
474 |
- self.handle_data(data) |
475 |
-@@ -1056,7 +1056,7 @@ |
476 |
- data = None |
477 |
- if self.soup.convertHTMLEntities: |
478 |
- try: |
479 |
-- data = unichr(name2codepoint[ref]) |
480 |
-+ data = chr(name2codepoint[ref]) |
481 |
- except KeyError: |
482 |
- pass |
483 |
- |
484 |
-@@ -1147,7 +1147,7 @@ |
485 |
- lambda x: '<!' + x.group(1) + '>') |
486 |
- ] |
487 |
- |
488 |
-- ROOT_TAG_NAME = u'[document]' |
489 |
-+ ROOT_TAG_NAME = '[document]' |
490 |
- |
491 |
- HTML_ENTITIES = "html" |
492 |
- XML_ENTITIES = "xml" |
493 |
-@@ -1236,14 +1236,14 @@ |
494 |
- def _feed(self, inDocumentEncoding=None, isHTML=False): |
495 |
- # Convert the document to Unicode. |
496 |
- markup = self.markup |
497 |
-- if isinstance(markup, unicode): |
498 |
-+ if isinstance(markup, str): |
499 |
- if not hasattr(self, 'originalEncoding'): |
500 |
- self.originalEncoding = None |
501 |
- else: |
502 |
- dammit = UnicodeDammit\ |
503 |
- (markup, [self.fromEncoding, inDocumentEncoding], |
504 |
- smartQuotesTo=self.smartQuotesTo, isHTML=isHTML) |
505 |
-- markup = dammit.unicode |
506 |
-+ markup = dammit.str |
507 |
- self.originalEncoding = dammit.originalEncoding |
508 |
- self.declaredHTMLEncoding = dammit.declaredHTMLEncoding |
509 |
- if markup: |
510 |
-@@ -1269,8 +1269,8 @@ |
511 |
- def isSelfClosingTag(self, name): |
512 |
- """Returns true iff the given string is the name of a |
513 |
- self-closing tag according to this parser.""" |
514 |
-- return self.SELF_CLOSING_TAGS.has_key(name) \ |
515 |
-- or self.instanceSelfClosingTags.has_key(name) |
516 |
-+ return name in self.SELF_CLOSING_TAGS \ |
517 |
-+ or name in self.instanceSelfClosingTags |
518 |
- |
519 |
- def reset(self): |
520 |
- Tag.__init__(self, self, self.ROOT_TAG_NAME) |
521 |
-@@ -1305,7 +1305,7 @@ |
522 |
- |
523 |
- def endData(self, containerClass=NavigableString): |
524 |
- if self.currentData: |
525 |
-- currentData = u''.join(self.currentData) |
526 |
-+ currentData = ''.join(self.currentData) |
527 |
- if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and |
528 |
- not set([tag.name for tag in self.tagStack]).intersection( |
529 |
- self.PRESERVE_WHITESPACE_TAGS)): |
530 |
-@@ -1368,7 +1368,7 @@ |
531 |
- |
532 |
- nestingResetTriggers = self.NESTABLE_TAGS.get(name) |
533 |
- isNestable = nestingResetTriggers != None |
534 |
-- isResetNesting = self.RESET_NESTING_TAGS.has_key(name) |
535 |
-+ isResetNesting = name in self.RESET_NESTING_TAGS |
536 |
- popTo = None |
537 |
- inclusive = True |
538 |
- for i in range(len(self.tagStack)-1, 0, -1): |
539 |
-@@ -1381,7 +1381,7 @@ |
540 |
- if (nestingResetTriggers != None |
541 |
- and p.name in nestingResetTriggers) \ |
542 |
- or (nestingResetTriggers == None and isResetNesting |
543 |
-- and self.RESET_NESTING_TAGS.has_key(p.name)): |
544 |
-+ and p.name in self.RESET_NESTING_TAGS): |
545 |
- |
546 |
- #If we encounter one of the nesting reset triggers |
547 |
- #peculiar to this tag, or we encounter another tag |
548 |
-@@ -1399,7 +1399,7 @@ |
549 |
- if self.quoteStack: |
550 |
- #This is not a real tag. |
551 |
- #print "<%s> is not real!" % name |
552 |
-- attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs)) |
553 |
-+ attrs = ''.join([' %s="%s"' % (x_y[0], x_y[1]) for x_y in attrs]) |
554 |
- self.handle_data('<%s%s>' % (name, attrs)) |
555 |
- return |
556 |
- self.endData() |
557 |
-@@ -1493,7 +1493,7 @@ |
558 |
- BeautifulStoneSoup before writing your own subclass.""" |
559 |
- |
560 |
- def __init__(self, *args, **kwargs): |
561 |
-- if not kwargs.has_key('smartQuotesTo'): |
562 |
-+ if 'smartQuotesTo' not in kwargs: |
563 |
- kwargs['smartQuotesTo'] = self.HTML_ENTITIES |
564 |
- kwargs['isHTML'] = True |
565 |
- BeautifulStoneSoup.__init__(self, *args, **kwargs) |
566 |
-@@ -1677,7 +1677,7 @@ |
567 |
- parent._getAttrMap() |
568 |
- if (isinstance(tag, Tag) and len(tag.contents) == 1 and |
569 |
- isinstance(tag.contents[0], NavigableString) and |
570 |
-- not parent.attrMap.has_key(tag.name)): |
571 |
-+ tag.name not in parent.attrMap): |
572 |
- parent[tag.name] = tag.contents[0] |
573 |
- BeautifulStoneSoup.popTag(self) |
574 |
- |
575 |
-@@ -1751,9 +1751,9 @@ |
576 |
- self._detectEncoding(markup, isHTML) |
577 |
- self.smartQuotesTo = smartQuotesTo |
578 |
- self.triedEncodings = [] |
579 |
-- if markup == '' or isinstance(markup, unicode): |
580 |
-+ if markup == '' or isinstance(markup, str): |
581 |
- self.originalEncoding = None |
582 |
-- self.unicode = unicode(markup) |
583 |
-+ self.str = str(markup) |
584 |
- return |
585 |
- |
586 |
- u = None |
587 |
-@@ -1766,7 +1766,7 @@ |
588 |
- if u: break |
589 |
- |
590 |
- # If no luck and we have auto-detection library, try that: |
591 |
-- if not u and chardet and not isinstance(self.markup, unicode): |
592 |
-+ if not u and chardet and not isinstance(self.markup, str): |
593 |
- u = self._convertFrom(chardet.detect(self.markup)['encoding']) |
594 |
- |
595 |
- # As a last resort, try utf-8 and windows-1252: |
596 |
-@@ -1775,7 +1775,7 @@ |
597 |
- u = self._convertFrom(proposed_encoding) |
598 |
- if u: break |
599 |
- |
600 |
-- self.unicode = u |
601 |
-+ self.str = u |
602 |
- if not u: self.originalEncoding = None |
603 |
- |
604 |
- def _subMSChar(self, match): |
605 |
-@@ -1783,7 +1783,7 @@ |
606 |
- entity.""" |
607 |
- orig = match.group(1) |
608 |
- sub = self.MS_CHARS.get(orig) |
609 |
-- if type(sub) == types.TupleType: |
610 |
-+ if type(sub) == tuple: |
611 |
- if self.smartQuotesTo == 'xml': |
612 |
- sub = '&#x'.encode() + sub[1].encode() + ';'.encode() |
613 |
- else: |
614 |
-@@ -1804,7 +1804,7 @@ |
615 |
- if self.smartQuotesTo and proposed.lower() in("windows-1252", |
616 |
- "iso-8859-1", |
617 |
- "iso-8859-2"): |
618 |
-- smart_quotes_re = "([\x80-\x9f])" |
619 |
-+ smart_quotes_re = b"([\x80-\x9f])" |
620 |
- smart_quotes_compiled = re.compile(smart_quotes_re) |
621 |
- markup = smart_quotes_compiled.sub(self._subMSChar, markup) |
622 |
- |
623 |
-@@ -1813,7 +1813,7 @@ |
624 |
- u = self._toUnicode(markup, proposed) |
625 |
- self.markup = u |
626 |
- self.originalEncoding = proposed |
627 |
-- except Exception, e: |
628 |
-+ except Exception as e: |
629 |
- # print "That didn't work!" |
630 |
- # print e |
631 |
- return None |
632 |
-@@ -1842,7 +1842,7 @@ |
633 |
- elif data[:4] == '\xff\xfe\x00\x00': |
634 |
- encoding = 'utf-32le' |
635 |
- data = data[4:] |
636 |
-- newdata = unicode(data, encoding) |
637 |
-+ newdata = str(data, encoding) |
638 |
- return newdata |
639 |
- |
640 |
- def _detectEncoding(self, xml_data, isHTML=False): |
641 |
-@@ -1855,41 +1855,41 @@ |
642 |
- elif xml_data[:4] == '\x00\x3c\x00\x3f': |
643 |
- # UTF-16BE |
644 |
- sniffed_xml_encoding = 'utf-16be' |
645 |
-- xml_data = unicode(xml_data, 'utf-16be').encode('utf-8') |
646 |
-+ xml_data = str(xml_data, 'utf-16be').encode('utf-8') |
647 |
- elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \ |
648 |
- and (xml_data[2:4] != '\x00\x00'): |
649 |
- # UTF-16BE with BOM |
650 |
- sniffed_xml_encoding = 'utf-16be' |
651 |
-- xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8') |
652 |
-+ xml_data = str(xml_data[2:], 'utf-16be').encode('utf-8') |
653 |
- elif xml_data[:4] == '\x3c\x00\x3f\x00': |
654 |
- # UTF-16LE |
655 |
- sniffed_xml_encoding = 'utf-16le' |
656 |
-- xml_data = unicode(xml_data, 'utf-16le').encode('utf-8') |
657 |
-+ xml_data = str(xml_data, 'utf-16le').encode('utf-8') |
658 |
- elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \ |
659 |
- (xml_data[2:4] != '\x00\x00'): |
660 |
- # UTF-16LE with BOM |
661 |
- sniffed_xml_encoding = 'utf-16le' |
662 |
-- xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8') |
663 |
-+ xml_data = str(xml_data[2:], 'utf-16le').encode('utf-8') |
664 |
- elif xml_data[:4] == '\x00\x00\x00\x3c': |
665 |
- # UTF-32BE |
666 |
- sniffed_xml_encoding = 'utf-32be' |
667 |
-- xml_data = unicode(xml_data, 'utf-32be').encode('utf-8') |
668 |
-+ xml_data = str(xml_data, 'utf-32be').encode('utf-8') |
669 |
- elif xml_data[:4] == '\x3c\x00\x00\x00': |
670 |
- # UTF-32LE |
671 |
- sniffed_xml_encoding = 'utf-32le' |
672 |
-- xml_data = unicode(xml_data, 'utf-32le').encode('utf-8') |
673 |
-+ xml_data = str(xml_data, 'utf-32le').encode('utf-8') |
674 |
- elif xml_data[:4] == '\x00\x00\xfe\xff': |
675 |
- # UTF-32BE with BOM |
676 |
- sniffed_xml_encoding = 'utf-32be' |
677 |
-- xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8') |
678 |
-+ xml_data = str(xml_data[4:], 'utf-32be').encode('utf-8') |
679 |
- elif xml_data[:4] == '\xff\xfe\x00\x00': |
680 |
- # UTF-32LE with BOM |
681 |
- sniffed_xml_encoding = 'utf-32le' |
682 |
-- xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8') |
683 |
-+ xml_data = str(xml_data[4:], 'utf-32le').encode('utf-8') |
684 |
- elif xml_data[:3] == '\xef\xbb\xbf': |
685 |
- # UTF-8 with BOM |
686 |
- sniffed_xml_encoding = 'utf-8' |
687 |
-- xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8') |
688 |
-+ xml_data = str(xml_data[3:], 'utf-8').encode('utf-8') |
689 |
- else: |
690 |
- sniffed_xml_encoding = 'ascii' |
691 |
- pass |
692 |
-@@ -1954,41 +1954,41 @@ |
693 |
- 250,251,252,253,254,255) |
694 |
- import string |
695 |
- c.EBCDIC_TO_ASCII_MAP = string.maketrans( \ |
696 |
-- ''.join(map(chr, range(256))), ''.join(map(chr, emap))) |
697 |
-+ ''.join(map(chr, list(range(256)))), ''.join(map(chr, emap))) |
698 |
- return s.translate(c.EBCDIC_TO_ASCII_MAP) |
699 |
- |
700 |
-- MS_CHARS = { '\x80' : ('euro', '20AC'), |
701 |
-- '\x81' : ' ', |
702 |
-- '\x82' : ('sbquo', '201A'), |
703 |
-- '\x83' : ('fnof', '192'), |
704 |
-- '\x84' : ('bdquo', '201E'), |
705 |
-- '\x85' : ('hellip', '2026'), |
706 |
-- '\x86' : ('dagger', '2020'), |
707 |
-- '\x87' : ('Dagger', '2021'), |
708 |
-- '\x88' : ('circ', '2C6'), |
709 |
-- '\x89' : ('permil', '2030'), |
710 |
-- '\x8A' : ('Scaron', '160'), |
711 |
-- '\x8B' : ('lsaquo', '2039'), |
712 |
-- '\x8C' : ('OElig', '152'), |
713 |
-- '\x8D' : '?', |
714 |
-- '\x8E' : ('#x17D', '17D'), |
715 |
-- '\x8F' : '?', |
716 |
-- '\x90' : '?', |
717 |
-- '\x91' : ('lsquo', '2018'), |
718 |
-- '\x92' : ('rsquo', '2019'), |
719 |
-- '\x93' : ('ldquo', '201C'), |
720 |
-- '\x94' : ('rdquo', '201D'), |
721 |
-- '\x95' : ('bull', '2022'), |
722 |
-- '\x96' : ('ndash', '2013'), |
723 |
-- '\x97' : ('mdash', '2014'), |
724 |
-- '\x98' : ('tilde', '2DC'), |
725 |
-- '\x99' : ('trade', '2122'), |
726 |
-- '\x9a' : ('scaron', '161'), |
727 |
-- '\x9b' : ('rsaquo', '203A'), |
728 |
-- '\x9c' : ('oelig', '153'), |
729 |
-- '\x9d' : '?', |
730 |
-- '\x9e' : ('#x17E', '17E'), |
731 |
-- '\x9f' : ('Yuml', ''),} |
732 |
-+ MS_CHARS = { b'\x80' : ('euro', '20AC'), |
733 |
-+ b'\x81' : ' ', |
734 |
-+ b'\x82' : ('sbquo', '201A'), |
735 |
-+ b'\x83' : ('fnof', '192'), |
736 |
-+ b'\x84' : ('bdquo', '201E'), |
737 |
-+ b'\x85' : ('hellip', '2026'), |
738 |
-+ b'\x86' : ('dagger', '2020'), |
739 |
-+ b'\x87' : ('Dagger', '2021'), |
740 |
-+ b'\x88' : ('circ', '2C6'), |
741 |
-+ b'\x89' : ('permil', '2030'), |
742 |
-+ b'\x8A' : ('Scaron', '160'), |
743 |
-+ b'\x8B' : ('lsaquo', '2039'), |
744 |
-+ b'\x8C' : ('OElig', '152'), |
745 |
-+ b'\x8D' : '?', |
746 |
-+ b'\x8E' : ('#x17D', '17D'), |
747 |
-+ b'\x8F' : '?', |
748 |
-+ b'\x90' : '?', |
749 |
-+ b'\x91' : ('lsquo', '2018'), |
750 |
-+ b'\x92' : ('rsquo', '2019'), |
751 |
-+ b'\x93' : ('ldquo', '201C'), |
752 |
-+ b'\x94' : ('rdquo', '201D'), |
753 |
-+ b'\x95' : ('bull', '2022'), |
754 |
-+ b'\x96' : ('ndash', '2013'), |
755 |
-+ b'\x97' : ('mdash', '2014'), |
756 |
-+ b'\x98' : ('tilde', '2DC'), |
757 |
-+ b'\x99' : ('trade', '2122'), |
758 |
-+ b'\x9a' : ('scaron', '161'), |
759 |
-+ b'\x9b' : ('rsaquo', '203A'), |
760 |
-+ b'\x9c' : ('oelig', '153'), |
761 |
-+ b'\x9d' : '?', |
762 |
-+ b'\x9e' : ('#x17E', '17E'), |
763 |
-+ b'\x9f' : ('Yuml', ''),} |
764 |
- |
765 |
- ####################################################################### |
766 |
- |
767 |
-@@ -1997,4 +1997,4 @@ |
768 |
- if __name__ == '__main__': |
769 |
- import sys |
770 |
- soup = BeautifulSoup(sys.stdin) |
771 |
-- print soup.prettify() |
772 |
-+ print(soup.prettify()) |
773 |
---- BeautifulSoupTests.py |
774 |
-+++ BeautifulSoupTests.py |
775 |
-@@ -82,7 +82,7 @@ |
776 |
- def testFindAllText(self): |
777 |
- soup = BeautifulSoup("<html>\xbb</html>") |
778 |
- self.assertEqual(soup.findAll(text=re.compile('.*')), |
779 |
-- [u'\xbb']) |
780 |
-+ ['\xbb']) |
781 |
- |
782 |
- def testFindAllByRE(self): |
783 |
- import re |
784 |
-@@ -215,7 +215,7 @@ |
785 |
- soup = BeautifulSoup(self.x, parseOnlyThese=strainer) |
786 |
- self.assertEquals(len(soup), 10) |
787 |
- |
788 |
-- strainer = SoupStrainer(text=lambda(x):x[8]=='3') |
789 |
-+ strainer = SoupStrainer(text=lambda x:x[8]=='3') |
790 |
- soup = BeautifulSoup(self.x, parseOnlyThese=strainer) |
791 |
- self.assertEquals(len(soup), 3) |
792 |
- |
793 |
-@@ -256,7 +256,7 @@ |
794 |
- self.assertEqual(copied.decode(), self.soup.decode()) |
795 |
- |
796 |
- def testUnicodePickle(self): |
797 |
-- import cPickle as pickle |
798 |
-+ import pickle as pickle |
799 |
- html = "<b>" + chr(0xc3) + "</b>" |
800 |
- soup = BeautifulSoup(html) |
801 |
- dumped = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL) |
802 |
-@@ -586,23 +586,23 @@ |
803 |
- self.assertEquals(soup.decode(), "<<sacré bleu!>>") |
804 |
- |
805 |
- soup = BeautifulStoneSoup(text, convertEntities=htmlEnt) |
806 |
-- self.assertEquals(soup.decode(), u"<<sacr\xe9 bleu!>>") |
807 |
-+ self.assertEquals(soup.decode(), "<<sacr\xe9 bleu!>>") |
808 |
- |
809 |
- # Make sure the "XML", "HTML", and "XHTML" settings work. |
810 |
- text = "<™'" |
811 |
- soup = BeautifulStoneSoup(text, convertEntities=xmlEnt) |
812 |
-- self.assertEquals(soup.decode(), u"<™'") |
813 |
-+ self.assertEquals(soup.decode(), "<™'") |
814 |
- |
815 |
- soup = BeautifulStoneSoup(text, convertEntities=htmlEnt) |
816 |
-- self.assertEquals(soup.decode(), u"<\u2122'") |
817 |
-+ self.assertEquals(soup.decode(), "<\u2122'") |
818 |
- |
819 |
- soup = BeautifulStoneSoup(text, convertEntities=xhtmlEnt) |
820 |
-- self.assertEquals(soup.decode(), u"<\u2122'") |
821 |
-+ self.assertEquals(soup.decode(), "<\u2122'") |
822 |
- |
823 |
- def testNonBreakingSpaces(self): |
824 |
- soup = BeautifulSoup("<a> </a>", |
825 |
- convertEntities=BeautifulStoneSoup.HTML_ENTITIES) |
826 |
-- self.assertEquals(soup.decode(), u"<a>\xa0\xa0</a>") |
827 |
-+ self.assertEquals(soup.decode(), "<a>\xa0\xa0</a>") |
828 |
- |
829 |
- def testWhitespaceInDeclaration(self): |
830 |
- self.assertSoupEquals('<! DOCTYPE>', '<!DOCTYPE>') |
831 |
-@@ -617,27 +617,27 @@ |
832 |
- self.assertSoupEquals('<b>hello there</b>') |
833 |
- |
834 |
- def testEntitiesInAttributeValues(self): |
835 |
-- self.assertSoupEquals('<x t="xñ">', '<x t="x\xc3\xb1"></x>', |
836 |
-+ self.assertSoupEquals('<x t="xñ">', b'<x t="x\xc3\xb1"></x>', |
837 |
- encoding='utf-8') |
838 |
-- self.assertSoupEquals('<x t="xñ">', '<x t="x\xc3\xb1"></x>', |
839 |
-+ self.assertSoupEquals('<x t="xñ">', b'<x t="x\xc3\xb1"></x>', |
840 |
- encoding='utf-8') |
841 |
- |
842 |
- soup = BeautifulSoup('<x t=">™">', |
843 |
- convertEntities=BeautifulStoneSoup.HTML_ENTITIES) |
844 |
-- self.assertEquals(soup.decode(), u'<x t=">\u2122"></x>') |
845 |
-+ self.assertEquals(soup.decode(), '<x t=">\u2122"></x>') |
846 |
- |
847 |
- uri = "http://crummy.com?sacré&bleu" |
848 |
- link = '<a href="%s"></a>' % uri |
849 |
- |
850 |
- soup = BeautifulSoup(link, convertEntities=BeautifulSoup.HTML_ENTITIES) |
851 |
- self.assertEquals(soup.decode(), |
852 |
-- link.replace("é", u"\xe9")) |
853 |
-+ link.replace("é", "\xe9")) |
854 |
- |
855 |
- uri = "http://crummy.com?sacré&bleu" |
856 |
- link = '<a href="%s"></a>' % uri |
857 |
- soup = BeautifulSoup(link, convertEntities=BeautifulSoup.HTML_ENTITIES) |
858 |
- self.assertEquals(soup.a['href'], |
859 |
-- uri.replace("é", u"\xe9")) |
860 |
-+ uri.replace("é", "\xe9")) |
861 |
- |
862 |
- def testNakedAmpersands(self): |
863 |
- html = {'convertEntities':BeautifulStoneSoup.HTML_ENTITIES} |
864 |
-@@ -663,13 +663,13 @@ |
865 |
- smart quote fixes.""" |
866 |
- |
867 |
- def testUnicodeDammitStandalone(self): |
868 |
-- markup = "<foo>\x92</foo>" |
869 |
-+ markup = b"<foo>\x92</foo>" |
870 |
- dammit = UnicodeDammit(markup) |
871 |
-- self.assertEquals(dammit.unicode, "<foo>’</foo>") |
872 |
-+ self.assertEquals(dammit.str, "<foo>’</foo>") |
873 |
- |
874 |
-- hebrew = "\xed\xe5\xec\xf9" |
875 |
-+ hebrew = b"\xed\xe5\xec\xf9" |
876 |
- dammit = UnicodeDammit(hebrew, ["iso-8859-8"]) |
877 |
-- self.assertEquals(dammit.unicode, u'\u05dd\u05d5\u05dc\u05e9') |
878 |
-+ self.assertEquals(dammit.str, '\u05dd\u05d5\u05dc\u05e9') |
879 |
- self.assertEquals(dammit.originalEncoding, 'iso-8859-8') |
880 |
- |
881 |
- def testGarbageInGarbageOut(self): |
882 |
-@@ -677,13 +677,13 @@ |
883 |
- asciiSoup = BeautifulStoneSoup(ascii) |
884 |
- self.assertEquals(ascii, asciiSoup.decode()) |
885 |
- |
886 |
-- unicodeData = u"<foo>\u00FC</foo>" |
887 |
-+ unicodeData = "<foo>\u00FC</foo>" |
888 |
- utf8 = unicodeData.encode("utf-8") |
889 |
-- self.assertEquals(utf8, '<foo>\xc3\xbc</foo>') |
890 |
-+ self.assertEquals(utf8, b'<foo>\xc3\xbc</foo>') |
891 |
- |
892 |
- unicodeSoup = BeautifulStoneSoup(unicodeData) |
893 |
- self.assertEquals(unicodeData, unicodeSoup.decode()) |
894 |
-- self.assertEquals(unicodeSoup.foo.string, u'\u00FC') |
895 |
-+ self.assertEquals(unicodeSoup.foo.string, '\u00FC') |
896 |
- |
897 |
- utf8Soup = BeautifulStoneSoup(utf8, fromEncoding='utf-8') |
898 |
- self.assertEquals(utf8, utf8Soup.encode('utf-8')) |
899 |
-@@ -696,18 +696,18 @@ |
900 |
- |
901 |
- def testHandleInvalidCodec(self): |
902 |
- for bad_encoding in ['.utf8', '...', 'utF---16.!']: |
903 |
-- soup = BeautifulSoup(u"Räksmörgås".encode("utf-8"), |
904 |
-+ soup = BeautifulSoup("Räksmörgås".encode("utf-8"), |
905 |
- fromEncoding=bad_encoding) |
906 |
- self.assertEquals(soup.originalEncoding, 'utf-8') |
907 |
- |
908 |
- def testUnicodeSearch(self): |
909 |
-- html = u'<html><body><h1>Räksmörgås</h1></body></html>' |
910 |
-+ html = '<html><body><h1>Räksmörgås</h1></body></html>' |
911 |
- soup = BeautifulSoup(html) |
912 |
-- self.assertEqual(soup.find(text=u'Räksmörgås'),u'Räksmörgås') |
913 |
-+ self.assertEqual(soup.find(text='Räksmörgås'),'Räksmörgås') |
914 |
- |
915 |
- def testRewrittenXMLHeader(self): |
916 |
-- euc_jp = '<?xml version="1.0 encoding="euc-jp"?>\n<foo>\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n</foo>\n' |
917 |
-- utf8 = "<?xml version='1.0' encoding='utf-8'?>\n<foo>\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n</foo>\n" |
918 |
-+ euc_jp = b'<?xml version="1.0 encoding="euc-jp"?>\n<foo>\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n</foo>\n' |
919 |
-+ utf8 = b"<?xml version='1.0' encoding='utf-8'?>\n<foo>\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n</foo>\n" |
920 |
- soup = BeautifulStoneSoup(euc_jp) |
921 |
- if soup.originalEncoding != "euc-jp": |
922 |
- raise Exception("Test failed when parsing euc-jp document. " |
923 |
-@@ -718,12 +718,12 @@ |
924 |
- self.assertEquals(soup.originalEncoding, "euc-jp") |
925 |
- self.assertEquals(soup.renderContents('utf-8'), utf8) |
926 |
- |
927 |
-- old_text = "<?xml encoding='windows-1252'><foo>\x92</foo>" |
928 |
-+ old_text = b"<?xml encoding='windows-1252'><foo>\x92</foo>" |
929 |
- new_text = "<?xml version='1.0' encoding='utf-8'?><foo>’</foo>" |
930 |
- self.assertSoupEquals(old_text, new_text) |
931 |
- |
932 |
- def testRewrittenMetaTag(self): |
933 |
-- no_shift_jis_html = '''<html><head>\n<meta http-equiv="Content-language" content="ja" /></head><body><pre>\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n</pre></body></html>''' |
934 |
-+ no_shift_jis_html = b'''<html><head>\n<meta http-equiv="Content-language" content="ja" /></head><body><pre>\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n</pre></body></html>''' |
935 |
- soup = BeautifulSoup(no_shift_jis_html) |
936 |
- |
937 |
- # Beautiful Soup used to try to rewrite the meta tag even if the |
938 |
-@@ -733,16 +733,16 @@ |
939 |
- soup = BeautifulSoup(no_shift_jis_html, parseOnlyThese=strainer) |
940 |
- self.assertEquals(soup.contents[0].name, 'pre') |
941 |
- |
942 |
-- meta_tag = ('<meta content="text/html; charset=x-sjis" ' |
943 |
-- 'http-equiv="Content-type" />') |
944 |
-+ meta_tag = (b'<meta content="text/html; charset=x-sjis" ' |
945 |
-+ b'http-equiv="Content-type" />') |
946 |
- shift_jis_html = ( |
947 |
-- '<html><head>\n%s\n' |
948 |
-- '<meta http-equiv="Content-language" content="ja" />' |
949 |
-- '</head><body><pre>\n' |
950 |
-- '\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f' |
951 |
-- '\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c' |
952 |
-- '\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n' |
953 |
-- '</pre></body></html>') % meta_tag |
954 |
-+ b'<html><head>\n' + meta_tag + b'\n' |
955 |
-+ b'<meta http-equiv="Content-language" content="ja" />' |
956 |
-+ b'</head><body><pre>\n' |
957 |
-+ b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f' |
958 |
-+ b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c' |
959 |
-+ b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n' |
960 |
-+ b'</pre></body></html>') |
961 |
- soup = BeautifulSoup(shift_jis_html) |
962 |
- if soup.originalEncoding != "shift-jis": |
963 |
- raise Exception("Test failed when parsing shift-jis document " |
964 |
-@@ -755,59 +755,59 @@ |
965 |
- content_type_tag = soup.meta['content'] |
966 |
- self.assertEquals(content_type_tag[content_type_tag.find('charset='):], |
967 |
- 'charset=%SOUP-ENCODING%') |
968 |
-- content_type = str(soup.meta) |
969 |
-+ content_type = soup.meta.decode() |
970 |
- index = content_type.find('charset=') |
971 |
- self.assertEqual(content_type[index:index+len('charset=utf8')+1], |
972 |
- 'charset=utf-8') |
973 |
- content_type = soup.meta.encode('shift-jis') |
974 |
-- index = content_type.find('charset=') |
975 |
-+ index = content_type.find(b'charset=') |
976 |
- self.assertEqual(content_type[index:index+len('charset=shift-jis')], |
977 |
- 'charset=shift-jis'.encode()) |
978 |
- |
979 |
- self.assertEquals(soup.encode('utf-8'), ( |
980 |
-- '<html><head>\n' |
981 |
-- '<meta content="text/html; charset=utf-8" ' |
982 |
-- 'http-equiv="Content-type" />\n' |
983 |
-- '<meta http-equiv="Content-language" content="ja" />' |
984 |
-- '</head><body><pre>\n' |
985 |
-- '\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3' |
986 |
-- '\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3' |
987 |
-- '\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6' |
988 |
-- '\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3' |
989 |
-- '\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n' |
990 |
-- '</pre></body></html>')) |
991 |
-+ b'<html><head>\n' |
992 |
-+ b'<meta content="text/html; charset=utf-8" ' |
993 |
-+ b'http-equiv="Content-type" />\n' |
994 |
-+ b'<meta http-equiv="Content-language" content="ja" />' |
995 |
-+ b'</head><body><pre>\n' |
996 |
-+ b'\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3' |
997 |
-+ b'\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3' |
998 |
-+ b'\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6' |
999 |
-+ b'\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3' |
1000 |
-+ b'\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n' |
1001 |
-+ b'</pre></body></html>')) |
1002 |
- self.assertEquals(soup.encode("shift-jis"), |
1003 |
- shift_jis_html.replace('x-sjis'.encode(), |
1004 |
- 'shift-jis'.encode())) |
1005 |
- |
1006 |
-- isolatin = """<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>""" |
1007 |
-+ isolatin = b"""<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>""" |
1008 |
- soup = BeautifulSoup(isolatin) |
1009 |
- |
1010 |
- utf8 = isolatin.replace("ISO-Latin-1".encode(), "utf-8".encode()) |
1011 |
-- utf8 = utf8.replace("\xe9", "\xc3\xa9") |
1012 |
-+ utf8 = utf8.replace(b"\xe9", b"\xc3\xa9") |
1013 |
- self.assertSoupEquals(soup.encode("utf-8"), utf8, encoding='utf-8') |
1014 |
- |
1015 |
- def testHebrew(self): |
1016 |
-- iso_8859_8= '<HEAD>\n<TITLE>Hebrew (ISO 8859-8) in Visual Directionality</TITLE>\n\n\n\n</HEAD>\n<BODY>\n<H1>Hebrew (ISO 8859-8) in Visual Directionality</H1>\n\xed\xe5\xec\xf9\n</BODY>\n' |
1017 |
-- utf8 = '<head>\n<title>Hebrew (ISO 8859-8) in Visual Directionality</title>\n</head>\n<body>\n<h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n</body>\n' |
1018 |
-+ iso_8859_8= b'<HEAD>\n<TITLE>Hebrew (ISO 8859-8) in Visual Directionality</TITLE>\n\n\n\n</HEAD>\n<BODY>\n<H1>Hebrew (ISO 8859-8) in Visual Directionality</H1>\n\xed\xe5\xec\xf9\n</BODY>\n' |
1019 |
-+ utf8 = b'<head>\n<title>Hebrew (ISO 8859-8) in Visual Directionality</title>\n</head>\n<body>\n<h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n</body>\n' |
1020 |
- soup = BeautifulStoneSoup(iso_8859_8, fromEncoding="iso-8859-8") |
1021 |
- self.assertEquals(soup.encode('utf-8'), utf8) |
1022 |
- |
1023 |
- def testSmartQuotesNotSoSmartAnymore(self): |
1024 |
-- self.assertSoupEquals("\x91Foo\x92 <!--blah-->", |
1025 |
-+ self.assertSoupEquals(b"\x91Foo\x92 <!--blah-->", |
1026 |
- '‘Foo’ <!--blah-->') |
1027 |
- |
1028 |
- def testDontConvertSmartQuotesWhenAlsoConvertingEntities(self): |
1029 |
-- smartQuotes = "Il a dit, \x8BSacré bleu!\x9b" |
1030 |
-+ smartQuotes = b"Il a dit, \x8BSacré bleu!\x9b" |
1031 |
- soup = BeautifulSoup(smartQuotes) |
1032 |
- self.assertEquals(soup.decode(), |
1033 |
- 'Il a dit, ‹Sacré bleu!›') |
1034 |
- soup = BeautifulSoup(smartQuotes, convertEntities="html") |
1035 |
- self.assertEquals(soup.encode('utf-8'), |
1036 |
-- 'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba') |
1037 |
-+ b'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba') |
1038 |
- |
1039 |
- def testDontSeeSmartQuotesWhereThereAreNone(self): |
1040 |
-- utf_8 = "\343\202\261\343\203\274\343\202\277\343\202\244 Watch" |
1041 |
-+ utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch" |
1042 |
- self.assertSoupEquals(utf_8, encoding='utf-8') |
1043 |
- |
1044 |
- |
1045 |
---- setup.py |
1046 |
-+++ setup.py |
1047 |
-@@ -19,19 +19,19 @@ |
1048 |
- suite = loader.loadTestsFromModule(BeautifulSoupTests) |
1049 |
- suite.run(result) |
1050 |
- if not result.wasSuccessful(): |
1051 |
-- print "Unit tests have failed!" |
1052 |
-+ print("Unit tests have failed!") |
1053 |
- for l in result.errors, result.failures: |
1054 |
- for case, error in l: |
1055 |
-- print "-" * 80 |
1056 |
-+ print("-" * 80) |
1057 |
- desc = case.shortDescription() |
1058 |
- if desc: |
1059 |
-- print desc |
1060 |
-- print error |
1061 |
-- print '''If you see an error like: "'ascii' codec can't encode character...", see\nthe Beautiful Soup documentation:\n http://www.crummy.com/software/BeautifulSoup/documentation.html#Why%20can't%20Beautiful%20Soup%20print%20out%20the%20non-ASCII%20characters%20I%20gave%20it?''' |
1062 |
-- print "This might or might not be a problem depending on what you plan to do with\nBeautiful Soup." |
1063 |
-+ print(desc) |
1064 |
-+ print(error) |
1065 |
-+ print('''If you see an error like: "'ascii' codec can't encode character...", see\nthe Beautiful Soup documentation:\n http://www.crummy.com/software/BeautifulSoup/documentation.html#Why%20can't%20Beautiful%20Soup%20print%20out%20the%20non-ASCII%20characters%20I%20gave%20it?''') |
1066 |
-+ print("This might or might not be a problem depending on what you plan to do with\nBeautiful Soup.") |
1067 |
- if sys.argv[1] == 'sdist': |
1068 |
-- print |
1069 |
-- print "I'm not going to make a source distribution since the tests don't pass." |
1070 |
-+ print() |
1071 |
-+ print("I'm not going to make a source distribution since the tests don't pass.") |
1072 |
- sys.exit(1) |
1073 |
- |
1074 |
- setup(name="BeautifulSoup", |
1075 |
|
1076 |
diff --git a/profiles/package.mask b/profiles/package.mask |
1077 |
index 6d5d46bffac..81ca42e20b7 100644 |
1078 |
--- a/profiles/package.mask |
1079 |
+++ b/profiles/package.mask |
1080 |
@@ -481,11 +481,6 @@ media-plugins/vdr-skinnopacity |
1081 |
# #623706. Removal in a month. |
1082 |
net-libs/dhcpcd-dbus |
1083 |
|
1084 |
-# Pacho Ramos <pacho@g.o> (14 Jul 2017) |
1085 |
-# Not compatible with python >= 3.5 but neither needed by anything in the |
1086 |
-# tree anymore, bug #624670. Removal in a month. |
1087 |
-=dev-python/beautifulsoup-3.1.0.1-r2 |
1088 |
- |
1089 |
# Lars Wendler <polynomial-c@g.o> (07 Jul 2017) |
1090 |
# Masked until >=net-fs/samba-4.7 is in the tree and |
1091 |
# unmasked. (bug #624106) |