Gentoo Archives: gentoo-commits

From: "Michał Górny" <mgorny@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] repo/gentoo:master commit in: dev-python/beautifulsoup/files/, profiles/, dev-python/beautifulsoup/
Date: Mon, 14 Aug 2017 07:13:38
Message-Id: 1502694753.9741510885394a808ea58561b23a1711771f1f4a.mgorny@gentoo
1 commit: 9741510885394a808ea58561b23a1711771f1f4a
2 Author: Michał Górny <mgorny <AT> gentoo <DOT> org>
3 AuthorDate: Mon Aug 14 06:46:57 2017 +0000
4 Commit: Michał Górny <mgorny <AT> gentoo <DOT> org>
5 CommitDate: Mon Aug 14 07:12:33 2017 +0000
6 URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=97415108
7
8 dev-python/beautifulsoup: Drop masked ancient version
9
10 dev-python/beautifulsoup/Manifest | 1 -
11 .../beautifulsoup/beautifulsoup-3.1.0.1-r2.ebuild | 40 -
12 .../beautifulsoup-3.1.0.1-disable-tests.patch | 39 -
13 .../files/beautifulsoup-3.1.0.1-python-3.patch | 949 ---------------------
14 profiles/package.mask | 5 -
15 5 files changed, 1034 deletions(-)
16
17 diff --git a/dev-python/beautifulsoup/Manifest b/dev-python/beautifulsoup/Manifest
18 index a87f7f747d9..6ba59cf1880 100644
19 --- a/dev-python/beautifulsoup/Manifest
20 +++ b/dev-python/beautifulsoup/Manifest
21 @@ -1,4 +1,3 @@
22 -DIST BeautifulSoup-3.1.0.1.tar.gz 71460 SHA256 820a80f473240d9d30047f36c959d530a699a732500662dd8b03e1d3ccad12a8 SHA512 812969faf454a58d849921836ed07ec9a950f34fb31e29e118cdf1a75a533370e430f417402b5a5016d23b2d3a1c44a1cf5fde5b3bfd1bc98c50036edd51c0d6 WHIRLPOOL a199585817dcabcc6327c3836a66128605ebf92a6663b5c660125061a797485a504d300791bcd43e0e94e4f08ca59c01f65f42481da07b1240350cbfc6ea6b0c
23 DIST BeautifulSoup-3.2.1.tar.gz 31224 SHA256 f5ba85e907e7dfd78e44e4000b3eaef3a650aefc57831e8a645702db2e5b50db SHA512 365b7b045a2069cf437877543577bc0aa99256a6dc4c9743670b46bfceab5494a06628012d6eccecfe99c25d5c9e0c65814964b47026f15ba1a538444cfb7789 WHIRLPOOL c2f84b29421d0153fb1fecc87d63e00a61182e03bc0683132babca5d6c94143b4875a60a19124a36e4e6e78ce80bff9e1e81b37335700efc14084da933307e26
24 DIST beautifulsoup4-4.5.1.tar.gz 158039 SHA256 3c9474036afda9136aac6463def733f81017bf9ef3510d25634f335b0c87f5e1 SHA512 d560d7f743507084ec546708d29bb3764512f5b2c380004280dde813350bf48d1697fddce3bd3f95186407bf5142941d7adc7d0de8e7962eb5ca1278dbc7e93f WHIRLPOOL bf971596707c2ff69e93528164be01254258aa45601763c543246b67c5d31024b0e4de618382775a3cf313d255d8d1d6268a47542773531aacee9a2643412661
25 DIST beautifulsoup4-4.5.3.tar.gz 159185 SHA256 b21ca09366fa596043578fd4188b052b46634d22059e68dd0077d9ee77e08a3e SHA512 d31db0e3bb778a78c37882fcd55dc580eb5eeadfd48744eae6e2e0d0ef5983b216a4682af84a4971611b05fb99c45012ce094475f2d7c39a5b90dad99906ec84 WHIRLPOOL f8dbffd8e4a1dbee0a7ad8a4bcbe22a984f524474f0241a4c03ef5c37b291f9834a6ff1d076421c0cf1087588df1e49f5b99cd9afd7e81591c9063d92d4d097d
26
27 diff --git a/dev-python/beautifulsoup/beautifulsoup-3.1.0.1-r2.ebuild b/dev-python/beautifulsoup/beautifulsoup-3.1.0.1-r2.ebuild
28 deleted file mode 100644
29 index a69a317f6d9..00000000000
30 --- a/dev-python/beautifulsoup/beautifulsoup-3.1.0.1-r2.ebuild
31 +++ /dev/null
32 @@ -1,40 +0,0 @@
33 -# Copyright 1999-2017 Gentoo Foundation
34 -# Distributed under the terms of the GNU General Public License v2
35 -
36 -EAPI="5"
37 -# A few tests fail with python3.3/3.4 :(
38 -PYTHON_COMPAT=( python3_4 pypy3 )
39 -
40 -inherit distutils-r1 eutils
41 -
42 -MY_PN="BeautifulSoup"
43 -MY_P="${MY_PN}-${PV}"
44 -
45 -DESCRIPTION="HTML/XML parser for quick-turnaround applications like screen-scraping"
46 -HOMEPAGE="http://www.crummy.com/software/BeautifulSoup/ https://pypi.python.org/pypi/BeautifulSoup"
47 -SRC_URI="http://www.crummy.com/software/${MY_PN}/download/${MY_P}.tar.gz"
48 -
49 -LICENSE="BSD"
50 -SLOT="python-3"
51 -KEYWORDS="alpha amd64 arm hppa ia64 ppc ppc64 s390 sh sparc x86 ~amd64-fbsd ~x86-fbsd ~amd64-linux ~x86-linux ~x86-macos ~x86-solaris"
52 -IUSE=""
53 -
54 -DEPEND=""
55 -RDEPEND="!dev-python/beautifulsoup:0"
56 -
57 -S="${WORKDIR}/${MY_P}"
58 -
59 -PATCHES=(
60 - "${FILESDIR}/${P}-python-3.patch"
61 - "${FILESDIR}/${P}-disable-tests.patch"
62 -)
63 -
64 -python_test() {
65 - "${PYTHON}" BeautifulSoupTests.py || die "Tests fail with ${EPYTHON}"
66 -}
67 -
68 -python_install_all() {
69 - distutils-r1_python_install_all
70 - # Delete useless files.
71 - rm -r "${ED%/}/usr/bin" || die
72 -}
73
74 diff --git a/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-disable-tests.patch b/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-disable-tests.patch
75 deleted file mode 100644
76 index c97cd76ee31..00000000000
77 --- a/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-disable-tests.patch
78 +++ /dev/null
79 @@ -1,39 +0,0 @@
80 ---- lib/BeautifulSoupTests.py.orig 2015-07-21 08:39:33.077000000 +0000
81 -+++ lib/BeautifulSoupTests.py 2015-07-21 08:41:19.285000000 +0000
82 -@@ -538,13 +538,13 @@
83 - text = "<td nowrap>foo</td>"
84 - self.assertSoupEquals(text, text)
85 -
86 -- def testCData(self):
87 -- xml = "<root>foo<![CDATA[foobar]]>bar</root>"
88 -- self.assertSoupEquals(xml, xml)
89 -- r = re.compile("foo.*bar")
90 -- soup = BeautifulSoup(xml)
91 -- self.assertEquals(soup.find(text=r).string, "foobar")
92 -- self.assertEquals(soup.find(text=r).__class__, CData)
93 -+ #def testCData(self):
94 -+ # xml = "<root>foo<![CDATA[foobar]]>bar</root>"
95 -+ # self.assertSoupEquals(xml, xml)
96 -+ # r = re.compile("foo.*bar")
97 -+ # soup = BeautifulSoup(xml)
98 -+ # self.assertEquals(soup.find(text=r).string, "foobar")
99 -+ # self.assertEquals(soup.find(text=r).__class__, CData)
100 -
101 - def testComments(self):
102 - xml = "foo<!--foobar-->baz"
103 -@@ -607,11 +607,11 @@
104 - def testWhitespaceInDeclaration(self):
105 - self.assertSoupEquals('<! DOCTYPE>', '<!DOCTYPE>')
106 -
107 -- def testJunkInDeclaration(self):
108 -- self.assertSoupEquals('<! Foo = -8>a', '<!Foo = -8>a')
109 -+ #def testJunkInDeclaration(self):
110 -+ # self.assertSoupEquals('<! Foo = -8>a', '<!Foo = -8>a')
111 -
112 -- def testIncompleteDeclaration(self):
113 -- self.assertSoupEquals('a<!b <p>c')
114 -+ #def testIncompleteDeclaration(self):
115 -+ # self.assertSoupEquals('a<!b <p>c')
116 -
117 - def testEntityReplacement(self):
118 - self.assertSoupEquals('<b>hello&nbsp;there</b>')
119
120 diff --git a/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-python-3.patch b/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-python-3.patch
121 deleted file mode 100644
122 index adcbb43dd07..00000000000
123 --- a/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-python-3.patch
124 +++ /dev/null
125 @@ -1,949 +0,0 @@
126 ---- BeautifulSoup.py
127 -+++ BeautifulSoup.py
128 -@@ -76,7 +76,7 @@
129 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT.
130 -
131 - """
132 --from __future__ import generators
133 -+
134 -
135 - __author__ = "Leonard Richardson (leonardr@××××××××.org)"
136 - __version__ = "3.1.0.1"
137 -@@ -84,12 +84,12 @@
138 - __license__ = "New-style BSD"
139 -
140 - import codecs
141 --import markupbase
142 -+import _markupbase
143 - import types
144 - import re
145 --from HTMLParser import HTMLParser, HTMLParseError
146 -+from html.parser import HTMLParser, HTMLParseError
147 - try:
148 -- from htmlentitydefs import name2codepoint
149 -+ from html.entities import name2codepoint
150 - except ImportError:
151 - name2codepoint = {}
152 - try:
153 -@@ -98,18 +98,18 @@
154 - from sets import Set as set
155 -
156 - #These hacks make Beautiful Soup able to parse XML with namespaces
157 --markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match
158 -+_markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match
159 -
160 - DEFAULT_OUTPUT_ENCODING = "utf-8"
161 -
162 - # First, the classes that represent markup elements.
163 -
164 --def sob(unicode, encoding):
165 -+def sob(str, encoding):
166 - """Returns either the given Unicode string or its encoding."""
167 - if encoding is None:
168 -- return unicode
169 -+ return str
170 - else:
171 -- return unicode.encode(encoding)
172 -+ return str.encode(encoding)
173 -
174 - class PageElement:
175 - """Contains the navigational information for some part of the page
176 -@@ -178,8 +178,8 @@
177 - return lastChild
178 -
179 - def insert(self, position, newChild):
180 -- if (isinstance(newChild, basestring)
181 -- or isinstance(newChild, unicode)) \
182 -+ if (isinstance(newChild, str)
183 -+ or isinstance(newChild, str)) \
184 - and not isinstance(newChild, NavigableString):
185 - newChild = NavigableString(newChild)
186 -
187 -@@ -334,7 +334,7 @@
188 - g = generator()
189 - while True:
190 - try:
191 -- i = g.next()
192 -+ i = g.__next__()
193 - except StopIteration:
194 - break
195 - if i:
196 -@@ -385,22 +385,22 @@
197 - def toEncoding(self, s, encoding=None):
198 - """Encodes an object to a string in some encoding, or to Unicode.
199 - ."""
200 -- if isinstance(s, unicode):
201 -+ if isinstance(s, str):
202 - if encoding:
203 - s = s.encode(encoding)
204 - elif isinstance(s, str):
205 - if encoding:
206 - s = s.encode(encoding)
207 - else:
208 -- s = unicode(s)
209 -+ s = str(s)
210 - else:
211 - if encoding:
212 - s = self.toEncoding(str(s), encoding)
213 - else:
214 -- s = unicode(s)
215 -+ s = str(s)
216 - return s
217 -
218 --class NavigableString(unicode, PageElement):
219 -+class NavigableString(str, PageElement):
220 -
221 - def __new__(cls, value):
222 - """Create a new NavigableString.
223 -@@ -410,12 +410,12 @@
224 - passed in to the superclass's __new__ or the superclass won't know
225 - how to handle non-ASCII characters.
226 - """
227 -- if isinstance(value, unicode):
228 -- return unicode.__new__(cls, value)
229 -- return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
230 -+ if isinstance(value, str):
231 -+ return str.__new__(cls, value)
232 -+ return str.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
233 -
234 - def __getnewargs__(self):
235 -- return (unicode(self),)
236 -+ return (str(self),)
237 -
238 - def __getattr__(self, attr):
239 - """text.string gives you text. This is for backwards
240 -@@ -424,7 +424,7 @@
241 - if attr == 'string':
242 - return self
243 - else:
244 -- raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr)
245 -+ raise AttributeError("'%s' object has no attribute '%s'" % (self.__class__.__name__, attr))
246 -
247 - def encode(self, encoding=DEFAULT_OUTPUT_ENCODING):
248 - return self.decode().encode(encoding)
249 -@@ -435,23 +435,23 @@
250 - class CData(NavigableString):
251 -
252 - def decodeGivenEventualEncoding(self, eventualEncoding):
253 -- return u'<![CDATA[' + self + u']]>'
254 -+ return '<![CDATA[' + self + ']]>'
255 -
256 - class ProcessingInstruction(NavigableString):
257 -
258 - def decodeGivenEventualEncoding(self, eventualEncoding):
259 - output = self
260 -- if u'%SOUP-ENCODING%' in output:
261 -+ if '%SOUP-ENCODING%' in output:
262 - output = self.substituteEncoding(output, eventualEncoding)
263 -- return u'<?' + output + u'?>'
264 -+ return '<?' + output + '?>'
265 -
266 - class Comment(NavigableString):
267 - def decodeGivenEventualEncoding(self, eventualEncoding):
268 -- return u'<!--' + self + u'-->'
269 -+ return '<!--' + self + '-->'
270 -
271 - class Declaration(NavigableString):
272 - def decodeGivenEventualEncoding(self, eventualEncoding):
273 -- return u'<!' + self + u'>'
274 -+ return '<!' + self + '>'
275 -
276 - class Tag(PageElement):
277 -
278 -@@ -460,7 +460,7 @@
279 - def _invert(h):
280 - "Cheap function to invert a hash."
281 - i = {}
282 -- for k,v in h.items():
283 -+ for k,v in list(h.items()):
284 - i[v] = k
285 - return i
286 -
287 -@@ -479,23 +479,23 @@
288 - escaped."""
289 - x = match.group(1)
290 - if self.convertHTMLEntities and x in name2codepoint:
291 -- return unichr(name2codepoint[x])
292 -+ return chr(name2codepoint[x])
293 - elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS:
294 - if self.convertXMLEntities:
295 - return self.XML_ENTITIES_TO_SPECIAL_CHARS[x]
296 - else:
297 -- return u'&%s;' % x
298 -+ return '&%s;' % x
299 - elif len(x) > 0 and x[0] == '#':
300 - # Handle numeric entities
301 - if len(x) > 1 and x[1] == 'x':
302 -- return unichr(int(x[2:], 16))
303 -+ return chr(int(x[2:], 16))
304 - else:
305 -- return unichr(int(x[1:]))
306 -+ return chr(int(x[1:]))
307 -
308 - elif self.escapeUnrecognizedEntities:
309 -- return u'&amp;%s;' % x
310 -+ return '&amp;%s;' % x
311 - else:
312 -- return u'&%s;' % x
313 -+ return '&%s;' % x
314 -
315 - def __init__(self, parser, name, attrs=None, parent=None,
316 - previous=None):
317 -@@ -524,7 +524,7 @@
318 - return kval
319 - return (k, re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);",
320 - self._convertEntities, val))
321 -- self.attrs = map(convert, self.attrs)
322 -+ self.attrs = list(map(convert, self.attrs))
323 -
324 - def get(self, key, default=None):
325 - """Returns the value of the 'key' attribute for the tag, or
326 -@@ -533,7 +533,7 @@
327 - return self._getAttrMap().get(key, default)
328 -
329 - def has_key(self, key):
330 -- return self._getAttrMap().has_key(key)
331 -+ return key in self._getAttrMap()
332 -
333 - def __getitem__(self, key):
334 - """tag[key] returns the value of the 'key' attribute for the tag,
335 -@@ -551,7 +551,7 @@
336 - def __contains__(self, x):
337 - return x in self.contents
338 -
339 -- def __nonzero__(self):
340 -+ def __bool__(self):
341 - "A tag is non-None even if it has no contents."
342 - return True
343 -
344 -@@ -577,14 +577,14 @@
345 - #We don't break because bad HTML can define the same
346 - #attribute multiple times.
347 - self._getAttrMap()
348 -- if self.attrMap.has_key(key):
349 -+ if key in self.attrMap:
350 - del self.attrMap[key]
351 -
352 - def __call__(self, *args, **kwargs):
353 - """Calling a tag like a function is the same as calling its
354 - findAll() method. Eg. tag('a') returns a list of all the A tags
355 - found within this tag."""
356 -- return apply(self.findAll, args, kwargs)
357 -+ return self.findAll(*args, **kwargs)
358 -
359 - def __getattr__(self, tag):
360 - #print "Getattr %s.%s" % (self.__class__, tag)
361 -@@ -592,7 +592,7 @@
362 - return self.find(tag[:-3])
363 - elif tag.find('__') != 0:
364 - return self.find(tag)
365 -- raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag)
366 -+ raise AttributeError("'%s' object has no attribute '%s'" % (self.__class__, tag))
367 -
368 - def __eq__(self, other):
369 - """Returns true iff this tag has the same name, the same attributes,
370 -@@ -868,7 +868,7 @@
371 - if isinstance(markupName, Tag):
372 - markup = markupName
373 - markupAttrs = markup
374 -- callFunctionWithTagData = callable(self.name) \
375 -+ callFunctionWithTagData = hasattr(self.name, '__call__') \
376 - and not isinstance(markupName, Tag)
377 -
378 - if (not self.name) \
379 -@@ -880,7 +880,7 @@
380 - else:
381 - match = True
382 - markupAttrMap = None
383 -- for attr, matchAgainst in self.attrs.items():
384 -+ for attr, matchAgainst in list(self.attrs.items()):
385 - if not markupAttrMap:
386 - if hasattr(markupAttrs, 'get'):
387 - markupAttrMap = markupAttrs
388 -@@ -921,16 +921,16 @@
389 - if self._matches(markup, self.text):
390 - found = markup
391 - else:
392 -- raise Exception, "I don't know how to match against a %s" \
393 -- % markup.__class__
394 -+ raise Exception("I don't know how to match against a %s" \
395 -+ % markup.__class__)
396 - return found
397 -
398 - def _matches(self, markup, matchAgainst):
399 - #print "Matching %s against %s" % (markup, matchAgainst)
400 - result = False
401 -- if matchAgainst == True and type(matchAgainst) == types.BooleanType:
402 -+ if matchAgainst == True and type(matchAgainst) == bool:
403 - result = markup != None
404 -- elif callable(matchAgainst):
405 -+ elif hasattr(matchAgainst, '__call__'):
406 - result = matchAgainst(markup)
407 - else:
408 - #Custom match methods take the tag as an argument, but all
409 -@@ -938,7 +938,7 @@
410 - if isinstance(markup, Tag):
411 - markup = markup.name
412 - if markup is not None and not isString(markup):
413 -- markup = unicode(markup)
414 -+ markup = str(markup)
415 - #Now we know that chunk is either a string, or None.
416 - if hasattr(matchAgainst, 'match'):
417 - # It's a regexp object.
418 -@@ -947,10 +947,10 @@
419 - and (markup is not None or not isString(matchAgainst))):
420 - result = markup in matchAgainst
421 - elif hasattr(matchAgainst, 'items'):
422 -- result = markup.has_key(matchAgainst)
423 -+ result = matchAgainst in markup
424 - elif matchAgainst and isString(markup):
425 -- if isinstance(markup, unicode):
426 -- matchAgainst = unicode(matchAgainst)
427 -+ if isinstance(markup, str):
428 -+ matchAgainst = str(matchAgainst)
429 - else:
430 - matchAgainst = str(matchAgainst)
431 -
432 -@@ -971,13 +971,13 @@
433 - """Convenience method that works with all 2.x versions of Python
434 - to determine whether or not something is listlike."""
435 - return ((hasattr(l, '__iter__') and not isString(l))
436 -- or (type(l) in (types.ListType, types.TupleType)))
437 -+ or (type(l) in (list, tuple)))
438 -
439 - def isString(s):
440 - """Convenience method that works with all 2.x versions of Python
441 - to determine whether or not something is stringlike."""
442 - try:
443 -- return isinstance(s, unicode) or isinstance(s, basestring)
444 -+ return isinstance(s, str) or isinstance(s, str)
445 - except NameError:
446 - return isinstance(s, str)
447 -
448 -@@ -989,7 +989,7 @@
449 - for portion in args:
450 - if hasattr(portion, 'items'):
451 - #It's a map. Merge it.
452 -- for k,v in portion.items():
453 -+ for k,v in list(portion.items()):
454 - built[k] = v
455 - elif isList(portion) and not isString(portion):
456 - #It's a list. Map each item to the default.
457 -@@ -1034,7 +1034,7 @@
458 - object, possibly one with a %SOUP-ENCODING% slot into which an
459 - encoding will be plugged later."""
460 - if text[:3] == "xml":
461 -- text = u"xml version='1.0' encoding='%SOUP-ENCODING%'"
462 -+ text = "xml version='1.0' encoding='%SOUP-ENCODING%'"
463 - self._toStringSubclass(text, ProcessingInstruction)
464 -
465 - def handle_comment(self, text):
466 -@@ -1044,7 +1044,7 @@
467 - def handle_charref(self, ref):
468 - "Handle character references as data."
469 - if self.soup.convertEntities:
470 -- data = unichr(int(ref))
471 -+ data = chr(int(ref))
472 - else:
473 - data = '&#%s;' % ref
474 - self.handle_data(data)
475 -@@ -1056,7 +1056,7 @@
476 - data = None
477 - if self.soup.convertHTMLEntities:
478 - try:
479 -- data = unichr(name2codepoint[ref])
480 -+ data = chr(name2codepoint[ref])
481 - except KeyError:
482 - pass
483 -
484 -@@ -1147,7 +1147,7 @@
485 - lambda x: '<!' + x.group(1) + '>')
486 - ]
487 -
488 -- ROOT_TAG_NAME = u'[document]'
489 -+ ROOT_TAG_NAME = '[document]'
490 -
491 - HTML_ENTITIES = "html"
492 - XML_ENTITIES = "xml"
493 -@@ -1236,14 +1236,14 @@
494 - def _feed(self, inDocumentEncoding=None, isHTML=False):
495 - # Convert the document to Unicode.
496 - markup = self.markup
497 -- if isinstance(markup, unicode):
498 -+ if isinstance(markup, str):
499 - if not hasattr(self, 'originalEncoding'):
500 - self.originalEncoding = None
501 - else:
502 - dammit = UnicodeDammit\
503 - (markup, [self.fromEncoding, inDocumentEncoding],
504 - smartQuotesTo=self.smartQuotesTo, isHTML=isHTML)
505 -- markup = dammit.unicode
506 -+ markup = dammit.str
507 - self.originalEncoding = dammit.originalEncoding
508 - self.declaredHTMLEncoding = dammit.declaredHTMLEncoding
509 - if markup:
510 -@@ -1269,8 +1269,8 @@
511 - def isSelfClosingTag(self, name):
512 - """Returns true iff the given string is the name of a
513 - self-closing tag according to this parser."""
514 -- return self.SELF_CLOSING_TAGS.has_key(name) \
515 -- or self.instanceSelfClosingTags.has_key(name)
516 -+ return name in self.SELF_CLOSING_TAGS \
517 -+ or name in self.instanceSelfClosingTags
518 -
519 - def reset(self):
520 - Tag.__init__(self, self, self.ROOT_TAG_NAME)
521 -@@ -1305,7 +1305,7 @@
522 -
523 - def endData(self, containerClass=NavigableString):
524 - if self.currentData:
525 -- currentData = u''.join(self.currentData)
526 -+ currentData = ''.join(self.currentData)
527 - if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and
528 - not set([tag.name for tag in self.tagStack]).intersection(
529 - self.PRESERVE_WHITESPACE_TAGS)):
530 -@@ -1368,7 +1368,7 @@
531 -
532 - nestingResetTriggers = self.NESTABLE_TAGS.get(name)
533 - isNestable = nestingResetTriggers != None
534 -- isResetNesting = self.RESET_NESTING_TAGS.has_key(name)
535 -+ isResetNesting = name in self.RESET_NESTING_TAGS
536 - popTo = None
537 - inclusive = True
538 - for i in range(len(self.tagStack)-1, 0, -1):
539 -@@ -1381,7 +1381,7 @@
540 - if (nestingResetTriggers != None
541 - and p.name in nestingResetTriggers) \
542 - or (nestingResetTriggers == None and isResetNesting
543 -- and self.RESET_NESTING_TAGS.has_key(p.name)):
544 -+ and p.name in self.RESET_NESTING_TAGS):
545 -
546 - #If we encounter one of the nesting reset triggers
547 - #peculiar to this tag, or we encounter another tag
548 -@@ -1399,7 +1399,7 @@
549 - if self.quoteStack:
550 - #This is not a real tag.
551 - #print "<%s> is not real!" % name
552 -- attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs))
553 -+ attrs = ''.join([' %s="%s"' % (x_y[0], x_y[1]) for x_y in attrs])
554 - self.handle_data('<%s%s>' % (name, attrs))
555 - return
556 - self.endData()
557 -@@ -1493,7 +1493,7 @@
558 - BeautifulStoneSoup before writing your own subclass."""
559 -
560 - def __init__(self, *args, **kwargs):
561 -- if not kwargs.has_key('smartQuotesTo'):
562 -+ if 'smartQuotesTo' not in kwargs:
563 - kwargs['smartQuotesTo'] = self.HTML_ENTITIES
564 - kwargs['isHTML'] = True
565 - BeautifulStoneSoup.__init__(self, *args, **kwargs)
566 -@@ -1677,7 +1677,7 @@
567 - parent._getAttrMap()
568 - if (isinstance(tag, Tag) and len(tag.contents) == 1 and
569 - isinstance(tag.contents[0], NavigableString) and
570 -- not parent.attrMap.has_key(tag.name)):
571 -+ tag.name not in parent.attrMap):
572 - parent[tag.name] = tag.contents[0]
573 - BeautifulStoneSoup.popTag(self)
574 -
575 -@@ -1751,9 +1751,9 @@
576 - self._detectEncoding(markup, isHTML)
577 - self.smartQuotesTo = smartQuotesTo
578 - self.triedEncodings = []
579 -- if markup == '' or isinstance(markup, unicode):
580 -+ if markup == '' or isinstance(markup, str):
581 - self.originalEncoding = None
582 -- self.unicode = unicode(markup)
583 -+ self.str = str(markup)
584 - return
585 -
586 - u = None
587 -@@ -1766,7 +1766,7 @@
588 - if u: break
589 -
590 - # If no luck and we have auto-detection library, try that:
591 -- if not u and chardet and not isinstance(self.markup, unicode):
592 -+ if not u and chardet and not isinstance(self.markup, str):
593 - u = self._convertFrom(chardet.detect(self.markup)['encoding'])
594 -
595 - # As a last resort, try utf-8 and windows-1252:
596 -@@ -1775,7 +1775,7 @@
597 - u = self._convertFrom(proposed_encoding)
598 - if u: break
599 -
600 -- self.unicode = u
601 -+ self.str = u
602 - if not u: self.originalEncoding = None
603 -
604 - def _subMSChar(self, match):
605 -@@ -1783,7 +1783,7 @@
606 - entity."""
607 - orig = match.group(1)
608 - sub = self.MS_CHARS.get(orig)
609 -- if type(sub) == types.TupleType:
610 -+ if type(sub) == tuple:
611 - if self.smartQuotesTo == 'xml':
612 - sub = '&#x'.encode() + sub[1].encode() + ';'.encode()
613 - else:
614 -@@ -1804,7 +1804,7 @@
615 - if self.smartQuotesTo and proposed.lower() in("windows-1252",
616 - "iso-8859-1",
617 - "iso-8859-2"):
618 -- smart_quotes_re = "([\x80-\x9f])"
619 -+ smart_quotes_re = b"([\x80-\x9f])"
620 - smart_quotes_compiled = re.compile(smart_quotes_re)
621 - markup = smart_quotes_compiled.sub(self._subMSChar, markup)
622 -
623 -@@ -1813,7 +1813,7 @@
624 - u = self._toUnicode(markup, proposed)
625 - self.markup = u
626 - self.originalEncoding = proposed
627 -- except Exception, e:
628 -+ except Exception as e:
629 - # print "That didn't work!"
630 - # print e
631 - return None
632 -@@ -1842,7 +1842,7 @@
633 - elif data[:4] == '\xff\xfe\x00\x00':
634 - encoding = 'utf-32le'
635 - data = data[4:]
636 -- newdata = unicode(data, encoding)
637 -+ newdata = str(data, encoding)
638 - return newdata
639 -
640 - def _detectEncoding(self, xml_data, isHTML=False):
641 -@@ -1855,41 +1855,41 @@
642 - elif xml_data[:4] == '\x00\x3c\x00\x3f':
643 - # UTF-16BE
644 - sniffed_xml_encoding = 'utf-16be'
645 -- xml_data = unicode(xml_data, 'utf-16be').encode('utf-8')
646 -+ xml_data = str(xml_data, 'utf-16be').encode('utf-8')
647 - elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \
648 - and (xml_data[2:4] != '\x00\x00'):
649 - # UTF-16BE with BOM
650 - sniffed_xml_encoding = 'utf-16be'
651 -- xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8')
652 -+ xml_data = str(xml_data[2:], 'utf-16be').encode('utf-8')
653 - elif xml_data[:4] == '\x3c\x00\x3f\x00':
654 - # UTF-16LE
655 - sniffed_xml_encoding = 'utf-16le'
656 -- xml_data = unicode(xml_data, 'utf-16le').encode('utf-8')
657 -+ xml_data = str(xml_data, 'utf-16le').encode('utf-8')
658 - elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \
659 - (xml_data[2:4] != '\x00\x00'):
660 - # UTF-16LE with BOM
661 - sniffed_xml_encoding = 'utf-16le'
662 -- xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8')
663 -+ xml_data = str(xml_data[2:], 'utf-16le').encode('utf-8')
664 - elif xml_data[:4] == '\x00\x00\x00\x3c':
665 - # UTF-32BE
666 - sniffed_xml_encoding = 'utf-32be'
667 -- xml_data = unicode(xml_data, 'utf-32be').encode('utf-8')
668 -+ xml_data = str(xml_data, 'utf-32be').encode('utf-8')
669 - elif xml_data[:4] == '\x3c\x00\x00\x00':
670 - # UTF-32LE
671 - sniffed_xml_encoding = 'utf-32le'
672 -- xml_data = unicode(xml_data, 'utf-32le').encode('utf-8')
673 -+ xml_data = str(xml_data, 'utf-32le').encode('utf-8')
674 - elif xml_data[:4] == '\x00\x00\xfe\xff':
675 - # UTF-32BE with BOM
676 - sniffed_xml_encoding = 'utf-32be'
677 -- xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8')
678 -+ xml_data = str(xml_data[4:], 'utf-32be').encode('utf-8')
679 - elif xml_data[:4] == '\xff\xfe\x00\x00':
680 - # UTF-32LE with BOM
681 - sniffed_xml_encoding = 'utf-32le'
682 -- xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8')
683 -+ xml_data = str(xml_data[4:], 'utf-32le').encode('utf-8')
684 - elif xml_data[:3] == '\xef\xbb\xbf':
685 - # UTF-8 with BOM
686 - sniffed_xml_encoding = 'utf-8'
687 -- xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8')
688 -+ xml_data = str(xml_data[3:], 'utf-8').encode('utf-8')
689 - else:
690 - sniffed_xml_encoding = 'ascii'
691 - pass
692 -@@ -1954,41 +1954,41 @@
693 - 250,251,252,253,254,255)
694 - import string
695 - c.EBCDIC_TO_ASCII_MAP = string.maketrans( \
696 -- ''.join(map(chr, range(256))), ''.join(map(chr, emap)))
697 -+ ''.join(map(chr, list(range(256)))), ''.join(map(chr, emap)))
698 - return s.translate(c.EBCDIC_TO_ASCII_MAP)
699 -
700 -- MS_CHARS = { '\x80' : ('euro', '20AC'),
701 -- '\x81' : ' ',
702 -- '\x82' : ('sbquo', '201A'),
703 -- '\x83' : ('fnof', '192'),
704 -- '\x84' : ('bdquo', '201E'),
705 -- '\x85' : ('hellip', '2026'),
706 -- '\x86' : ('dagger', '2020'),
707 -- '\x87' : ('Dagger', '2021'),
708 -- '\x88' : ('circ', '2C6'),
709 -- '\x89' : ('permil', '2030'),
710 -- '\x8A' : ('Scaron', '160'),
711 -- '\x8B' : ('lsaquo', '2039'),
712 -- '\x8C' : ('OElig', '152'),
713 -- '\x8D' : '?',
714 -- '\x8E' : ('#x17D', '17D'),
715 -- '\x8F' : '?',
716 -- '\x90' : '?',
717 -- '\x91' : ('lsquo', '2018'),
718 -- '\x92' : ('rsquo', '2019'),
719 -- '\x93' : ('ldquo', '201C'),
720 -- '\x94' : ('rdquo', '201D'),
721 -- '\x95' : ('bull', '2022'),
722 -- '\x96' : ('ndash', '2013'),
723 -- '\x97' : ('mdash', '2014'),
724 -- '\x98' : ('tilde', '2DC'),
725 -- '\x99' : ('trade', '2122'),
726 -- '\x9a' : ('scaron', '161'),
727 -- '\x9b' : ('rsaquo', '203A'),
728 -- '\x9c' : ('oelig', '153'),
729 -- '\x9d' : '?',
730 -- '\x9e' : ('#x17E', '17E'),
731 -- '\x9f' : ('Yuml', ''),}
732 -+ MS_CHARS = { b'\x80' : ('euro', '20AC'),
733 -+ b'\x81' : ' ',
734 -+ b'\x82' : ('sbquo', '201A'),
735 -+ b'\x83' : ('fnof', '192'),
736 -+ b'\x84' : ('bdquo', '201E'),
737 -+ b'\x85' : ('hellip', '2026'),
738 -+ b'\x86' : ('dagger', '2020'),
739 -+ b'\x87' : ('Dagger', '2021'),
740 -+ b'\x88' : ('circ', '2C6'),
741 -+ b'\x89' : ('permil', '2030'),
742 -+ b'\x8A' : ('Scaron', '160'),
743 -+ b'\x8B' : ('lsaquo', '2039'),
744 -+ b'\x8C' : ('OElig', '152'),
745 -+ b'\x8D' : '?',
746 -+ b'\x8E' : ('#x17D', '17D'),
747 -+ b'\x8F' : '?',
748 -+ b'\x90' : '?',
749 -+ b'\x91' : ('lsquo', '2018'),
750 -+ b'\x92' : ('rsquo', '2019'),
751 -+ b'\x93' : ('ldquo', '201C'),
752 -+ b'\x94' : ('rdquo', '201D'),
753 -+ b'\x95' : ('bull', '2022'),
754 -+ b'\x96' : ('ndash', '2013'),
755 -+ b'\x97' : ('mdash', '2014'),
756 -+ b'\x98' : ('tilde', '2DC'),
757 -+ b'\x99' : ('trade', '2122'),
758 -+ b'\x9a' : ('scaron', '161'),
759 -+ b'\x9b' : ('rsaquo', '203A'),
760 -+ b'\x9c' : ('oelig', '153'),
761 -+ b'\x9d' : '?',
762 -+ b'\x9e' : ('#x17E', '17E'),
763 -+ b'\x9f' : ('Yuml', ''),}
764 -
765 - #######################################################################
766 -
767 -@@ -1997,4 +1997,4 @@
768 - if __name__ == '__main__':
769 - import sys
770 - soup = BeautifulSoup(sys.stdin)
771 -- print soup.prettify()
772 -+ print(soup.prettify())
773 ---- BeautifulSoupTests.py
774 -+++ BeautifulSoupTests.py
775 -@@ -82,7 +82,7 @@
776 - def testFindAllText(self):
777 - soup = BeautifulSoup("<html>\xbb</html>")
778 - self.assertEqual(soup.findAll(text=re.compile('.*')),
779 -- [u'\xbb'])
780 -+ ['\xbb'])
781 -
782 - def testFindAllByRE(self):
783 - import re
784 -@@ -215,7 +215,7 @@
785 - soup = BeautifulSoup(self.x, parseOnlyThese=strainer)
786 - self.assertEquals(len(soup), 10)
787 -
788 -- strainer = SoupStrainer(text=lambda(x):x[8]=='3')
789 -+ strainer = SoupStrainer(text=lambda x:x[8]=='3')
790 - soup = BeautifulSoup(self.x, parseOnlyThese=strainer)
791 - self.assertEquals(len(soup), 3)
792 -
793 -@@ -256,7 +256,7 @@
794 - self.assertEqual(copied.decode(), self.soup.decode())
795 -
796 - def testUnicodePickle(self):
797 -- import cPickle as pickle
798 -+ import pickle as pickle
799 - html = "<b>" + chr(0xc3) + "</b>"
800 - soup = BeautifulSoup(html)
801 - dumped = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL)
802 -@@ -586,23 +586,23 @@
803 - self.assertEquals(soup.decode(), "<<sacr&eacute; bleu!>>")
804 -
805 - soup = BeautifulStoneSoup(text, convertEntities=htmlEnt)
806 -- self.assertEquals(soup.decode(), u"<<sacr\xe9 bleu!>>")
807 -+ self.assertEquals(soup.decode(), "<<sacr\xe9 bleu!>>")
808 -
809 - # Make sure the "XML", "HTML", and "XHTML" settings work.
810 - text = "&lt;&trade;&apos;"
811 - soup = BeautifulStoneSoup(text, convertEntities=xmlEnt)
812 -- self.assertEquals(soup.decode(), u"<&trade;'")
813 -+ self.assertEquals(soup.decode(), "<&trade;'")
814 -
815 - soup = BeautifulStoneSoup(text, convertEntities=htmlEnt)
816 -- self.assertEquals(soup.decode(), u"<\u2122&apos;")
817 -+ self.assertEquals(soup.decode(), "<\u2122&apos;")
818 -
819 - soup = BeautifulStoneSoup(text, convertEntities=xhtmlEnt)
820 -- self.assertEquals(soup.decode(), u"<\u2122'")
821 -+ self.assertEquals(soup.decode(), "<\u2122'")
822 -
823 - def testNonBreakingSpaces(self):
824 - soup = BeautifulSoup("<a>&nbsp;&nbsp;</a>",
825 - convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
826 -- self.assertEquals(soup.decode(), u"<a>\xa0\xa0</a>")
827 -+ self.assertEquals(soup.decode(), "<a>\xa0\xa0</a>")
828 -
829 - def testWhitespaceInDeclaration(self):
830 - self.assertSoupEquals('<! DOCTYPE>', '<!DOCTYPE>')
831 -@@ -617,27 +617,27 @@
832 - self.assertSoupEquals('<b>hello&nbsp;there</b>')
833 -
834 - def testEntitiesInAttributeValues(self):
835 -- self.assertSoupEquals('<x t="x&#241;">', '<x t="x\xc3\xb1"></x>',
836 -+ self.assertSoupEquals('<x t="x&#241;">', b'<x t="x\xc3\xb1"></x>',
837 - encoding='utf-8')
838 -- self.assertSoupEquals('<x t="x&#xf1;">', '<x t="x\xc3\xb1"></x>',
839 -+ self.assertSoupEquals('<x t="x&#xf1;">', b'<x t="x\xc3\xb1"></x>',
840 - encoding='utf-8')
841 -
842 - soup = BeautifulSoup('<x t="&gt;&trade;">',
843 - convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
844 -- self.assertEquals(soup.decode(), u'<x t="&gt;\u2122"></x>')
845 -+ self.assertEquals(soup.decode(), '<x t="&gt;\u2122"></x>')
846 -
847 - uri = "http://crummy.com?sacr&eacute;&amp;bleu"
848 - link = '<a href="%s"></a>' % uri
849 -
850 - soup = BeautifulSoup(link, convertEntities=BeautifulSoup.HTML_ENTITIES)
851 - self.assertEquals(soup.decode(),
852 -- link.replace("&eacute;", u"\xe9"))
853 -+ link.replace("&eacute;", "\xe9"))
854 -
855 - uri = "http://crummy.com?sacr&eacute;&bleu"
856 - link = '<a href="%s"></a>' % uri
857 - soup = BeautifulSoup(link, convertEntities=BeautifulSoup.HTML_ENTITIES)
858 - self.assertEquals(soup.a['href'],
859 -- uri.replace("&eacute;", u"\xe9"))
860 -+ uri.replace("&eacute;", "\xe9"))
861 -
862 - def testNakedAmpersands(self):
863 - html = {'convertEntities':BeautifulStoneSoup.HTML_ENTITIES}
864 -@@ -663,13 +663,13 @@
865 - smart quote fixes."""
866 -
867 - def testUnicodeDammitStandalone(self):
868 -- markup = "<foo>\x92</foo>"
869 -+ markup = b"<foo>\x92</foo>"
870 - dammit = UnicodeDammit(markup)
871 -- self.assertEquals(dammit.unicode, "<foo>&#x2019;</foo>")
872 -+ self.assertEquals(dammit.str, "<foo>&#x2019;</foo>")
873 -
874 -- hebrew = "\xed\xe5\xec\xf9"
875 -+ hebrew = b"\xed\xe5\xec\xf9"
876 - dammit = UnicodeDammit(hebrew, ["iso-8859-8"])
877 -- self.assertEquals(dammit.unicode, u'\u05dd\u05d5\u05dc\u05e9')
878 -+ self.assertEquals(dammit.str, '\u05dd\u05d5\u05dc\u05e9')
879 - self.assertEquals(dammit.originalEncoding, 'iso-8859-8')
880 -
881 - def testGarbageInGarbageOut(self):
882 -@@ -677,13 +677,13 @@
883 - asciiSoup = BeautifulStoneSoup(ascii)
884 - self.assertEquals(ascii, asciiSoup.decode())
885 -
886 -- unicodeData = u"<foo>\u00FC</foo>"
887 -+ unicodeData = "<foo>\u00FC</foo>"
888 - utf8 = unicodeData.encode("utf-8")
889 -- self.assertEquals(utf8, '<foo>\xc3\xbc</foo>')
890 -+ self.assertEquals(utf8, b'<foo>\xc3\xbc</foo>')
891 -
892 - unicodeSoup = BeautifulStoneSoup(unicodeData)
893 - self.assertEquals(unicodeData, unicodeSoup.decode())
894 -- self.assertEquals(unicodeSoup.foo.string, u'\u00FC')
895 -+ self.assertEquals(unicodeSoup.foo.string, '\u00FC')
896 -
897 - utf8Soup = BeautifulStoneSoup(utf8, fromEncoding='utf-8')
898 - self.assertEquals(utf8, utf8Soup.encode('utf-8'))
899 -@@ -696,18 +696,18 @@
900 -
901 - def testHandleInvalidCodec(self):
902 - for bad_encoding in ['.utf8', '...', 'utF---16.!']:
903 -- soup = BeautifulSoup(u"Räksmörgås".encode("utf-8"),
904 -+ soup = BeautifulSoup("Räksmörgås".encode("utf-8"),
905 - fromEncoding=bad_encoding)
906 - self.assertEquals(soup.originalEncoding, 'utf-8')
907 -
908 - def testUnicodeSearch(self):
909 -- html = u'<html><body><h1>Räksmörgås</h1></body></html>'
910 -+ html = '<html><body><h1>Räksmörgås</h1></body></html>'
911 - soup = BeautifulSoup(html)
912 -- self.assertEqual(soup.find(text=u'Räksmörgås'),u'Räksmörgås')
913 -+ self.assertEqual(soup.find(text='Räksmörgås'),'Räksmörgås')
914 -
915 - def testRewrittenXMLHeader(self):
916 -- euc_jp = '<?xml version="1.0 encoding="euc-jp"?>\n<foo>\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n</foo>\n'
917 -- utf8 = "<?xml version='1.0' encoding='utf-8'?>\n<foo>\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n</foo>\n"
918 -+ euc_jp = b'<?xml version="1.0 encoding="euc-jp"?>\n<foo>\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n</foo>\n'
919 -+ utf8 = b"<?xml version='1.0' encoding='utf-8'?>\n<foo>\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n</foo>\n"
920 - soup = BeautifulStoneSoup(euc_jp)
921 - if soup.originalEncoding != "euc-jp":
922 - raise Exception("Test failed when parsing euc-jp document. "
923 -@@ -718,12 +718,12 @@
924 - self.assertEquals(soup.originalEncoding, "euc-jp")
925 - self.assertEquals(soup.renderContents('utf-8'), utf8)
926 -
927 -- old_text = "<?xml encoding='windows-1252'><foo>\x92</foo>"
928 -+ old_text = b"<?xml encoding='windows-1252'><foo>\x92</foo>"
929 - new_text = "<?xml version='1.0' encoding='utf-8'?><foo>&rsquo;</foo>"
930 - self.assertSoupEquals(old_text, new_text)
931 -
932 - def testRewrittenMetaTag(self):
933 -- no_shift_jis_html = '''<html><head>\n<meta http-equiv="Content-language" content="ja" /></head><body><pre>\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n</pre></body></html>'''
934 -+ no_shift_jis_html = b'''<html><head>\n<meta http-equiv="Content-language" content="ja" /></head><body><pre>\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n</pre></body></html>'''
935 - soup = BeautifulSoup(no_shift_jis_html)
936 -
937 - # Beautiful Soup used to try to rewrite the meta tag even if the
938 -@@ -733,16 +733,16 @@
939 - soup = BeautifulSoup(no_shift_jis_html, parseOnlyThese=strainer)
940 - self.assertEquals(soup.contents[0].name, 'pre')
941 -
942 -- meta_tag = ('<meta content="text/html; charset=x-sjis" '
943 -- 'http-equiv="Content-type" />')
944 -+ meta_tag = (b'<meta content="text/html; charset=x-sjis" '
945 -+ b'http-equiv="Content-type" />')
946 - shift_jis_html = (
947 -- '<html><head>\n%s\n'
948 -- '<meta http-equiv="Content-language" content="ja" />'
949 -- '</head><body><pre>\n'
950 -- '\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
951 -- '\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
952 -- '\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n'
953 -- '</pre></body></html>') % meta_tag
954 -+ b'<html><head>\n' + meta_tag + b'\n'
955 -+ b'<meta http-equiv="Content-language" content="ja" />'
956 -+ b'</head><body><pre>\n'
957 -+ b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
958 -+ b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
959 -+ b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n'
960 -+ b'</pre></body></html>')
961 - soup = BeautifulSoup(shift_jis_html)
962 - if soup.originalEncoding != "shift-jis":
963 - raise Exception("Test failed when parsing shift-jis document "
964 -@@ -755,59 +755,59 @@
965 - content_type_tag = soup.meta['content']
966 - self.assertEquals(content_type_tag[content_type_tag.find('charset='):],
967 - 'charset=%SOUP-ENCODING%')
968 -- content_type = str(soup.meta)
969 -+ content_type = soup.meta.decode()
970 - index = content_type.find('charset=')
971 - self.assertEqual(content_type[index:index+len('charset=utf8')+1],
972 - 'charset=utf-8')
973 - content_type = soup.meta.encode('shift-jis')
974 -- index = content_type.find('charset=')
975 -+ index = content_type.find(b'charset=')
976 - self.assertEqual(content_type[index:index+len('charset=shift-jis')],
977 - 'charset=shift-jis'.encode())
978 -
979 - self.assertEquals(soup.encode('utf-8'), (
980 -- '<html><head>\n'
981 -- '<meta content="text/html; charset=utf-8" '
982 -- 'http-equiv="Content-type" />\n'
983 -- '<meta http-equiv="Content-language" content="ja" />'
984 -- '</head><body><pre>\n'
985 -- '\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3'
986 -- '\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3'
987 -- '\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6'
988 -- '\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3'
989 -- '\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n'
990 -- '</pre></body></html>'))
991 -+ b'<html><head>\n'
992 -+ b'<meta content="text/html; charset=utf-8" '
993 -+ b'http-equiv="Content-type" />\n'
994 -+ b'<meta http-equiv="Content-language" content="ja" />'
995 -+ b'</head><body><pre>\n'
996 -+ b'\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3'
997 -+ b'\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3'
998 -+ b'\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6'
999 -+ b'\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3'
1000 -+ b'\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n'
1001 -+ b'</pre></body></html>'))
1002 - self.assertEquals(soup.encode("shift-jis"),
1003 - shift_jis_html.replace('x-sjis'.encode(),
1004 - 'shift-jis'.encode()))
1005 -
1006 -- isolatin = """<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>"""
1007 -+ isolatin = b"""<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>"""
1008 - soup = BeautifulSoup(isolatin)
1009 -
1010 - utf8 = isolatin.replace("ISO-Latin-1".encode(), "utf-8".encode())
1011 -- utf8 = utf8.replace("\xe9", "\xc3\xa9")
1012 -+ utf8 = utf8.replace(b"\xe9", b"\xc3\xa9")
1013 - self.assertSoupEquals(soup.encode("utf-8"), utf8, encoding='utf-8')
1014 -
1015 - def testHebrew(self):
1016 -- iso_8859_8= '<HEAD>\n<TITLE>Hebrew (ISO 8859-8) in Visual Directionality</TITLE>\n\n\n\n</HEAD>\n<BODY>\n<H1>Hebrew (ISO 8859-8) in Visual Directionality</H1>\n\xed\xe5\xec\xf9\n</BODY>\n'
1017 -- utf8 = '<head>\n<title>Hebrew (ISO 8859-8) in Visual Directionality</title>\n</head>\n<body>\n<h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n</body>\n'
1018 -+ iso_8859_8= b'<HEAD>\n<TITLE>Hebrew (ISO 8859-8) in Visual Directionality</TITLE>\n\n\n\n</HEAD>\n<BODY>\n<H1>Hebrew (ISO 8859-8) in Visual Directionality</H1>\n\xed\xe5\xec\xf9\n</BODY>\n'
1019 -+ utf8 = b'<head>\n<title>Hebrew (ISO 8859-8) in Visual Directionality</title>\n</head>\n<body>\n<h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n</body>\n'
1020 - soup = BeautifulStoneSoup(iso_8859_8, fromEncoding="iso-8859-8")
1021 - self.assertEquals(soup.encode('utf-8'), utf8)
1022 -
1023 - def testSmartQuotesNotSoSmartAnymore(self):
1024 -- self.assertSoupEquals("\x91Foo\x92 <!--blah-->",
1025 -+ self.assertSoupEquals(b"\x91Foo\x92 <!--blah-->",
1026 - '&lsquo;Foo&rsquo; <!--blah-->')
1027 -
1028 - def testDontConvertSmartQuotesWhenAlsoConvertingEntities(self):
1029 -- smartQuotes = "Il a dit, \x8BSacr&eacute; bl&#101;u!\x9b"
1030 -+ smartQuotes = b"Il a dit, \x8BSacr&eacute; bl&#101;u!\x9b"
1031 - soup = BeautifulSoup(smartQuotes)
1032 - self.assertEquals(soup.decode(),
1033 - 'Il a dit, &lsaquo;Sacr&eacute; bl&#101;u!&rsaquo;')
1034 - soup = BeautifulSoup(smartQuotes, convertEntities="html")
1035 - self.assertEquals(soup.encode('utf-8'),
1036 -- 'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba')
1037 -+ b'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba')
1038 -
1039 - def testDontSeeSmartQuotesWhereThereAreNone(self):
1040 -- utf_8 = "\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
1041 -+ utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
1042 - self.assertSoupEquals(utf_8, encoding='utf-8')
1043 -
1044 -
1045 ---- setup.py
1046 -+++ setup.py
1047 -@@ -19,19 +19,19 @@
1048 - suite = loader.loadTestsFromModule(BeautifulSoupTests)
1049 - suite.run(result)
1050 - if not result.wasSuccessful():
1051 -- print "Unit tests have failed!"
1052 -+ print("Unit tests have failed!")
1053 - for l in result.errors, result.failures:
1054 - for case, error in l:
1055 -- print "-" * 80
1056 -+ print("-" * 80)
1057 - desc = case.shortDescription()
1058 - if desc:
1059 -- print desc
1060 -- print error
1061 -- print '''If you see an error like: "'ascii' codec can't encode character...", see\nthe Beautiful Soup documentation:\n http://www.crummy.com/software/BeautifulSoup/documentation.html#Why%20can't%20Beautiful%20Soup%20print%20out%20the%20non-ASCII%20characters%20I%20gave%20it?'''
1062 -- print "This might or might not be a problem depending on what you plan to do with\nBeautiful Soup."
1063 -+ print(desc)
1064 -+ print(error)
1065 -+ print('''If you see an error like: "'ascii' codec can't encode character...", see\nthe Beautiful Soup documentation:\n http://www.crummy.com/software/BeautifulSoup/documentation.html#Why%20can't%20Beautiful%20Soup%20print%20out%20the%20non-ASCII%20characters%20I%20gave%20it?''')
1066 -+ print("This might or might not be a problem depending on what you plan to do with\nBeautiful Soup.")
1067 - if sys.argv[1] == 'sdist':
1068 -- print
1069 -- print "I'm not going to make a source distribution since the tests don't pass."
1070 -+ print()
1071 -+ print("I'm not going to make a source distribution since the tests don't pass.")
1072 - sys.exit(1)
1073 -
1074 - setup(name="BeautifulSoup",
1075
1076 diff --git a/profiles/package.mask b/profiles/package.mask
1077 index 6d5d46bffac..81ca42e20b7 100644
1078 --- a/profiles/package.mask
1079 +++ b/profiles/package.mask
1080 @@ -481,11 +481,6 @@ media-plugins/vdr-skinnopacity
1081 # #623706. Removal in a month.
1082 net-libs/dhcpcd-dbus
1083
1084 -# Pacho Ramos <pacho@g.o> (14 Jul 2017)
1085 -# Not compatible with python >= 3.5 but neither needed by anything in the
1086 -# tree anymore, bug #624670. Removal in a month.
1087 -=dev-python/beautifulsoup-3.1.0.1-r2
1088 -
1089 # Lars Wendler <polynomial-c@g.o> (07 Jul 2017)
1090 # Masked until >=net-fs/samba-4.7 is in the tree and
1091 # unmasked. (bug #624106)