Gentoo Archives: gentoo-commits

From: "Arfrever Frehtes Taifersar Arahesis (arfrever)" <arfrever@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] gentoo-x86 commit in dev-python/beautifulsoup/files: beautifulsoup-3.1.0.1-python-3.patch
Date: Tue, 08 Sep 2009 19:57:08
Message-Id: E1Ml6oJ-00058v-T6@stork.gentoo.org
1 arfrever 09/09/08 19:57:03
2
3 Added: beautifulsoup-3.1.0.1-python-3.patch
4 Log:
5 Add patch for compatibility with Python 3 (upstream patches don't apply cleanly). Set SUPPORT_PYTHON_ABIS.
6 (Portage version: 14218-svn/cvs/Linux x86_64)
7
8 Revision Changes Path
9 1.1 dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-python-3.patch
10
11 file : http://sources.gentoo.org/viewcvs.py/gentoo-x86/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-python-3.patch?rev=1.1&view=markup
12 plain: http://sources.gentoo.org/viewcvs.py/gentoo-x86/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-python-3.patch?rev=1.1&content-type=text/plain
13
14 Index: beautifulsoup-3.1.0.1-python-3.patch
15 ===================================================================
16 --- BeautifulSoup.py
17 +++ BeautifulSoup.py
18 @@ -76,7 +76,7 @@
19 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT.
20
21 """
22 -from __future__ import generators
23 +
24
25 __author__ = "Leonard Richardson (leonardr@××××××××.org)"
26 __version__ = "3.1.0.1"
27 @@ -84,12 +84,12 @@
28 __license__ = "New-style BSD"
29
30 import codecs
31 -import markupbase
32 +import _markupbase
33 import types
34 import re
35 -from HTMLParser import HTMLParser, HTMLParseError
36 +from html.parser import HTMLParser, HTMLParseError
37 try:
38 - from htmlentitydefs import name2codepoint
39 + from html.entities import name2codepoint
40 except ImportError:
41 name2codepoint = {}
42 try:
43 @@ -98,18 +98,18 @@
44 from sets import Set as set
45
46 #These hacks make Beautiful Soup able to parse XML with namespaces
47 -markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match
48 +_markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match
49
50 DEFAULT_OUTPUT_ENCODING = "utf-8"
51
52 # First, the classes that represent markup elements.
53
54 -def sob(unicode, encoding):
55 +def sob(str, encoding):
56 """Returns either the given Unicode string or its encoding."""
57 if encoding is None:
58 - return unicode
59 + return str
60 else:
61 - return unicode.encode(encoding)
62 + return str.encode(encoding)
63
64 class PageElement:
65 """Contains the navigational information for some part of the page
66 @@ -178,8 +178,8 @@
67 return lastChild
68
69 def insert(self, position, newChild):
70 - if (isinstance(newChild, basestring)
71 - or isinstance(newChild, unicode)) \
72 + if (isinstance(newChild, str)
73 + or isinstance(newChild, str)) \
74 and not isinstance(newChild, NavigableString):
75 newChild = NavigableString(newChild)
76
77 @@ -334,7 +334,7 @@
78 g = generator()
79 while True:
80 try:
81 - i = g.next()
82 + i = g.__next__()
83 except StopIteration:
84 break
85 if i:
86 @@ -385,22 +385,22 @@
87 def toEncoding(self, s, encoding=None):
88 """Encodes an object to a string in some encoding, or to Unicode.
89 ."""
90 - if isinstance(s, unicode):
91 + if isinstance(s, str):
92 if encoding:
93 s = s.encode(encoding)
94 elif isinstance(s, str):
95 if encoding:
96 s = s.encode(encoding)
97 else:
98 - s = unicode(s)
99 + s = str(s)
100 else:
101 if encoding:
102 s = self.toEncoding(str(s), encoding)
103 else:
104 - s = unicode(s)
105 + s = str(s)
106 return s
107
108 -class NavigableString(unicode, PageElement):
109 +class NavigableString(str, PageElement):
110
111 def __new__(cls, value):
112 """Create a new NavigableString.
113 @@ -410,12 +410,12 @@
114 passed in to the superclass's __new__ or the superclass won't know
115 how to handle non-ASCII characters.
116 """
117 - if isinstance(value, unicode):
118 - return unicode.__new__(cls, value)
119 - return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
120 + if isinstance(value, str):
121 + return str.__new__(cls, value)
122 + return str.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
123
124 def __getnewargs__(self):
125 - return (unicode(self),)
126 + return (str(self),)
127
128 def __getattr__(self, attr):
129 """text.string gives you text. This is for backwards
130 @@ -424,7 +424,7 @@
131 if attr == 'string':
132 return self
133 else:
134 - raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr)
135 + raise AttributeError("'%s' object has no attribute '%s'" % (self.__class__.__name__, attr))
136
137 def encode(self, encoding=DEFAULT_OUTPUT_ENCODING):
138 return self.decode().encode(encoding)
139 @@ -435,23 +435,23 @@
140 class CData(NavigableString):
141
142 def decodeGivenEventualEncoding(self, eventualEncoding):
143 - return u'<![CDATA[' + self + u']]>'
144 + return '<![CDATA[' + self + ']]>'
145
146 class ProcessingInstruction(NavigableString):
147
148 def decodeGivenEventualEncoding(self, eventualEncoding):
149 output = self
150 - if u'%SOUP-ENCODING%' in output:
151 + if '%SOUP-ENCODING%' in output:
152 output = self.substituteEncoding(output, eventualEncoding)
153 - return u'<?' + output + u'?>'
154 + return '<?' + output + '?>'
155
156 class Comment(NavigableString):
157 def decodeGivenEventualEncoding(self, eventualEncoding):
158 - return u'<!--' + self + u'-->'
159 + return '<!--' + self + '-->'
160
161 class Declaration(NavigableString):
162 def decodeGivenEventualEncoding(self, eventualEncoding):
163 - return u'<!' + self + u'>'
164 + return '<!' + self + '>'
165
166 class Tag(PageElement):
167
168 @@ -460,7 +460,7 @@
169 def _invert(h):
170 "Cheap function to invert a hash."
171 i = {}
172 - for k,v in h.items():
173 + for k,v in list(h.items()):
174 i[v] = k
175 return i
176
177 @@ -479,23 +479,23 @@
178 escaped."""
179 x = match.group(1)
180 if self.convertHTMLEntities and x in name2codepoint:
181 - return unichr(name2codepoint[x])
182 + return chr(name2codepoint[x])
183 elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS:
184 if self.convertXMLEntities:
185 return self.XML_ENTITIES_TO_SPECIAL_CHARS[x]
186 else:
187 - return u'&%s;' % x
188 + return '&%s;' % x
189 elif len(x) > 0 and x[0] == '#':
190 # Handle numeric entities
191 if len(x) > 1 and x[1] == 'x':
192 - return unichr(int(x[2:], 16))
193 + return chr(int(x[2:], 16))
194 else:
195 - return unichr(int(x[1:]))
196 + return chr(int(x[1:]))
197
198 elif self.escapeUnrecognizedEntities:
199 - return u'&amp;%s;' % x
200 + return '&amp;%s;' % x
201 else:
202 - return u'&%s;' % x
203 + return '&%s;' % x
204
205 def __init__(self, parser, name, attrs=None, parent=None,
206 previous=None):
207 @@ -524,7 +524,7 @@
208 return kval
209 return (k, re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);",
210 self._convertEntities, val))
211 - self.attrs = map(convert, self.attrs)
212 + self.attrs = list(map(convert, self.attrs))
213
214 def get(self, key, default=None):
215 """Returns the value of the 'key' attribute for the tag, or
216 @@ -533,7 +533,7 @@
217 return self._getAttrMap().get(key, default)
218
219 def has_key(self, key):
220 - return self._getAttrMap().has_key(key)
221 + return key in self._getAttrMap()
222
223 def __getitem__(self, key):
224 """tag[key] returns the value of the 'key' attribute for the tag,
225 @@ -551,7 +551,7 @@
226 def __contains__(self, x):
227 return x in self.contents
228
229 - def __nonzero__(self):
230 + def __bool__(self):
231 "A tag is non-None even if it has no contents."
232 return True
233
234 @@ -577,14 +577,14 @@
235 #We don't break because bad HTML can define the same
236 #attribute multiple times.
237 self._getAttrMap()
238 - if self.attrMap.has_key(key):
239 + if key in self.attrMap:
240 del self.attrMap[key]
241
242 def __call__(self, *args, **kwargs):
243 """Calling a tag like a function is the same as calling its
244 findAll() method. Eg. tag('a') returns a list of all the A tags
245 found within this tag."""
246 - return apply(self.findAll, args, kwargs)
247 + return self.findAll(*args, **kwargs)
248
249 def __getattr__(self, tag):
250 #print "Getattr %s.%s" % (self.__class__, tag)
251 @@ -592,7 +592,7 @@
252 return self.find(tag[:-3])
253 elif tag.find('__') != 0:
254 return self.find(tag)
255 - raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag)
256 + raise AttributeError("'%s' object has no attribute '%s'" % (self.__class__, tag))
257
258 def __eq__(self, other):
259 """Returns true iff this tag has the same name, the same attributes,
260 @@ -868,7 +868,7 @@
261 if isinstance(markupName, Tag):
262 markup = markupName
263 markupAttrs = markup
264 - callFunctionWithTagData = callable(self.name) \
265 + callFunctionWithTagData = hasattr(self.name, '__call__') \
266 and not isinstance(markupName, Tag)
267
268 if (not self.name) \
269 @@ -880,7 +880,7 @@
270 else:
271 match = True
272 markupAttrMap = None
273 - for attr, matchAgainst in self.attrs.items():
274 + for attr, matchAgainst in list(self.attrs.items()):
275 if not markupAttrMap:
276 if hasattr(markupAttrs, 'get'):
277 markupAttrMap = markupAttrs
278 @@ -921,16 +921,16 @@
279 if self._matches(markup, self.text):
280 found = markup
281 else:
282 - raise Exception, "I don't know how to match against a %s" \
283 - % markup.__class__
284 + raise Exception("I don't know how to match against a %s" \
285 + % markup.__class__)
286 return found
287
288 def _matches(self, markup, matchAgainst):
289 #print "Matching %s against %s" % (markup, matchAgainst)
290 result = False
291 - if matchAgainst == True and type(matchAgainst) == types.BooleanType:
292 + if matchAgainst == True and type(matchAgainst) == bool:
293 result = markup != None
294 - elif callable(matchAgainst):
295 + elif hasattr(matchAgainst, '__call__'):
296 result = matchAgainst(markup)
297 else:
298 #Custom match methods take the tag as an argument, but all
299 @@ -938,7 +938,7 @@
300 if isinstance(markup, Tag):
301 markup = markup.name
302 if markup is not None and not isString(markup):
303 - markup = unicode(markup)
304 + markup = str(markup)
305 #Now we know that chunk is either a string, or None.
306 if hasattr(matchAgainst, 'match'):
307 # It's a regexp object.
308 @@ -947,10 +947,10 @@
309 and (markup is not None or not isString(matchAgainst))):
310 result = markup in matchAgainst
311 elif hasattr(matchAgainst, 'items'):
312 - result = markup.has_key(matchAgainst)
313 + result = matchAgainst in markup
314 elif matchAgainst and isString(markup):
315 - if isinstance(markup, unicode):
316 - matchAgainst = unicode(matchAgainst)
317 + if isinstance(markup, str):
318 + matchAgainst = str(matchAgainst)
319 else:
320 matchAgainst = str(matchAgainst)
321
322 @@ -971,13 +971,13 @@
323 """Convenience method that works with all 2.x versions of Python
324 to determine whether or not something is listlike."""
325 return ((hasattr(l, '__iter__') and not isString(l))
326 - or (type(l) in (types.ListType, types.TupleType)))
327 + or (type(l) in (list, tuple)))
328
329 def isString(s):
330 """Convenience method that works with all 2.x versions of Python
331 to determine whether or not something is stringlike."""
332 try:
333 - return isinstance(s, unicode) or isinstance(s, basestring)
334 + return isinstance(s, str) or isinstance(s, str)
335 except NameError:
336 return isinstance(s, str)
337
338 @@ -989,7 +989,7 @@
339 for portion in args:
340 if hasattr(portion, 'items'):
341 #It's a map. Merge it.
342 - for k,v in portion.items():
343 + for k,v in list(portion.items()):
344 built[k] = v
345 elif isList(portion) and not isString(portion):
346 #It's a list. Map each item to the default.
347 @@ -1034,7 +1034,7 @@
348 object, possibly one with a %SOUP-ENCODING% slot into which an
349 encoding will be plugged later."""
350 if text[:3] == "xml":
351 - text = u"xml version='1.0' encoding='%SOUP-ENCODING%'"
352 + text = "xml version='1.0' encoding='%SOUP-ENCODING%'"
353 self._toStringSubclass(text, ProcessingInstruction)
354
355 def handle_comment(self, text):
356 @@ -1044,7 +1044,7 @@
357 def handle_charref(self, ref):
358 "Handle character references as data."
359 if self.soup.convertEntities:
360 - data = unichr(int(ref))
361 + data = chr(int(ref))
362 else:
363 data = '&#%s;' % ref
364 self.handle_data(data)
365 @@ -1056,7 +1056,7 @@
366 data = None
367 if self.soup.convertHTMLEntities:
368 try:
369 - data = unichr(name2codepoint[ref])
370 + data = chr(name2codepoint[ref])
371 except KeyError:
372 pass
373
374 @@ -1147,7 +1147,7 @@
375 lambda x: '<!' + x.group(1) + '>')
376 ]
377
378 - ROOT_TAG_NAME = u'[document]'
379 + ROOT_TAG_NAME = '[document]'
380
381 HTML_ENTITIES = "html"
382 XML_ENTITIES = "xml"
383 @@ -1236,14 +1236,14 @@
384 def _feed(self, inDocumentEncoding=None, isHTML=False):
385 # Convert the document to Unicode.
386 markup = self.markup
387 - if isinstance(markup, unicode):
388 + if isinstance(markup, str):
389 if not hasattr(self, 'originalEncoding'):
390 self.originalEncoding = None
391 else:
392 dammit = UnicodeDammit\
393 (markup, [self.fromEncoding, inDocumentEncoding],
394 smartQuotesTo=self.smartQuotesTo, isHTML=isHTML)
395 - markup = dammit.unicode
396 + markup = dammit.str
397 self.originalEncoding = dammit.originalEncoding
398 self.declaredHTMLEncoding = dammit.declaredHTMLEncoding
399 if markup:
400 @@ -1269,8 +1269,8 @@
401 def isSelfClosingTag(self, name):
402 """Returns true iff the given string is the name of a
403 self-closing tag according to this parser."""
404 - return self.SELF_CLOSING_TAGS.has_key(name) \
405 - or self.instanceSelfClosingTags.has_key(name)
406 + return name in self.SELF_CLOSING_TAGS \
407 + or name in self.instanceSelfClosingTags
408
409 def reset(self):
410 Tag.__init__(self, self, self.ROOT_TAG_NAME)
411 @@ -1305,7 +1305,7 @@
412
413 def endData(self, containerClass=NavigableString):
414 if self.currentData:
415 - currentData = u''.join(self.currentData)
416 + currentData = ''.join(self.currentData)
417 if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and
418 not set([tag.name for tag in self.tagStack]).intersection(
419 self.PRESERVE_WHITESPACE_TAGS)):
420 @@ -1368,7 +1368,7 @@
421
422 nestingResetTriggers = self.NESTABLE_TAGS.get(name)
423 isNestable = nestingResetTriggers != None
424 - isResetNesting = self.RESET_NESTING_TAGS.has_key(name)
425 + isResetNesting = name in self.RESET_NESTING_TAGS
426 popTo = None
427 inclusive = True
428 for i in range(len(self.tagStack)-1, 0, -1):
429 @@ -1381,7 +1381,7 @@
430 if (nestingResetTriggers != None
431 and p.name in nestingResetTriggers) \
432 or (nestingResetTriggers == None and isResetNesting
433 - and self.RESET_NESTING_TAGS.has_key(p.name)):
434 + and p.name in self.RESET_NESTING_TAGS):
435
436 #If we encounter one of the nesting reset triggers
437 #peculiar to this tag, or we encounter another tag
438 @@ -1399,7 +1399,7 @@
439 if self.quoteStack:
440 #This is not a real tag.
441 #print "<%s> is not real!" % name
442 - attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs))
443 + attrs = ''.join([' %s="%s"' % (x_y[0], x_y[1]) for x_y in attrs])
444 self.handle_data('<%s%s>' % (name, attrs))
445 return
446 self.endData()
447 @@ -1493,7 +1493,7 @@
448 BeautifulStoneSoup before writing your own subclass."""
449
450 def __init__(self, *args, **kwargs):
451 - if not kwargs.has_key('smartQuotesTo'):
452 + if 'smartQuotesTo' not in kwargs:
453 kwargs['smartQuotesTo'] = self.HTML_ENTITIES
454 kwargs['isHTML'] = True
455 BeautifulStoneSoup.__init__(self, *args, **kwargs)
456 @@ -1677,7 +1677,7 @@
457 parent._getAttrMap()
458 if (isinstance(tag, Tag) and len(tag.contents) == 1 and
459 isinstance(tag.contents[0], NavigableString) and
460 - not parent.attrMap.has_key(tag.name)):
461 + tag.name not in parent.attrMap):
462 parent[tag.name] = tag.contents[0]
463 BeautifulStoneSoup.popTag(self)
464
465 @@ -1751,9 +1751,9 @@
466 self._detectEncoding(markup, isHTML)
467 self.smartQuotesTo = smartQuotesTo
468 self.triedEncodings = []
469 - if markup == '' or isinstance(markup, unicode):
470 + if markup == '' or isinstance(markup, str):
471 self.originalEncoding = None
472 - self.unicode = unicode(markup)
473 + self.str = str(markup)
474 return
475
476 u = None
477 @@ -1766,7 +1766,7 @@
478 if u: break
479
480 # If no luck and we have auto-detection library, try that:
481 - if not u and chardet and not isinstance(self.markup, unicode):
482 + if not u and chardet and not isinstance(self.markup, str):
483 u = self._convertFrom(chardet.detect(self.markup)['encoding'])
484
485 # As a last resort, try utf-8 and windows-1252:
486 @@ -1775,7 +1775,7 @@
487 u = self._convertFrom(proposed_encoding)
488 if u: break
489
490 - self.unicode = u
491 + self.str = u
492 if not u: self.originalEncoding = None
493
494 def _subMSChar(self, match):
495 @@ -1783,7 +1783,7 @@
496 entity."""
497 orig = match.group(1)
498 sub = self.MS_CHARS.get(orig)
499 - if type(sub) == types.TupleType:
500 + if type(sub) == tuple:
501 if self.smartQuotesTo == 'xml':
502 sub = '&#x'.encode() + sub[1].encode() + ';'.encode()
503 else:
504 @@ -1804,7 +1804,7 @@
505 if self.smartQuotesTo and proposed.lower() in("windows-1252",
506 "iso-8859-1",
507 "iso-8859-2"):
508 - smart_quotes_re = "([\x80-\x9f])"
509 + smart_quotes_re = b"([\x80-\x9f])"
510 smart_quotes_compiled = re.compile(smart_quotes_re)
511 markup = smart_quotes_compiled.sub(self._subMSChar, markup)
512
513 @@ -1813,7 +1813,7 @@
514 u = self._toUnicode(markup, proposed)
515 self.markup = u
516 self.originalEncoding = proposed
517 - except Exception, e:
518 + except Exception as e:
519 # print "That didn't work!"
520 # print e
521 return None
522 @@ -1842,7 +1842,7 @@
523 elif data[:4] == '\xff\xfe\x00\x00':
524 encoding = 'utf-32le'
525 data = data[4:]
526 - newdata = unicode(data, encoding)
527 + newdata = str(data, encoding)
528 return newdata
529
530 def _detectEncoding(self, xml_data, isHTML=False):
531 @@ -1855,41 +1855,41 @@
532 elif xml_data[:4] == '\x00\x3c\x00\x3f':
533 # UTF-16BE
534 sniffed_xml_encoding = 'utf-16be'
535 - xml_data = unicode(xml_data, 'utf-16be').encode('utf-8')
536 + xml_data = str(xml_data, 'utf-16be').encode('utf-8')
537 elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \
538 and (xml_data[2:4] != '\x00\x00'):
539 # UTF-16BE with BOM
540 sniffed_xml_encoding = 'utf-16be'
541 - xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8')
542 + xml_data = str(xml_data[2:], 'utf-16be').encode('utf-8')
543 elif xml_data[:4] == '\x3c\x00\x3f\x00':
544 # UTF-16LE
545 sniffed_xml_encoding = 'utf-16le'
546 - xml_data = unicode(xml_data, 'utf-16le').encode('utf-8')
547 + xml_data = str(xml_data, 'utf-16le').encode('utf-8')
548 elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \
549 (xml_data[2:4] != '\x00\x00'):
550 # UTF-16LE with BOM
551 sniffed_xml_encoding = 'utf-16le'
552 - xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8')
553 + xml_data = str(xml_data[2:], 'utf-16le').encode('utf-8')
554 elif xml_data[:4] == '\x00\x00\x00\x3c':
555 # UTF-32BE
556 sniffed_xml_encoding = 'utf-32be'
557 - xml_data = unicode(xml_data, 'utf-32be').encode('utf-8')
558 + xml_data = str(xml_data, 'utf-32be').encode('utf-8')
559 elif xml_data[:4] == '\x3c\x00\x00\x00':
560 # UTF-32LE
561 sniffed_xml_encoding = 'utf-32le'
562 - xml_data = unicode(xml_data, 'utf-32le').encode('utf-8')
563 + xml_data = str(xml_data, 'utf-32le').encode('utf-8')
564 elif xml_data[:4] == '\x00\x00\xfe\xff':
565 # UTF-32BE with BOM
566 sniffed_xml_encoding = 'utf-32be'
567 - xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8')
568 + xml_data = str(xml_data[4:], 'utf-32be').encode('utf-8')
569 elif xml_data[:4] == '\xff\xfe\x00\x00':
570 # UTF-32LE with BOM
571 sniffed_xml_encoding = 'utf-32le'
572 - xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8')
573 + xml_data = str(xml_data[4:], 'utf-32le').encode('utf-8')
574 elif xml_data[:3] == '\xef\xbb\xbf':
575 # UTF-8 with BOM
576 sniffed_xml_encoding = 'utf-8'
577 - xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8')
578 + xml_data = str(xml_data[3:], 'utf-8').encode('utf-8')
579 else:
580 sniffed_xml_encoding = 'ascii'
581 pass
582 @@ -1954,41 +1954,41 @@
583 250,251,252,253,254,255)
584 import string
585 c.EBCDIC_TO_ASCII_MAP = string.maketrans( \
586 - ''.join(map(chr, range(256))), ''.join(map(chr, emap)))
587 + ''.join(map(chr, list(range(256)))), ''.join(map(chr, emap)))
588 return s.translate(c.EBCDIC_TO_ASCII_MAP)
589
590 - MS_CHARS = { '\x80' : ('euro', '20AC'),
591 - '\x81' : ' ',
592 - '\x82' : ('sbquo', '201A'),
593 - '\x83' : ('fnof', '192'),
594 - '\x84' : ('bdquo', '201E'),
595 - '\x85' : ('hellip', '2026'),
596 - '\x86' : ('dagger', '2020'),
597 - '\x87' : ('Dagger', '2021'),
598 - '\x88' : ('circ', '2C6'),
599 - '\x89' : ('permil', '2030'),
600 - '\x8A' : ('Scaron', '160'),
601 - '\x8B' : ('lsaquo', '2039'),
602 - '\x8C' : ('OElig', '152'),
603 - '\x8D' : '?',
604 - '\x8E' : ('#x17D', '17D'),
605 - '\x8F' : '?',
606 - '\x90' : '?',
607 - '\x91' : ('lsquo', '2018'),
608 - '\x92' : ('rsquo', '2019'),
609 - '\x93' : ('ldquo', '201C'),
610 - '\x94' : ('rdquo', '201D'),
611 - '\x95' : ('bull', '2022'),
612 - '\x96' : ('ndash', '2013'),
613 - '\x97' : ('mdash', '2014'),
614 - '\x98' : ('tilde', '2DC'),
615 - '\x99' : ('trade', '2122'),
616 - '\x9a' : ('scaron', '161'),
617 - '\x9b' : ('rsaquo', '203A'),
618 - '\x9c' : ('oelig', '153'),
619 - '\x9d' : '?',
620 - '\x9e' : ('#x17E', '17E'),
621 - '\x9f' : ('Yuml', ''),}
622 + MS_CHARS = { b'\x80' : ('euro', '20AC'),
623 + b'\x81' : ' ',
624 + b'\x82' : ('sbquo', '201A'),
625 + b'\x83' : ('fnof', '192'),
626 + b'\x84' : ('bdquo', '201E'),
627 + b'\x85' : ('hellip', '2026'),
628 + b'\x86' : ('dagger', '2020'),
629 + b'\x87' : ('Dagger', '2021'),
630 + b'\x88' : ('circ', '2C6'),
631 + b'\x89' : ('permil', '2030'),
632 + b'\x8A' : ('Scaron', '160'),
633 + b'\x8B' : ('lsaquo', '2039'),
634 + b'\x8C' : ('OElig', '152'),
635 + b'\x8D' : '?',
636 + b'\x8E' : ('#x17D', '17D'),
637 + b'\x8F' : '?',
638 + b'\x90' : '?',
639 + b'\x91' : ('lsquo', '2018'),
640 + b'\x92' : ('rsquo', '2019'),
641 + b'\x93' : ('ldquo', '201C'),
642 + b'\x94' : ('rdquo', '201D'),
643 + b'\x95' : ('bull', '2022'),
644 + b'\x96' : ('ndash', '2013'),
645 + b'\x97' : ('mdash', '2014'),
646 + b'\x98' : ('tilde', '2DC'),
647 + b'\x99' : ('trade', '2122'),
648 + b'\x9a' : ('scaron', '161'),
649 + b'\x9b' : ('rsaquo', '203A'),
650 + b'\x9c' : ('oelig', '153'),
651 + b'\x9d' : '?',
652 + b'\x9e' : ('#x17E', '17E'),
653 + b'\x9f' : ('Yuml', ''),}
654
655 #######################################################################
656
657 @@ -1997,4 +1997,4 @@
658 if __name__ == '__main__':
659 import sys
660 soup = BeautifulSoup(sys.stdin)
661 - print soup.prettify()
662 + print(soup.prettify())
663 --- BeautifulSoupTests.py
664 +++ BeautifulSoupTests.py
665 @@ -82,7 +82,7 @@
666 def testFindAllText(self):
667 soup = BeautifulSoup("<html>\xbb</html>")
668 self.assertEqual(soup.findAll(text=re.compile('.*')),
669 - [u'\xbb'])
670 + ['\xbb'])
671
672 def testFindAllByRE(self):
673 import re
674 @@ -215,7 +215,7 @@
675 soup = BeautifulSoup(self.x, parseOnlyThese=strainer)
676 self.assertEquals(len(soup), 10)
677
678 - strainer = SoupStrainer(text=lambda(x):x[8]=='3')
679 + strainer = SoupStrainer(text=lambda x:x[8]=='3')
680 soup = BeautifulSoup(self.x, parseOnlyThese=strainer)
681 self.assertEquals(len(soup), 3)
682
683 @@ -256,7 +256,7 @@
684 self.assertEqual(copied.decode(), self.soup.decode())
685
686 def testUnicodePickle(self):
687 - import cPickle as pickle
688 + import pickle as pickle
689 html = "<b>" + chr(0xc3) + "</b>"
690 soup = BeautifulSoup(html)
691 dumped = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL)
692 @@ -586,23 +586,23 @@
693 self.assertEquals(soup.decode(), "<<sacr&eacute; bleu!>>")
694
695 soup = BeautifulStoneSoup(text, convertEntities=htmlEnt)
696 - self.assertEquals(soup.decode(), u"<<sacr\xe9 bleu!>>")
697 + self.assertEquals(soup.decode(), "<<sacr\xe9 bleu!>>")
698
699 # Make sure the "XML", "HTML", and "XHTML" settings work.
700 text = "&lt;&trade;&apos;"
701 soup = BeautifulStoneSoup(text, convertEntities=xmlEnt)
702 - self.assertEquals(soup.decode(), u"<&trade;'")
703 + self.assertEquals(soup.decode(), "<&trade;'")
704
705 soup = BeautifulStoneSoup(text, convertEntities=htmlEnt)
706 - self.assertEquals(soup.decode(), u"<\u2122&apos;")
707 + self.assertEquals(soup.decode(), "<\u2122&apos;")
708
709 soup = BeautifulStoneSoup(text, convertEntities=xhtmlEnt)
710 - self.assertEquals(soup.decode(), u"<\u2122'")
711 + self.assertEquals(soup.decode(), "<\u2122'")
712
713 def testNonBreakingSpaces(self):
714 soup = BeautifulSoup("<a>&nbsp;&nbsp;</a>",
715 convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
716 - self.assertEquals(soup.decode(), u"<a>\xa0\xa0</a>")
717 + self.assertEquals(soup.decode(), "<a>\xa0\xa0</a>")
718
719 def testWhitespaceInDeclaration(self):
720 self.assertSoupEquals('<! DOCTYPE>', '<!DOCTYPE>')
721 @@ -617,27 +617,27 @@
722 self.assertSoupEquals('<b>hello&nbsp;there</b>')
723
724 def testEntitiesInAttributeValues(self):
725 - self.assertSoupEquals('<x t="x&#241;">', '<x t="x\xc3\xb1"></x>',
726 + self.assertSoupEquals('<x t="x&#241;">', b'<x t="x\xc3\xb1"></x>',
727 encoding='utf-8')
728 - self.assertSoupEquals('<x t="x&#xf1;">', '<x t="x\xc3\xb1"></x>',
729 + self.assertSoupEquals('<x t="x&#xf1;">', b'<x t="x\xc3\xb1"></x>',
730 encoding='utf-8')
731
732 soup = BeautifulSoup('<x t="&gt;&trade;">',
733 convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
734 - self.assertEquals(soup.decode(), u'<x t="&gt;\u2122"></x>')
735 + self.assertEquals(soup.decode(), '<x t="&gt;\u2122"></x>')
736
737 uri = "http://crummy.com?sacr&eacute;&amp;bleu"
738 link = '<a href="%s"></a>' % uri
739
740 soup = BeautifulSoup(link, convertEntities=BeautifulSoup.HTML_ENTITIES)
741 self.assertEquals(soup.decode(),
742 - link.replace("&eacute;", u"\xe9"))
743 + link.replace("&eacute;", "\xe9"))
744
745 uri = "http://crummy.com?sacr&eacute;&bleu"
746 link = '<a href="%s"></a>' % uri
747 soup = BeautifulSoup(link, convertEntities=BeautifulSoup.HTML_ENTITIES)
748 self.assertEquals(soup.a['href'],
749 - uri.replace("&eacute;", u"\xe9"))
750 + uri.replace("&eacute;", "\xe9"))
751
752 def testNakedAmpersands(self):
753 html = {'convertEntities':BeautifulStoneSoup.HTML_ENTITIES}
754 @@ -663,13 +663,13 @@
755 smart quote fixes."""
756
757 def testUnicodeDammitStandalone(self):
758 - markup = "<foo>\x92</foo>"
759 + markup = b"<foo>\x92</foo>"
760 dammit = UnicodeDammit(markup)
761 - self.assertEquals(dammit.unicode, "<foo>&#x2019;</foo>")
762 + self.assertEquals(dammit.str, "<foo>&#x2019;</foo>")
763
764 - hebrew = "\xed\xe5\xec\xf9"
765 + hebrew = b"\xed\xe5\xec\xf9"
766 dammit = UnicodeDammit(hebrew, ["iso-8859-8"])
767 - self.assertEquals(dammit.unicode, u'\u05dd\u05d5\u05dc\u05e9')
768 + self.assertEquals(dammit.str, '\u05dd\u05d5\u05dc\u05e9')
769 self.assertEquals(dammit.originalEncoding, 'iso-8859-8')
770
771 def testGarbageInGarbageOut(self):
772 @@ -677,13 +677,13 @@
773 asciiSoup = BeautifulStoneSoup(ascii)
774 self.assertEquals(ascii, asciiSoup.decode())
775
776 - unicodeData = u"<foo>\u00FC</foo>"
777 + unicodeData = "<foo>\u00FC</foo>"
778 utf8 = unicodeData.encode("utf-8")
779 - self.assertEquals(utf8, '<foo>\xc3\xbc</foo>')
780 + self.assertEquals(utf8, b'<foo>\xc3\xbc</foo>')
781
782 unicodeSoup = BeautifulStoneSoup(unicodeData)
783 self.assertEquals(unicodeData, unicodeSoup.decode())
784 - self.assertEquals(unicodeSoup.foo.string, u'\u00FC')
785 + self.assertEquals(unicodeSoup.foo.string, '\u00FC')
786
787 utf8Soup = BeautifulStoneSoup(utf8, fromEncoding='utf-8')
788 self.assertEquals(utf8, utf8Soup.encode('utf-8'))
789 @@ -696,18 +696,18 @@
790
791 def testHandleInvalidCodec(self):
792 for bad_encoding in ['.utf8', '...', 'utF---16.!']:
793 - soup = BeautifulSoup(u"Räksmörgås".encode("utf-8"),
794 + soup = BeautifulSoup("Räksmörgås".encode("utf-8"),
795 fromEncoding=bad_encoding)
796 self.assertEquals(soup.originalEncoding, 'utf-8')
797
798 def testUnicodeSearch(self):
799 - html = u'<html><body><h1>Räksmörgås</h1></body></html>'
800 + html = '<html><body><h1>Räksmörgås</h1></body></html>'
801 soup = BeautifulSoup(html)
802 - self.assertEqual(soup.find(text=u'Räksmörgås'),u'Räksmörgås')
803 + self.assertEqual(soup.find(text='Räksmörgås'),'Räksmörgås')
804
805 def testRewrittenXMLHeader(self):
806 - euc_jp = '<?xml version="1.0 encoding="euc-jp"?>\n<foo>\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n</foo>\n'
807 - utf8 = "<?xml version='1.0' encoding='utf-8'?>\n<foo>\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n</foo>\n"
808 + euc_jp = b'<?xml version="1.0 encoding="euc-jp"?>\n<foo>\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n</foo>\n'
809 + utf8 = b"<?xml version='1.0' encoding='utf-8'?>\n<foo>\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n</foo>\n"
810 soup = BeautifulStoneSoup(euc_jp)
811 if soup.originalEncoding != "euc-jp":
812 raise Exception("Test failed when parsing euc-jp document. "
813 @@ -718,12 +718,12 @@
814 self.assertEquals(soup.originalEncoding, "euc-jp")
815 self.assertEquals(soup.renderContents('utf-8'), utf8)
816
817 - old_text = "<?xml encoding='windows-1252'><foo>\x92</foo>"
818 + old_text = b"<?xml encoding='windows-1252'><foo>\x92</foo>"
819 new_text = "<?xml version='1.0' encoding='utf-8'?><foo>&rsquo;</foo>"
820 self.assertSoupEquals(old_text, new_text)
821
822 def testRewrittenMetaTag(self):
823 - no_shift_jis_html = '''<html><head>\n<meta http-equiv="Content-language" content="ja" /></head><body><pre>\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n</pre></body></html>'''
824 + no_shift_jis_html = b'''<html><head>\n<meta http-equiv="Content-language" content="ja" /></head><body><pre>\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n</pre></body></html>'''
825 soup = BeautifulSoup(no_shift_jis_html)
826
827 # Beautiful Soup used to try to rewrite the meta tag even if the
828 @@ -733,16 +733,16 @@
829 soup = BeautifulSoup(no_shift_jis_html, parseOnlyThese=strainer)
830 self.assertEquals(soup.contents[0].name, 'pre')
831
832 - meta_tag = ('<meta content="text/html; charset=x-sjis" '
833 - 'http-equiv="Content-type" />')
834 + meta_tag = (b'<meta content="text/html; charset=x-sjis" '
835 + b'http-equiv="Content-type" />')
836 shift_jis_html = (
837 - '<html><head>\n%s\n'
838 - '<meta http-equiv="Content-language" content="ja" />'
839 - '</head><body><pre>\n'
840 - '\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
841 - '\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
842 - '\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n'
843 - '</pre></body></html>') % meta_tag
844 + b'<html><head>\n' + meta_tag + b'\n'
845 + b'<meta http-equiv="Content-language" content="ja" />'
846 + b'</head><body><pre>\n'
847 + b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
848 + b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
849 + b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n'
850 + b'</pre></body></html>')
851 soup = BeautifulSoup(shift_jis_html)
852 if soup.originalEncoding != "shift-jis":
853 raise Exception("Test failed when parsing shift-jis document "
854 @@ -755,59 +755,59 @@
855 content_type_tag = soup.meta['content']
856 self.assertEquals(content_type_tag[content_type_tag.find('charset='):],
857 'charset=%SOUP-ENCODING%')
858 - content_type = str(soup.meta)
859 + content_type = soup.meta.decode()
860 index = content_type.find('charset=')
861 self.assertEqual(content_type[index:index+len('charset=utf8')+1],
862 'charset=utf-8')
863 content_type = soup.meta.encode('shift-jis')
864 - index = content_type.find('charset=')
865 + index = content_type.find(b'charset=')
866 self.assertEqual(content_type[index:index+len('charset=shift-jis')],
867 'charset=shift-jis'.encode())
868
869 self.assertEquals(soup.encode('utf-8'), (
870 - '<html><head>\n'
871 - '<meta content="text/html; charset=utf-8" '
872 - 'http-equiv="Content-type" />\n'
873 - '<meta http-equiv="Content-language" content="ja" />'
874 - '</head><body><pre>\n'
875 - '\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3'
876 - '\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3'
877 - '\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6'
878 - '\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3'
879 - '\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n'
880 - '</pre></body></html>'))
881 + b'<html><head>\n'
882 + b'<meta content="text/html; charset=utf-8" '
883 + b'http-equiv="Content-type" />\n'
884 + b'<meta http-equiv="Content-language" content="ja" />'
885 + b'</head><body><pre>\n'
886 + b'\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3'
887 + b'\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3'
888 + b'\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6'
889 + b'\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3'
890 + b'\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n'
891 + b'</pre></body></html>'))
892 self.assertEquals(soup.encode("shift-jis"),
893 shift_jis_html.replace('x-sjis'.encode(),
894 'shift-jis'.encode()))
895
896 - isolatin = """<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>"""
897 + isolatin = b"""<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>"""
898 soup = BeautifulSoup(isolatin)
899
900 utf8 = isolatin.replace("ISO-Latin-1".encode(), "utf-8".encode())
901 - utf8 = utf8.replace("\xe9", "\xc3\xa9")
902 + utf8 = utf8.replace(b"\xe9", b"\xc3\xa9")
903 self.assertSoupEquals(soup.encode("utf-8"), utf8, encoding='utf-8')
904
905 def testHebrew(self):
906 - iso_8859_8= '<HEAD>\n<TITLE>Hebrew (ISO 8859-8) in Visual Directionality</TITLE>\n\n\n\n</HEAD>\n<BODY>\n<H1>Hebrew (ISO 8859-8) in Visual Directionality</H1>\n\xed\xe5\xec\xf9\n</BODY>\n'
907 - utf8 = '<head>\n<title>Hebrew (ISO 8859-8) in Visual Directionality</title>\n</head>\n<body>\n<h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n</body>\n'
908 + iso_8859_8= b'<HEAD>\n<TITLE>Hebrew (ISO 8859-8) in Visual Directionality</TITLE>\n\n\n\n</HEAD>\n<BODY>\n<H1>Hebrew (ISO 8859-8) in Visual Directionality</H1>\n\xed\xe5\xec\xf9\n</BODY>\n'
909 + utf8 = b'<head>\n<title>Hebrew (ISO 8859-8) in Visual Directionality</title>\n</head>\n<body>\n<h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n</body>\n'
910 soup = BeautifulStoneSoup(iso_8859_8, fromEncoding="iso-8859-8")
911 self.assertEquals(soup.encode('utf-8'), utf8)
912
913 def testSmartQuotesNotSoSmartAnymore(self):
914 - self.assertSoupEquals("\x91Foo\x92 <!--blah-->",
915 + self.assertSoupEquals(b"\x91Foo\x92 <!--blah-->",
916 '&lsquo;Foo&rsquo; <!--blah-->')
917
918 def testDontConvertSmartQuotesWhenAlsoConvertingEntities(self):
919 - smartQuotes = "Il a dit, \x8BSacr&eacute; bl&#101;u!\x9b"
920 + smartQuotes = b"Il a dit, \x8BSacr&eacute; bl&#101;u!\x9b"
921 soup = BeautifulSoup(smartQuotes)
922 self.assertEquals(soup.decode(),
923 'Il a dit, &lsaquo;Sacr&eacute; bl&#101;u!&rsaquo;')
924 soup = BeautifulSoup(smartQuotes, convertEntities="html")
925 self.assertEquals(soup.encode('utf-8'),
926 - 'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba')
927 + b'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba')
928
929 def testDontSeeSmartQuotesWhereThereAreNone(self):
930 - utf_8 = "\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
931 + utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
932 self.assertSoupEquals(utf_8, encoding='utf-8')
933
934
935 --- setup.py
936 +++ setup.py
937 @@ -19,19 +19,19 @@
938 suite = loader.loadTestsFromModule(BeautifulSoupTests)
939 suite.run(result)
940 if not result.wasSuccessful():
941 - print "Unit tests have failed!"
942 + print("Unit tests have failed!")
943 for l in result.errors, result.failures:
944 for case, error in l:
945 - print "-" * 80
946 + print("-" * 80)
947 desc = case.shortDescription()
948 if desc:
949 - print desc
950 - print error
951 - print '''If you see an error like: "'ascii' codec can't encode character...", see\nthe Beautiful Soup documentation:\n http://www.crummy.com/software/BeautifulSoup/documentation.html#Why%20can't%20Beautiful%20Soup%20print%20out%20the%20non-ASCII%20characters%20I%20gave%20it?'''
952 - print "This might or might not be a problem depending on what you plan to do with\nBeautiful Soup."
953 + print(desc)
954 + print(error)
955 + print('''If you see an error like: "'ascii' codec can't encode character...", see\nthe Beautiful Soup documentation:\n http://www.crummy.com/software/BeautifulSoup/documentation.html#Why%20can't%20Beautiful%20Soup%20print%20out%20the%20non-ASCII%20characters%20I%20gave%20it?''')
956 + print("This might or might not be a problem depending on what you plan to do with\nBeautiful Soup.")
957 if sys.argv[1] == 'sdist':
958 - print
959 - print "I'm not going to make a source distribution since the tests don't pass."
960 + print()
961 + print("I'm not going to make a source distribution since the tests don't pass.")
962 sys.exit(1)
963
964 setup(name="BeautifulSoup",