[gentoo-commits] gentoo-x86 commit in dev-python/beautifulsoup/files: beautifulsoup-3.1.0.1-python-3.patch - gentoo-commits

From:	"Arfrever Frehtes Taifersar Arahesis (arfrever)" <arfrever@g.o>
To:	gentoo-commits@l.g.o
Subject:	[gentoo-commits] gentoo-x86 commit in dev-python/beautifulsoup/files: beautifulsoup-3.1.0.1-python-3.patch
Date:	Tue, 08 Sep 2009 19:57:08
Message-Id:	`E1Ml6oJ-00058v-T6@stork.gentoo.org`

1

arfrever    09/09/08 19:57:03

2

3

  Added:                beautifulsoup-3.1.0.1-python-3.patch

4

  Log:

5

  Add patch for compatibility with Python 3 (upstream patches don't apply cleanly). Set SUPPORT_PYTHON_ABIS.

6

  (Portage version: 14218-svn/cvs/Linux x86_64)

7

8

Revision  Changes    Path

9

1.1                  dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-python-3.patch

10

11

file : http://sources.gentoo.org/viewcvs.py/gentoo-x86/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-python-3.patch?rev=1.1&view=markup

12

plain: http://sources.gentoo.org/viewcvs.py/gentoo-x86/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-python-3.patch?rev=1.1&content-type=text/plain

13

14

Index: beautifulsoup-3.1.0.1-python-3.patch

15

===================================================================

16

--- BeautifulSoup.py

17

+++ BeautifulSoup.py

18

@@ -76,7 +76,7 @@

19

 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT.

20

21

 """

22

-from __future__ import generators

23

+

24

25

 __author__ = "Leonard Richardson (leonardr@××××××××.org)"

26

 __version__ = "3.1.0.1"

27

@@ -84,12 +84,12 @@

28

 __license__ = "New-style BSD"

29

30

 import codecs

31

-import markupbase

32

+import _markupbase

33

 import types

34

 import re

35

-from HTMLParser import HTMLParser, HTMLParseError

36

+from html.parser import HTMLParser, HTMLParseError

37

 try:

38

-    from htmlentitydefs import name2codepoint

39

+    from html.entities import name2codepoint

40

 except ImportError:

41

     name2codepoint = {}

42

 try:

43

@@ -98,18 +98,18 @@

44

     from sets import Set as set

45

46

 #These hacks make Beautiful Soup able to parse XML with namespaces

47

-markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match

48

+_markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match

49

50

 DEFAULT_OUTPUT_ENCODING = "utf-8"

51

52

 # First, the classes that represent markup elements.

53

54

-def sob(unicode, encoding):

55

+def sob(str, encoding):

56

     """Returns either the given Unicode string or its encoding."""

57

     if encoding is None:

58

-        return unicode

59

+        return str

60

     else:

61

-        return unicode.encode(encoding)

62

+        return str.encode(encoding)

63

64

 class PageElement:

65

     """Contains the navigational information for some part of the page

66

@@ -178,8 +178,8 @@

67

         return lastChild

68

69

     def insert(self, position, newChild):

70

-        if (isinstance(newChild, basestring)

71

-            or isinstance(newChild, unicode)) \

72

+        if (isinstance(newChild, str)

73

+            or isinstance(newChild, str)) \

74

             and not isinstance(newChild, NavigableString):

75

             newChild = NavigableString(newChild)

76

77

@@ -334,7 +334,7 @@

78

         g = generator()

79

         while True:

80

             try:

81

-                i = g.next()

82

+                i = g.__next__()

83

             except StopIteration:

84

                 break

85

             if i:

86

@@ -385,22 +385,22 @@

87

     def toEncoding(self, s, encoding=None):

88

         """Encodes an object to a string in some encoding, or to Unicode.

89

         ."""

90

-        if isinstance(s, unicode):

91

+        if isinstance(s, str):

92

             if encoding:

93

                 s = s.encode(encoding)

94

         elif isinstance(s, str):

95

             if encoding:

96

                 s = s.encode(encoding)

97

             else:

98

-                s = unicode(s)

99

+                s = str(s)

100

         else:

101

             if encoding:

102

                 s  = self.toEncoding(str(s), encoding)

103

             else:

104

-                s = unicode(s)

105

+                s = str(s)

106

         return s

107

108

-class NavigableString(unicode, PageElement):

109

+class NavigableString(str, PageElement):

110

111

     def __new__(cls, value):

112

         """Create a new NavigableString.

113

@@ -410,12 +410,12 @@

114

         passed in to the superclass's __new__ or the superclass won't know

115

         how to handle non-ASCII characters.

116

         """

117

-        if isinstance(value, unicode):

118

-            return unicode.__new__(cls, value)

119

-        return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)

120

+        if isinstance(value, str):

121

+            return str.__new__(cls, value)

122

+        return str.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)

123

124

     def __getnewargs__(self):

125

-        return (unicode(self),)

126

+        return (str(self),)

127

128

     def __getattr__(self, attr):

129

         """text.string gives you text. This is for backwards

130

@@ -424,7 +424,7 @@

131

         if attr == 'string':

132

             return self

133

         else:

134

-            raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr)

135

+            raise AttributeError("'%s' object has no attribute '%s'" % (self.__class__.__name__, attr))

136

137

     def encode(self, encoding=DEFAULT_OUTPUT_ENCODING):

138

         return self.decode().encode(encoding)

139

@@ -435,23 +435,23 @@

140

 class CData(NavigableString):

141

142

     def decodeGivenEventualEncoding(self, eventualEncoding):

143

-        return u'<![CDATA[' + self + u']]>'

144

+        return '<![CDATA[' + self + ']]>'

145

146

 class ProcessingInstruction(NavigableString):

147

148

     def decodeGivenEventualEncoding(self, eventualEncoding):

149

         output = self

150

-        if u'%SOUP-ENCODING%' in output:

151

+        if '%SOUP-ENCODING%' in output:

152

             output = self.substituteEncoding(output, eventualEncoding)

153

-        return u'<?' + output + u'?>'

154

+        return '<?' + output + '?>'

155

156

 class Comment(NavigableString):

157

     def decodeGivenEventualEncoding(self, eventualEncoding):

158

-        return u'<!--' + self + u'-->'

159

+        return '<!--' + self + '-->'

160

161

 class Declaration(NavigableString):

162

     def decodeGivenEventualEncoding(self, eventualEncoding):

163

-        return u'<!' + self + u'>'

164

+        return '<!' + self + '>'

165

166

 class Tag(PageElement):

167

168

@@ -460,7 +460,7 @@

169

     def _invert(h):

170

         "Cheap function to invert a hash."

171

         i = {}

172

-        for k,v in h.items():

173

+        for k,v in list(h.items()):

174

             i[v] = k

175

         return i

176

177

@@ -479,23 +479,23 @@

178

         escaped."""

179

         x = match.group(1)

180

         if self.convertHTMLEntities and x in name2codepoint:

181

-            return unichr(name2codepoint[x])

182

+            return chr(name2codepoint[x])

183

         elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS:

184

             if self.convertXMLEntities:

185

                 return self.XML_ENTITIES_TO_SPECIAL_CHARS[x]

186

             else:

187

-                return u'&%s;' % x

188

+                return '&%s;' % x

189

         elif len(x) > 0 and x[0] == '#':

190

             # Handle numeric entities

191

             if len(x) > 1 and x[1] == 'x':

192

-                return unichr(int(x[2:], 16))

193

+                return chr(int(x[2:], 16))

194

             else:

195

-                return unichr(int(x[1:]))

196

+                return chr(int(x[1:]))

197

198

         elif self.escapeUnrecognizedEntities:

199

-            return u'&amp;%s;' % x

200

+            return '&amp;%s;' % x

201

         else:

202

-            return u'&%s;' % x

203

+            return '&%s;' % x

204

205

     def __init__(self, parser, name, attrs=None, parent=None,

206

                  previous=None):

207

@@ -524,7 +524,7 @@

208

                 return kval

209

             return (k, re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);",

210

                               self._convertEntities, val))

211

-        self.attrs = map(convert, self.attrs)

212

+        self.attrs = list(map(convert, self.attrs))

213

214

     def get(self, key, default=None):

215

         """Returns the value of the 'key' attribute for the tag, or

216

@@ -533,7 +533,7 @@

217

         return self._getAttrMap().get(key, default)

218

219

     def has_key(self, key):

220

-        return self._getAttrMap().has_key(key)

221

+        return key in self._getAttrMap()

222

223

     def __getitem__(self, key):

224

         """tag[key] returns the value of the 'key' attribute for the tag,

225

@@ -551,7 +551,7 @@

226

     def __contains__(self, x):

227

         return x in self.contents

228

229

-    def __nonzero__(self):

230

+    def __bool__(self):

231

         "A tag is non-None even if it has no contents."

232

         return True

233

234

@@ -577,14 +577,14 @@

235

                 #We don't break because bad HTML can define the same

236

                 #attribute multiple times.

237

             self._getAttrMap()

238

-            if self.attrMap.has_key(key):

239

+            if key in self.attrMap:

240

                 del self.attrMap[key]

241

242

     def __call__(self, *args, **kwargs):

243

         """Calling a tag like a function is the same as calling its

244

         findAll() method. Eg. tag('a') returns a list of all the A tags

245

         found within this tag."""

246

-        return apply(self.findAll, args, kwargs)

247

+        return self.findAll(*args, **kwargs)

248

249

     def __getattr__(self, tag):

250

         #print "Getattr %s.%s" % (self.__class__, tag)

251

@@ -592,7 +592,7 @@

252

             return self.find(tag[:-3])

253

         elif tag.find('__') != 0:

254

             return self.find(tag)

255

-        raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag)

256

+        raise AttributeError("'%s' object has no attribute '%s'" % (self.__class__, tag))

257

258

     def __eq__(self, other):

259

         """Returns true iff this tag has the same name, the same attributes,

260

@@ -868,7 +868,7 @@

261

         if isinstance(markupName, Tag):

262

             markup = markupName

263

             markupAttrs = markup

264

-        callFunctionWithTagData = callable(self.name) \

265

+        callFunctionWithTagData = hasattr(self.name, '__call__') \

266

                                 and not isinstance(markupName, Tag)

267

268

         if (not self.name) \

269

@@ -880,7 +880,7 @@

270

             else:

271

                 match = True

272

                 markupAttrMap = None

273

-                for attr, matchAgainst in self.attrs.items():

274

+                for attr, matchAgainst in list(self.attrs.items()):

275

                     if not markupAttrMap:

276

                          if hasattr(markupAttrs, 'get'):

277

                             markupAttrMap = markupAttrs

278

@@ -921,16 +921,16 @@

279

             if self._matches(markup, self.text):

280

                 found = markup

281

         else:

282

-            raise Exception, "I don't know how to match against a %s" \

283

-                  % markup.__class__

284

+            raise Exception("I don't know how to match against a %s" \

285

+                  % markup.__class__)

286

         return found

287

288

     def _matches(self, markup, matchAgainst):

289

         #print "Matching %s against %s" % (markup, matchAgainst)

290

         result = False

291

-        if matchAgainst == True and type(matchAgainst) == types.BooleanType:

292

+        if matchAgainst == True and type(matchAgainst) == bool:

293

             result = markup != None

294

-        elif callable(matchAgainst):

295

+        elif hasattr(matchAgainst, '__call__'):

296

             result = matchAgainst(markup)

297

         else:

298

             #Custom match methods take the tag as an argument, but all

299

@@ -938,7 +938,7 @@

300

             if isinstance(markup, Tag):

301

                 markup = markup.name

302

             if markup is not None and not isString(markup):

303

-                markup = unicode(markup)

304

+                markup = str(markup)

305

             #Now we know that chunk is either a string, or None.

306

             if hasattr(matchAgainst, 'match'):

307

                 # It's a regexp object.

308

@@ -947,10 +947,10 @@

309

                   and (markup is not None or not isString(matchAgainst))):

310

                 result = markup in matchAgainst

311

             elif hasattr(matchAgainst, 'items'):

312

-                result = markup.has_key(matchAgainst)

313

+                result = matchAgainst in markup

314

             elif matchAgainst and isString(markup):

315

-                if isinstance(markup, unicode):

316

-                    matchAgainst = unicode(matchAgainst)

317

+                if isinstance(markup, str):

318

+                    matchAgainst = str(matchAgainst)

319

                 else:

320

                     matchAgainst = str(matchAgainst)

321

322

@@ -971,13 +971,13 @@

323

     """Convenience method that works with all 2.x versions of Python

324

     to determine whether or not something is listlike."""

325

     return ((hasattr(l, '__iter__') and not isString(l))

326

-            or (type(l) in (types.ListType, types.TupleType)))

327

+            or (type(l) in (list, tuple)))

328

329

 def isString(s):

330

     """Convenience method that works with all 2.x versions of Python

331

     to determine whether or not something is stringlike."""

332

     try:

333

-        return isinstance(s, unicode) or isinstance(s, basestring)

334

+        return isinstance(s, str) or isinstance(s, str)

335

     except NameError:

336

         return isinstance(s, str)

337

338

@@ -989,7 +989,7 @@

339

     for portion in args:

340

         if hasattr(portion, 'items'):

341

             #It's a map. Merge it.

342

-            for k,v in portion.items():

343

+            for k,v in list(portion.items()):

344

                 built[k] = v

345

         elif isList(portion) and not isString(portion):

346

             #It's a list. Map each item to the default.

347

@@ -1034,7 +1034,7 @@

348

         object, possibly one with a %SOUP-ENCODING% slot into which an

349

         encoding will be plugged later."""

350

         if text[:3] == "xml":

351

-            text = u"xml version='1.0' encoding='%SOUP-ENCODING%'"

352

+            text = "xml version='1.0' encoding='%SOUP-ENCODING%'"

353

         self._toStringSubclass(text, ProcessingInstruction)

354

355

     def handle_comment(self, text):

356

@@ -1044,7 +1044,7 @@

357

     def handle_charref(self, ref):

358

         "Handle character references as data."

359

         if self.soup.convertEntities:

360

-            data = unichr(int(ref))

361

+            data = chr(int(ref))

362

         else:

363

             data = '&#%s;' % ref

364

         self.handle_data(data)

365

@@ -1056,7 +1056,7 @@

366

         data = None

367

         if self.soup.convertHTMLEntities:

368

             try:

369

-                data = unichr(name2codepoint[ref])

370

+                data = chr(name2codepoint[ref])

371

             except KeyError:

372

                 pass

373

374

@@ -1147,7 +1147,7 @@

375

                        lambda x: '<!' + x.group(1) + '>')

376

]

377

378

-    ROOT_TAG_NAME = u'[document]'

379

+    ROOT_TAG_NAME = '[document]'

380

381

     HTML_ENTITIES = "html"

382

     XML_ENTITIES = "xml"

383

@@ -1236,14 +1236,14 @@

384

     def _feed(self, inDocumentEncoding=None, isHTML=False):

385

         # Convert the document to Unicode.

386

         markup = self.markup

387

-        if isinstance(markup, unicode):

388

+        if isinstance(markup, str):

389

             if not hasattr(self, 'originalEncoding'):

390

                 self.originalEncoding = None

391

         else:

392

             dammit = UnicodeDammit\

393

                      (markup, [self.fromEncoding, inDocumentEncoding],

394

                       smartQuotesTo=self.smartQuotesTo, isHTML=isHTML)

395

-            markup = dammit.unicode

396

+            markup = dammit.str

397

             self.originalEncoding = dammit.originalEncoding

398

             self.declaredHTMLEncoding = dammit.declaredHTMLEncoding

399

         if markup:

400

@@ -1269,8 +1269,8 @@

401

     def isSelfClosingTag(self, name):

402

         """Returns true iff the given string is the name of a

403

         self-closing tag according to this parser."""

404

-        return self.SELF_CLOSING_TAGS.has_key(name) \

405

-               or self.instanceSelfClosingTags.has_key(name)

406

+        return name in self.SELF_CLOSING_TAGS \

407

+               or name in self.instanceSelfClosingTags

408

409

     def reset(self):

410

         Tag.__init__(self, self, self.ROOT_TAG_NAME)

411

@@ -1305,7 +1305,7 @@

412

413

     def endData(self, containerClass=NavigableString):

414

         if self.currentData:

415

-            currentData = u''.join(self.currentData)

416

+            currentData = ''.join(self.currentData)

417

             if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and

418

                 not set([tag.name for tag in self.tagStack]).intersection(

419

                     self.PRESERVE_WHITESPACE_TAGS)):

420

@@ -1368,7 +1368,7 @@

421

422

         nestingResetTriggers = self.NESTABLE_TAGS.get(name)

423

         isNestable = nestingResetTriggers != None

424

-        isResetNesting = self.RESET_NESTING_TAGS.has_key(name)

425

+        isResetNesting = name in self.RESET_NESTING_TAGS

426

         popTo = None

427

         inclusive = True

428

         for i in range(len(self.tagStack)-1, 0, -1):

429

@@ -1381,7 +1381,7 @@

430

             if (nestingResetTriggers != None

431

                 and p.name in nestingResetTriggers) \

432

                 or (nestingResetTriggers == None and isResetNesting

433

-                    and self.RESET_NESTING_TAGS.has_key(p.name)):

434

+                    and p.name in self.RESET_NESTING_TAGS):

435

436

                 #If we encounter one of the nesting reset triggers

437

                 #peculiar to this tag, or we encounter another tag

438

@@ -1399,7 +1399,7 @@

439

         if self.quoteStack:

440

             #This is not a real tag.

441

             #print "<%s> is not real!" % name

442

-            attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs))

443

+            attrs = ''.join([' %s="%s"' % (x_y[0], x_y[1]) for x_y in attrs])

444

             self.handle_data('<%s%s>' % (name, attrs))

445

             return

446

         self.endData()

447

@@ -1493,7 +1493,7 @@

448

     BeautifulStoneSoup before writing your own subclass."""

449

450

     def __init__(self, *args, **kwargs):

451

-        if not kwargs.has_key('smartQuotesTo'):

452

+        if 'smartQuotesTo' not in kwargs:

453

             kwargs['smartQuotesTo'] = self.HTML_ENTITIES

454

         kwargs['isHTML'] = True

455

         BeautifulStoneSoup.__init__(self, *args, **kwargs)

456

@@ -1677,7 +1677,7 @@

457

             parent._getAttrMap()

458

             if (isinstance(tag, Tag) and len(tag.contents) == 1 and

459

                 isinstance(tag.contents[0], NavigableString) and

460

-                not parent.attrMap.has_key(tag.name)):

461

+                tag.name not in parent.attrMap):

462

                 parent[tag.name] = tag.contents[0]

463

         BeautifulStoneSoup.popTag(self)

464

465

@@ -1751,9 +1751,9 @@

466

                      self._detectEncoding(markup, isHTML)

467

         self.smartQuotesTo = smartQuotesTo

468

         self.triedEncodings = []

469

-        if markup == '' or isinstance(markup, unicode):

470

+        if markup == '' or isinstance(markup, str):

471

             self.originalEncoding = None

472

-            self.unicode = unicode(markup)

473

+            self.str = str(markup)

474

             return

475

476

         u = None

477

@@ -1766,7 +1766,7 @@

478

                 if u: break

479

480

         # If no luck and we have auto-detection library, try that:

481

-        if not u and chardet and not isinstance(self.markup, unicode):

482

+        if not u and chardet and not isinstance(self.markup, str):

483

             u = self._convertFrom(chardet.detect(self.markup)['encoding'])

484

485

         # As a last resort, try utf-8 and windows-1252:

486

@@ -1775,7 +1775,7 @@

487

                 u = self._convertFrom(proposed_encoding)

488

                 if u: break

489

490

-        self.unicode = u

491

+        self.str = u

492

         if not u: self.originalEncoding = None

493

494

     def _subMSChar(self, match):

495

@@ -1783,7 +1783,7 @@

496

         entity."""

497

         orig = match.group(1)

498

         sub = self.MS_CHARS.get(orig)

499

-        if type(sub) == types.TupleType:

500

+        if type(sub) == tuple:

501

             if self.smartQuotesTo == 'xml':

502

                 sub = '&#x'.encode() + sub[1].encode() + ';'.encode()

503

             else:

504

@@ -1804,7 +1804,7 @@

505

         if self.smartQuotesTo and proposed.lower() in("windows-1252",

506

                                                       "iso-8859-1",

507

                                                       "iso-8859-2"):

508

-            smart_quotes_re = "([\x80-\x9f])"

509

+            smart_quotes_re = b"([\x80-\x9f])"

510

             smart_quotes_compiled = re.compile(smart_quotes_re)

511

             markup = smart_quotes_compiled.sub(self._subMSChar, markup)

512

513

@@ -1813,7 +1813,7 @@

514

             u = self._toUnicode(markup, proposed)

515

             self.markup = u

516

             self.originalEncoding = proposed

517

-        except Exception, e:

518

+        except Exception as e:

519

             # print "That didn't work!"

520

             # print e

521

             return None

522

@@ -1842,7 +1842,7 @@

523

         elif data[:4] == '\xff\xfe\x00\x00':

524

             encoding = 'utf-32le'

525

             data = data[4:]

526

-        newdata = unicode(data, encoding)

527

+        newdata = str(data, encoding)

528

         return newdata

529

530

     def _detectEncoding(self, xml_data, isHTML=False):

531

@@ -1855,41 +1855,41 @@

532

             elif xml_data[:4] == '\x00\x3c\x00\x3f':

533

                 # UTF-16BE

534

                 sniffed_xml_encoding = 'utf-16be'

535

-                xml_data = unicode(xml_data, 'utf-16be').encode('utf-8')

536

+                xml_data = str(xml_data, 'utf-16be').encode('utf-8')

537

             elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \

538

                      and (xml_data[2:4] != '\x00\x00'):

539

                 # UTF-16BE with BOM

540

                 sniffed_xml_encoding = 'utf-16be'

541

-                xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8')

542

+                xml_data = str(xml_data[2:], 'utf-16be').encode('utf-8')

543

             elif xml_data[:4] == '\x3c\x00\x3f\x00':

544

                 # UTF-16LE

545

                 sniffed_xml_encoding = 'utf-16le'

546

-                xml_data = unicode(xml_data, 'utf-16le').encode('utf-8')

547

+                xml_data = str(xml_data, 'utf-16le').encode('utf-8')

548

             elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \

549

                      (xml_data[2:4] != '\x00\x00'):

550

                 # UTF-16LE with BOM

551

                 sniffed_xml_encoding = 'utf-16le'

552

-                xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8')

553

+                xml_data = str(xml_data[2:], 'utf-16le').encode('utf-8')

554

             elif xml_data[:4] == '\x00\x00\x00\x3c':

555

                 # UTF-32BE

556

                 sniffed_xml_encoding = 'utf-32be'

557

-                xml_data = unicode(xml_data, 'utf-32be').encode('utf-8')

558

+                xml_data = str(xml_data, 'utf-32be').encode('utf-8')

559

             elif xml_data[:4] == '\x3c\x00\x00\x00':

560

                 # UTF-32LE

561

                 sniffed_xml_encoding = 'utf-32le'

562

-                xml_data = unicode(xml_data, 'utf-32le').encode('utf-8')

563

+                xml_data = str(xml_data, 'utf-32le').encode('utf-8')

564

             elif xml_data[:4] == '\x00\x00\xfe\xff':

565

                 # UTF-32BE with BOM

566

                 sniffed_xml_encoding = 'utf-32be'

567

-                xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8')

568

+                xml_data = str(xml_data[4:], 'utf-32be').encode('utf-8')

569

             elif xml_data[:4] == '\xff\xfe\x00\x00':

570

                 # UTF-32LE with BOM

571

                 sniffed_xml_encoding = 'utf-32le'

572

-                xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8')

573

+                xml_data = str(xml_data[4:], 'utf-32le').encode('utf-8')

574

             elif xml_data[:3] == '\xef\xbb\xbf':

575

                 # UTF-8 with BOM

576

                 sniffed_xml_encoding = 'utf-8'

577

-                xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8')

578

+                xml_data = str(xml_data[3:], 'utf-8').encode('utf-8')

579

             else:

580

                 sniffed_xml_encoding = 'ascii'

581

                 pass

582

@@ -1954,41 +1954,41 @@

583

                     250,251,252,253,254,255)

584

             import string

585

             c.EBCDIC_TO_ASCII_MAP = string.maketrans( \

586

-            ''.join(map(chr, range(256))), ''.join(map(chr, emap)))

587

+            ''.join(map(chr, list(range(256)))), ''.join(map(chr, emap)))

588

         return s.translate(c.EBCDIC_TO_ASCII_MAP)

589

590

-    MS_CHARS = { '\x80' : ('euro', '20AC'),

591

-                 '\x81' : ' ',

592

-                 '\x82' : ('sbquo', '201A'),

593

-                 '\x83' : ('fnof', '192'),

594

-                 '\x84' : ('bdquo', '201E'),

595

-                 '\x85' : ('hellip', '2026'),

596

-                 '\x86' : ('dagger', '2020'),

597

-                 '\x87' : ('Dagger', '2021'),

598

-                 '\x88' : ('circ', '2C6'),

599

-                 '\x89' : ('permil', '2030'),

600

-                 '\x8A' : ('Scaron', '160'),

601

-                 '\x8B' : ('lsaquo', '2039'),

602

-                 '\x8C' : ('OElig', '152'),

603

-                 '\x8D' : '?',

604

-                 '\x8E' : ('#x17D', '17D'),

605

-                 '\x8F' : '?',

606

-                 '\x90' : '?',

607

-                 '\x91' : ('lsquo', '2018'),

608

-                 '\x92' : ('rsquo', '2019'),

609

-                 '\x93' : ('ldquo', '201C'),

610

-                 '\x94' : ('rdquo', '201D'),

611

-                 '\x95' : ('bull', '2022'),

612

-                 '\x96' : ('ndash', '2013'),

613

-                 '\x97' : ('mdash', '2014'),

614

-                 '\x98' : ('tilde', '2DC'),

615

-                 '\x99' : ('trade', '2122'),

616

-                 '\x9a' : ('scaron', '161'),

617

-                 '\x9b' : ('rsaquo', '203A'),

618

-                 '\x9c' : ('oelig', '153'),

619

-                 '\x9d' : '?',

620

-                 '\x9e' : ('#x17E', '17E'),

621

-                 '\x9f' : ('Yuml', ''),}

622

+    MS_CHARS = { b'\x80' : ('euro', '20AC'),

623

+                 b'\x81' : ' ',

624

+                 b'\x82' : ('sbquo', '201A'),

625

+                 b'\x83' : ('fnof', '192'),

626

+                 b'\x84' : ('bdquo', '201E'),

627

+                 b'\x85' : ('hellip', '2026'),

628

+                 b'\x86' : ('dagger', '2020'),

629

+                 b'\x87' : ('Dagger', '2021'),

630

+                 b'\x88' : ('circ', '2C6'),

631

+                 b'\x89' : ('permil', '2030'),

632

+                 b'\x8A' : ('Scaron', '160'),

633

+                 b'\x8B' : ('lsaquo', '2039'),

634

+                 b'\x8C' : ('OElig', '152'),

635

+                 b'\x8D' : '?',

636

+                 b'\x8E' : ('#x17D', '17D'),

637

+                 b'\x8F' : '?',

638

+                 b'\x90' : '?',

639

+                 b'\x91' : ('lsquo', '2018'),

640

+                 b'\x92' : ('rsquo', '2019'),

641

+                 b'\x93' : ('ldquo', '201C'),

642

+                 b'\x94' : ('rdquo', '201D'),

643

+                 b'\x95' : ('bull', '2022'),

644

+                 b'\x96' : ('ndash', '2013'),

645

+                 b'\x97' : ('mdash', '2014'),

646

+                 b'\x98' : ('tilde', '2DC'),

647

+                 b'\x99' : ('trade', '2122'),

648

+                 b'\x9a' : ('scaron', '161'),

649

+                 b'\x9b' : ('rsaquo', '203A'),

650

+                 b'\x9c' : ('oelig', '153'),

651

+                 b'\x9d' : '?',

652

+                 b'\x9e' : ('#x17E', '17E'),

653

+                 b'\x9f' : ('Yuml', ''),}

654

655

 #######################################################################

656

657

@@ -1997,4 +1997,4 @@

658

 if __name__ == '__main__':

659

     import sys

660

     soup = BeautifulSoup(sys.stdin)

661

-    print soup.prettify()

662

+    print(soup.prettify())

663

--- BeautifulSoupTests.py

664

+++ BeautifulSoupTests.py

665

@@ -82,7 +82,7 @@

666

     def testFindAllText(self):

667

         soup = BeautifulSoup("<html>\xbb</html>")

668

         self.assertEqual(soup.findAll(text=re.compile('.*')),

669

-                         [u'\xbb'])

670

+                         ['\xbb'])

671

672

     def testFindAllByRE(self):

673

         import re

674

@@ -215,7 +215,7 @@

675

         soup = BeautifulSoup(self.x, parseOnlyThese=strainer)

676

         self.assertEquals(len(soup), 10)

677

678

-        strainer = SoupStrainer(text=lambda(x):x[8]=='3')

679

+        strainer = SoupStrainer(text=lambda x:x[8]=='3')

680

         soup = BeautifulSoup(self.x, parseOnlyThese=strainer)

681

         self.assertEquals(len(soup), 3)

682

683

@@ -256,7 +256,7 @@

684

         self.assertEqual(copied.decode(), self.soup.decode())

685

686

     def testUnicodePickle(self):

687

-        import cPickle as pickle

688

+        import pickle as pickle

689

         html = "<b>" + chr(0xc3) + "</b>"

690

         soup = BeautifulSoup(html)

691

         dumped = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL)

692

@@ -586,23 +586,23 @@

693

         self.assertEquals(soup.decode(), "<<sacr&eacute; bleu!>>")

694

695

         soup = BeautifulStoneSoup(text, convertEntities=htmlEnt)

696

-        self.assertEquals(soup.decode(), u"<<sacr\xe9 bleu!>>")

697

+        self.assertEquals(soup.decode(), "<<sacr\xe9 bleu!>>")

698

699

         # Make sure the "XML", "HTML", and "XHTML" settings work.

700

         text = "&lt;&trade;&apos;"

701

         soup = BeautifulStoneSoup(text, convertEntities=xmlEnt)

702

-        self.assertEquals(soup.decode(), u"<&trade;'")

703

+        self.assertEquals(soup.decode(), "<&trade;'")

704

705

         soup = BeautifulStoneSoup(text, convertEntities=htmlEnt)

706

-        self.assertEquals(soup.decode(), u"<\u2122&apos;")

707

+        self.assertEquals(soup.decode(), "<\u2122&apos;")

708

709

         soup = BeautifulStoneSoup(text, convertEntities=xhtmlEnt)

710

-        self.assertEquals(soup.decode(), u"<\u2122'")

711

+        self.assertEquals(soup.decode(), "<\u2122'")

712

713

     def testNonBreakingSpaces(self):

714

         soup = BeautifulSoup("<a>&nbsp;&nbsp;</a>",

715

                              convertEntities=BeautifulStoneSoup.HTML_ENTITIES)

716

-        self.assertEquals(soup.decode(), u"<a>\xa0\xa0</a>")

717

+        self.assertEquals(soup.decode(), "<a>\xa0\xa0</a>")

718

719

     def testWhitespaceInDeclaration(self):

720

         self.assertSoupEquals('<! DOCTYPE>', '<!DOCTYPE>')

721

@@ -617,27 +617,27 @@

722

         self.assertSoupEquals('<b>hello&nbsp;there</b>')

723

724

     def testEntitiesInAttributeValues(self):

725

-        self.assertSoupEquals('<x t="x&#241;">', '<x t="x\xc3\xb1"></x>',

726

+        self.assertSoupEquals('<x t="x&#241;">', b'<x t="x\xc3\xb1"></x>',

727

                               encoding='utf-8')

728

-        self.assertSoupEquals('<x t="x&#xf1;">', '<x t="x\xc3\xb1"></x>',

729

+        self.assertSoupEquals('<x t="x&#xf1;">', b'<x t="x\xc3\xb1"></x>',

730

                               encoding='utf-8')

731

732

         soup = BeautifulSoup('<x t="&gt;&trade;">',

733

                              convertEntities=BeautifulStoneSoup.HTML_ENTITIES)

734

-        self.assertEquals(soup.decode(), u'<x t="&gt;\u2122"></x>')

735

+        self.assertEquals(soup.decode(), '<x t="&gt;\u2122"></x>')

736

737

         uri = "http://crummy.com?sacr&eacute;&amp;bleu"

738

         link = '<a href="%s"></a>' % uri

739

740

         soup = BeautifulSoup(link, convertEntities=BeautifulSoup.HTML_ENTITIES)

741

         self.assertEquals(soup.decode(),

742

-                          link.replace("&eacute;", u"\xe9"))

743

+                          link.replace("&eacute;", "\xe9"))

744

745

         uri = "http://crummy.com?sacr&eacute;&bleu"

746

         link = '<a href="%s"></a>' % uri

747

         soup = BeautifulSoup(link, convertEntities=BeautifulSoup.HTML_ENTITIES)

748

         self.assertEquals(soup.a['href'],

749

-                          uri.replace("&eacute;", u"\xe9"))

750

+                          uri.replace("&eacute;", "\xe9"))

751

752

     def testNakedAmpersands(self):

753

         html = {'convertEntities':BeautifulStoneSoup.HTML_ENTITIES}

754

@@ -663,13 +663,13 @@

755

     smart quote fixes."""

756

757

     def testUnicodeDammitStandalone(self):

758

-        markup = "<foo>\x92</foo>"

759

+        markup = b"<foo>\x92</foo>"

760

         dammit = UnicodeDammit(markup)

761

-        self.assertEquals(dammit.unicode, "<foo>&#x2019;</foo>")

762

+        self.assertEquals(dammit.str, "<foo>&#x2019;</foo>")

763

764

-        hebrew = "\xed\xe5\xec\xf9"

765

+        hebrew = b"\xed\xe5\xec\xf9"

766

         dammit = UnicodeDammit(hebrew, ["iso-8859-8"])

767

-        self.assertEquals(dammit.unicode, u'\u05dd\u05d5\u05dc\u05e9')

768

+        self.assertEquals(dammit.str, '\u05dd\u05d5\u05dc\u05e9')

769

         self.assertEquals(dammit.originalEncoding, 'iso-8859-8')

770

771

     def testGarbageInGarbageOut(self):

772

@@ -677,13 +677,13 @@

773

         asciiSoup = BeautifulStoneSoup(ascii)

774

         self.assertEquals(ascii, asciiSoup.decode())

775

776

-        unicodeData = u"<foo>\u00FC</foo>"

777

+        unicodeData = "<foo>\u00FC</foo>"

778

         utf8 = unicodeData.encode("utf-8")

779

-        self.assertEquals(utf8, '<foo>\xc3\xbc</foo>')

780

+        self.assertEquals(utf8, b'<foo>\xc3\xbc</foo>')

781

782

         unicodeSoup = BeautifulStoneSoup(unicodeData)

783

         self.assertEquals(unicodeData, unicodeSoup.decode())

784

-        self.assertEquals(unicodeSoup.foo.string, u'\u00FC')

785

+        self.assertEquals(unicodeSoup.foo.string, '\u00FC')

786

787

         utf8Soup = BeautifulStoneSoup(utf8, fromEncoding='utf-8')

788

         self.assertEquals(utf8, utf8Soup.encode('utf-8'))

789

@@ -696,18 +696,18 @@

790

791

     def testHandleInvalidCodec(self):

792

         for bad_encoding in ['.utf8', '...', 'utF---16.!']:

793

-            soup = BeautifulSoup(u"Räksmörgås".encode("utf-8"),

794

+            soup = BeautifulSoup("Räksmörgås".encode("utf-8"),

795

                                  fromEncoding=bad_encoding)

796

             self.assertEquals(soup.originalEncoding, 'utf-8')

797

798

     def testUnicodeSearch(self):

799

-        html = u'<html><body><h1>Räksmörgås</h1></body></html>'

800

+        html = '<html><body><h1>Räksmörgås</h1></body></html>'

801

         soup = BeautifulSoup(html)

802

-        self.assertEqual(soup.find(text=u'Räksmörgås'),u'Räksmörgås')

803

+        self.assertEqual(soup.find(text='Räksmörgås'),'Räksmörgås')

804

805

     def testRewrittenXMLHeader(self):

806

-        euc_jp = '<?xml version="1.0 encoding="euc-jp"?>\n<foo>\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n</foo>\n'

807

-        utf8 = "<?xml version='1.0' encoding='utf-8'?>\n<foo>\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n</foo>\n"

808

+        euc_jp = b'<?xml version="1.0 encoding="euc-jp"?>\n<foo>\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n</foo>\n'

809

+        utf8 = b"<?xml version='1.0' encoding='utf-8'?>\n<foo>\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n</foo>\n"

810

         soup = BeautifulStoneSoup(euc_jp)

811

         if soup.originalEncoding != "euc-jp":

812

             raise Exception("Test failed when parsing euc-jp document. "

813

@@ -718,12 +718,12 @@

814

         self.assertEquals(soup.originalEncoding, "euc-jp")

815

         self.assertEquals(soup.renderContents('utf-8'), utf8)

816

817

-        old_text = "<?xml encoding='windows-1252'><foo>\x92</foo>"

818

+        old_text = b"<?xml encoding='windows-1252'><foo>\x92</foo>"

819

         new_text = "<?xml version='1.0' encoding='utf-8'?><foo>&rsquo;</foo>"

820

         self.assertSoupEquals(old_text, new_text)

821

822

     def testRewrittenMetaTag(self):

823

-        no_shift_jis_html = '''<html><head>\n<meta http-equiv="Content-language" content="ja" /></head><body><pre>\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n</pre></body></html>'''

824

+        no_shift_jis_html = b'''<html><head>\n<meta http-equiv="Content-language" content="ja" /></head><body><pre>\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n</pre></body></html>'''

825

         soup = BeautifulSoup(no_shift_jis_html)

826

827

         # Beautiful Soup used to try to rewrite the meta tag even if the

828

@@ -733,16 +733,16 @@

829

         soup = BeautifulSoup(no_shift_jis_html, parseOnlyThese=strainer)

830

         self.assertEquals(soup.contents[0].name, 'pre')

831

832

-        meta_tag = ('<meta content="text/html; charset=x-sjis" '

833

-                    'http-equiv="Content-type" />')

834

+        meta_tag = (b'<meta content="text/html; charset=x-sjis" '

835

+                    b'http-equiv="Content-type" />')

836

         shift_jis_html = (

837

-            '<html><head>\n%s\n'

838

-            '<meta http-equiv="Content-language" content="ja" />'

839

-            '</head><body><pre>\n'

840

-            '\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'

841

-            '\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'

842

-            '\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n'

843

-            '</pre></body></html>') % meta_tag

844

+            b'<html><head>\n' + meta_tag + b'\n'

845

+            b'<meta http-equiv="Content-language" content="ja" />'

846

+            b'</head><body><pre>\n'

847

+            b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'

848

+            b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'

849

+            b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n'

850

+            b'</pre></body></html>')

851

         soup = BeautifulSoup(shift_jis_html)

852

         if soup.originalEncoding != "shift-jis":

853

             raise Exception("Test failed when parsing shift-jis document "

854

@@ -755,59 +755,59 @@

855

         content_type_tag = soup.meta['content']

856

         self.assertEquals(content_type_tag[content_type_tag.find('charset='):],

857

                           'charset=%SOUP-ENCODING%')

858

-        content_type = str(soup.meta)

859

+        content_type = soup.meta.decode()

860

         index = content_type.find('charset=')

861

         self.assertEqual(content_type[index:index+len('charset=utf8')+1],

862

                          'charset=utf-8')

863

         content_type = soup.meta.encode('shift-jis')

864

-        index = content_type.find('charset=')

865

+        index = content_type.find(b'charset=')

866

         self.assertEqual(content_type[index:index+len('charset=shift-jis')],

867

                          'charset=shift-jis'.encode())

868

869

         self.assertEquals(soup.encode('utf-8'), (

870

-                '<html><head>\n'

871

-                '<meta content="text/html; charset=utf-8" '

872

-                'http-equiv="Content-type" />\n'

873

-                '<meta http-equiv="Content-language" content="ja" />'

874

-                '</head><body><pre>\n'

875

-                '\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3'

876

-                '\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3'

877

-                '\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6'

878

-                '\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3'

879

-                '\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n'

880

-                '</pre></body></html>'))

881

+                b'<html><head>\n'

882

+                b'<meta content="text/html; charset=utf-8" '

883

+                b'http-equiv="Content-type" />\n'

884

+                b'<meta http-equiv="Content-language" content="ja" />'

885

+                b'</head><body><pre>\n'

886

+                b'\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3'

887

+                b'\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3'

888

+                b'\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6'

889

+                b'\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3'

890

+                b'\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n'

891

+                b'</pre></body></html>'))

892

         self.assertEquals(soup.encode("shift-jis"),

893

                           shift_jis_html.replace('x-sjis'.encode(),

894

                                                  'shift-jis'.encode()))

895

896

-        isolatin = """<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>"""

897

+        isolatin = b"""<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>"""

898

         soup = BeautifulSoup(isolatin)

899

900

         utf8 = isolatin.replace("ISO-Latin-1".encode(), "utf-8".encode())

901

-        utf8 = utf8.replace("\xe9", "\xc3\xa9")

902

+        utf8 = utf8.replace(b"\xe9", b"\xc3\xa9")

903

         self.assertSoupEquals(soup.encode("utf-8"), utf8, encoding='utf-8')

904

905

     def testHebrew(self):

906

-        iso_8859_8= '<HEAD>\n<TITLE>Hebrew (ISO 8859-8) in Visual Directionality</TITLE>\n\n\n\n</HEAD>\n<BODY>\n<H1>Hebrew (ISO 8859-8) in Visual Directionality</H1>\n\xed\xe5\xec\xf9\n</BODY>\n'

907

-        utf8 = '<head>\n<title>Hebrew (ISO 8859-8) in Visual Directionality</title>\n</head>\n<body>\n<h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n</body>\n'

908

+        iso_8859_8= b'<HEAD>\n<TITLE>Hebrew (ISO 8859-8) in Visual Directionality</TITLE>\n\n\n\n</HEAD>\n<BODY>\n<H1>Hebrew (ISO 8859-8) in Visual Directionality</H1>\n\xed\xe5\xec\xf9\n</BODY>\n'

909

+        utf8 = b'<head>\n<title>Hebrew (ISO 8859-8) in Visual Directionality</title>\n</head>\n<body>\n<h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n</body>\n'

910

         soup = BeautifulStoneSoup(iso_8859_8, fromEncoding="iso-8859-8")

911

         self.assertEquals(soup.encode('utf-8'), utf8)

912

913

     def testSmartQuotesNotSoSmartAnymore(self):

914

-        self.assertSoupEquals("\x91Foo\x92 <!--blah-->",

915

+        self.assertSoupEquals(b"\x91Foo\x92 <!--blah-->",

916

                               '&lsquo;Foo&rsquo; <!--blah-->')

917

918

     def testDontConvertSmartQuotesWhenAlsoConvertingEntities(self):

919

-        smartQuotes = "Il a dit, \x8BSacr&eacute; bl&#101;u!\x9b"

920

+        smartQuotes = b"Il a dit, \x8BSacr&eacute; bl&#101;u!\x9b"

921

         soup = BeautifulSoup(smartQuotes)

922

         self.assertEquals(soup.decode(),

923

                           'Il a dit, &lsaquo;Sacr&eacute; bl&#101;u!&rsaquo;')

924

         soup = BeautifulSoup(smartQuotes, convertEntities="html")

925

         self.assertEquals(soup.encode('utf-8'),

926

-                          'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba')

927

+                          b'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba')

928

929

     def testDontSeeSmartQuotesWhereThereAreNone(self):

930

-        utf_8 = "\343\202\261\343\203\274\343\202\277\343\202\244 Watch"

931

+        utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch"

932

         self.assertSoupEquals(utf_8, encoding='utf-8')

933

934

935

--- setup.py

936

+++ setup.py

937

@@ -19,19 +19,19 @@

938

 suite = loader.loadTestsFromModule(BeautifulSoupTests)

939

 suite.run(result)

940

 if not result.wasSuccessful():

941

-    print "Unit tests have failed!"

942

+    print("Unit tests have failed!")

943

     for l in result.errors, result.failures:

944

         for case, error in l:

945

-            print "-" * 80

946

+            print("-" * 80)

947

             desc = case.shortDescription()

948

             if desc:

949

-                print desc

950

-            print error        

951

-    print '''If you see an error like: "'ascii' codec can't encode character...", see\nthe Beautiful Soup documentation:\n http://www.crummy.com/software/BeautifulSoup/documentation.html#Why%20can't%20Beautiful%20Soup%20print%20out%20the%20non-ASCII%20characters%20I%20gave%20it?'''

952

-    print "This might or might not be a problem depending on what you plan to do with\nBeautiful Soup."

953

+                print(desc)

954

+            print(error)        

955

+    print('''If you see an error like: "'ascii' codec can't encode character...", see\nthe Beautiful Soup documentation:\n http://www.crummy.com/software/BeautifulSoup/documentation.html#Why%20can't%20Beautiful%20Soup%20print%20out%20the%20non-ASCII%20characters%20I%20gave%20it?''')

956

+    print("This might or might not be a problem depending on what you plan to do with\nBeautiful Soup.")

957

     if sys.argv[1] == 'sdist':

958

-        print

959

-        print "I'm not going to make a source distribution since the tests don't pass."

960

+        print()

961

+        print("I'm not going to make a source distribution since the tests don't pass.")

962

         sys.exit(1)

963

964

 setup(name="BeautifulSoup",

Gentoo Archives: gentoo-commits