[gentoo-commits] repo/gentoo:master commit in: dev-python/beautifulsoup/files/, profiles/, dev-python/beautifulsoup/ - gentoo-commits

From:	"Michał Górny" <mgorny@g.o>
To:	gentoo-commits@l.g.o
Subject:	[gentoo-commits] repo/gentoo:master commit in: dev-python/beautifulsoup/files/, profiles/, dev-python/beautifulsoup/
Date:	Mon, 14 Aug 2017 07:13:38
Message-Id:	`1502694753.9741510885394a808ea58561b23a1711771f1f4a.mgorny@gentoo`

1

commit:     9741510885394a808ea58561b23a1711771f1f4a

2

Author:     Michał Górny <mgorny <AT> gentoo <DOT> org>

3

AuthorDate: Mon Aug 14 06:46:57 2017 +0000

4

Commit:     Michał Górny <mgorny <AT> gentoo <DOT> org>

5

CommitDate: Mon Aug 14 07:12:33 2017 +0000

6

URL:        https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=97415108

7

8

dev-python/beautifulsoup: Drop masked ancient version

9

10

 dev-python/beautifulsoup/Manifest                  |   1 -

11

 .../beautifulsoup/beautifulsoup-3.1.0.1-r2.ebuild  |  40 -

12

 .../beautifulsoup-3.1.0.1-disable-tests.patch      |  39 -

13

 .../files/beautifulsoup-3.1.0.1-python-3.patch     | 949 ---------------------

14

 profiles/package.mask                              |   5 -

15

 5 files changed, 1034 deletions(-)

16

17

diff --git a/dev-python/beautifulsoup/Manifest b/dev-python/beautifulsoup/Manifest

18

index a87f7f747d9..6ba59cf1880 100644

19

--- a/dev-python/beautifulsoup/Manifest

20

+++ b/dev-python/beautifulsoup/Manifest

21

@@ -1,4 +1,3 @@

22

-DIST BeautifulSoup-3.1.0.1.tar.gz 71460 SHA256 820a80f473240d9d30047f36c959d530a699a732500662dd8b03e1d3ccad12a8 SHA512 812969faf454a58d849921836ed07ec9a950f34fb31e29e118cdf1a75a533370e430f417402b5a5016d23b2d3a1c44a1cf5fde5b3bfd1bc98c50036edd51c0d6 WHIRLPOOL a199585817dcabcc6327c3836a66128605ebf92a6663b5c660125061a797485a504d300791bcd43e0e94e4f08ca59c01f65f42481da07b1240350cbfc6ea6b0c

23

 DIST BeautifulSoup-3.2.1.tar.gz 31224 SHA256 f5ba85e907e7dfd78e44e4000b3eaef3a650aefc57831e8a645702db2e5b50db SHA512 365b7b045a2069cf437877543577bc0aa99256a6dc4c9743670b46bfceab5494a06628012d6eccecfe99c25d5c9e0c65814964b47026f15ba1a538444cfb7789 WHIRLPOOL c2f84b29421d0153fb1fecc87d63e00a61182e03bc0683132babca5d6c94143b4875a60a19124a36e4e6e78ce80bff9e1e81b37335700efc14084da933307e26

24

 DIST beautifulsoup4-4.5.1.tar.gz 158039 SHA256 3c9474036afda9136aac6463def733f81017bf9ef3510d25634f335b0c87f5e1 SHA512 d560d7f743507084ec546708d29bb3764512f5b2c380004280dde813350bf48d1697fddce3bd3f95186407bf5142941d7adc7d0de8e7962eb5ca1278dbc7e93f WHIRLPOOL bf971596707c2ff69e93528164be01254258aa45601763c543246b67c5d31024b0e4de618382775a3cf313d255d8d1d6268a47542773531aacee9a2643412661

25

 DIST beautifulsoup4-4.5.3.tar.gz 159185 SHA256 b21ca09366fa596043578fd4188b052b46634d22059e68dd0077d9ee77e08a3e SHA512 d31db0e3bb778a78c37882fcd55dc580eb5eeadfd48744eae6e2e0d0ef5983b216a4682af84a4971611b05fb99c45012ce094475f2d7c39a5b90dad99906ec84 WHIRLPOOL f8dbffd8e4a1dbee0a7ad8a4bcbe22a984f524474f0241a4c03ef5c37b291f9834a6ff1d076421c0cf1087588df1e49f5b99cd9afd7e81591c9063d92d4d097d

26

27

diff --git a/dev-python/beautifulsoup/beautifulsoup-3.1.0.1-r2.ebuild b/dev-python/beautifulsoup/beautifulsoup-3.1.0.1-r2.ebuild

28

deleted file mode 100644

29

index a69a317f6d9..00000000000

30

--- a/dev-python/beautifulsoup/beautifulsoup-3.1.0.1-r2.ebuild

31

+++ /dev/null

32

@@ -1,40 +0,0 @@

33

-# Copyright 1999-2017 Gentoo Foundation

34

-# Distributed under the terms of the GNU General Public License v2

35

-

36

-EAPI="5"

37

-# A few tests fail with python3.3/3.4 :(

38

-PYTHON_COMPAT=( python3_4 pypy3 )

39

-

40

-inherit distutils-r1 eutils

41

-

42

-MY_PN="BeautifulSoup"

43

-MY_P="${MY_PN}-${PV}"

44

-

45

-DESCRIPTION="HTML/XML parser for quick-turnaround applications like screen-scraping"

46

-HOMEPAGE="http://www.crummy.com/software/BeautifulSoup/ https://pypi.python.org/pypi/BeautifulSoup"

47

-SRC_URI="http://www.crummy.com/software/${MY_PN}/download/${MY_P}.tar.gz"

48

-

49

-LICENSE="BSD"

50

-SLOT="python-3"

51

-KEYWORDS="alpha amd64 arm hppa ia64 ppc ppc64 s390 sh sparc x86 ~amd64-fbsd ~x86-fbsd ~amd64-linux ~x86-linux ~x86-macos ~x86-solaris"

52

-IUSE=""

53

-

54

-DEPEND=""

55

-RDEPEND="!dev-python/beautifulsoup:0"

56

-

57

-S="${WORKDIR}/${MY_P}"

58

-

59

-PATCHES=(

60

-	"${FILESDIR}/${P}-python-3.patch"

61

-	"${FILESDIR}/${P}-disable-tests.patch"

62

-)

63

-

64

-python_test() {

65

-	"${PYTHON}" BeautifulSoupTests.py || die "Tests fail with ${EPYTHON}"

66

-}

67

-

68

-python_install_all() {

69

-	distutils-r1_python_install_all

70

-	# Delete useless files.

71

-	rm -r "${ED%/}/usr/bin" || die

72

-}

73

74

diff --git a/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-disable-tests.patch b/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-disable-tests.patch

75

deleted file mode 100644

76

index c97cd76ee31..00000000000

77

--- a/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-disable-tests.patch

78

+++ /dev/null

79

@@ -1,39 +0,0 @@

80

---- lib/BeautifulSoupTests.py.orig	2015-07-21 08:39:33.077000000 +0000

81

-+++ lib/BeautifulSoupTests.py	2015-07-21 08:41:19.285000000 +0000

82

-@@ -538,13 +538,13 @@

83

-         text = "<td nowrap>foo</td>"

84

-         self.assertSoupEquals(text, text)

85

-

86

--    def testCData(self):

87

--        xml = "<root>foo<![CDATA[foobar]]>bar</root>"

88

--        self.assertSoupEquals(xml, xml)

89

--        r = re.compile("foo.*bar")

90

--        soup = BeautifulSoup(xml)

91

--        self.assertEquals(soup.find(text=r).string, "foobar")

92

--        self.assertEquals(soup.find(text=r).__class__, CData)

93

-+    #def testCData(self):

94

-+    #    xml = "<root>foo<![CDATA[foobar]]>bar</root>"

95

-+    #    self.assertSoupEquals(xml, xml)

96

-+    #    r = re.compile("foo.*bar")

97

-+    #    soup = BeautifulSoup(xml)

98

-+    #    self.assertEquals(soup.find(text=r).string, "foobar")

99

-+    #    self.assertEquals(soup.find(text=r).__class__, CData)

100

-

101

-     def testComments(self):

102

-         xml = "foo<!--foobar-->baz"

103

-@@ -607,11 +607,11 @@

104

-     def testWhitespaceInDeclaration(self):

105

-         self.assertSoupEquals('<! DOCTYPE>', '<!DOCTYPE>')

106

-

107

--    def testJunkInDeclaration(self):

108

--        self.assertSoupEquals('<! Foo = -8>a', '<!Foo = -8>a')

109

-+    #def testJunkInDeclaration(self):

110

-+    #    self.assertSoupEquals('<! Foo = -8>a', '<!Foo = -8>a')

111

-

112

--    def testIncompleteDeclaration(self):

113

--        self.assertSoupEquals('a<!b <p>c')

114

-+    #def testIncompleteDeclaration(self):

115

-+    #    self.assertSoupEquals('a<!b <p>c')

116

-

117

-     def testEntityReplacement(self):

118

-         self.assertSoupEquals('<b>hello&nbsp;there</b>')

119

120

diff --git a/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-python-3.patch b/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-python-3.patch

121

deleted file mode 100644

122

index adcbb43dd07..00000000000

123

--- a/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-python-3.patch

124

+++ /dev/null

125

@@ -1,949 +0,0 @@

126

---- BeautifulSoup.py

127

-+++ BeautifulSoup.py

128

-@@ -76,7 +76,7 @@

129

- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT.

130

-

131

- """

132

--from __future__ import generators

133

-+

134

-

135

- __author__ = "Leonard Richardson (leonardr@××××××××.org)"

136

- __version__ = "3.1.0.1"

137

-@@ -84,12 +84,12 @@

138

- __license__ = "New-style BSD"

139

-

140

- import codecs

141

--import markupbase

142

-+import _markupbase

143

- import types

144

- import re

145

--from HTMLParser import HTMLParser, HTMLParseError

146

-+from html.parser import HTMLParser, HTMLParseError

147

- try:

148

--    from htmlentitydefs import name2codepoint

149

-+    from html.entities import name2codepoint

150

- except ImportError:

151

-     name2codepoint = {}

152

- try:

153

-@@ -98,18 +98,18 @@

154

-     from sets import Set as set

155

-

156

- #These hacks make Beautiful Soup able to parse XML with namespaces

157

--markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match

158

-+_markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match

159

-

160

- DEFAULT_OUTPUT_ENCODING = "utf-8"

161

-

162

- # First, the classes that represent markup elements.

163

-

164

--def sob(unicode, encoding):

165

-+def sob(str, encoding):

166

-     """Returns either the given Unicode string or its encoding."""

167

-     if encoding is None:

168

--        return unicode

169

-+        return str

170

-     else:

171

--        return unicode.encode(encoding)

172

-+        return str.encode(encoding)

173

-

174

- class PageElement:

175

-     """Contains the navigational information for some part of the page

176

-@@ -178,8 +178,8 @@

177

-         return lastChild

178

-

179

-     def insert(self, position, newChild):

180

--        if (isinstance(newChild, basestring)

181

--            or isinstance(newChild, unicode)) \

182

-+        if (isinstance(newChild, str)

183

-+            or isinstance(newChild, str)) \

184

-             and not isinstance(newChild, NavigableString):

185

-             newChild = NavigableString(newChild)

186

-

187

-@@ -334,7 +334,7 @@

188

-         g = generator()

189

-         while True:

190

-             try:

191

--                i = g.next()

192

-+                i = g.__next__()

193

-             except StopIteration:

194

-                 break

195

-             if i:

196

-@@ -385,22 +385,22 @@

197

-     def toEncoding(self, s, encoding=None):

198

-         """Encodes an object to a string in some encoding, or to Unicode.

199

-         ."""

200

--        if isinstance(s, unicode):

201

-+        if isinstance(s, str):

202

-             if encoding:

203

-                 s = s.encode(encoding)

204

-         elif isinstance(s, str):

205

-             if encoding:

206

-                 s = s.encode(encoding)

207

-             else:

208

--                s = unicode(s)

209

-+                s = str(s)

210

-         else:

211

-             if encoding:

212

-                 s  = self.toEncoding(str(s), encoding)

213

-             else:

214

--                s = unicode(s)

215

-+                s = str(s)

216

-         return s

217

-

218

--class NavigableString(unicode, PageElement):

219

-+class NavigableString(str, PageElement):

220

-

221

-     def __new__(cls, value):

222

-         """Create a new NavigableString.

223

-@@ -410,12 +410,12 @@

224

-         passed in to the superclass's __new__ or the superclass won't know

225

-         how to handle non-ASCII characters.

226

-         """

227

--        if isinstance(value, unicode):

228

--            return unicode.__new__(cls, value)

229

--        return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)

230

-+        if isinstance(value, str):

231

-+            return str.__new__(cls, value)

232

-+        return str.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)

233

-

234

-     def __getnewargs__(self):

235

--        return (unicode(self),)

236

-+        return (str(self),)

237

-

238

-     def __getattr__(self, attr):

239

-         """text.string gives you text. This is for backwards

240

-@@ -424,7 +424,7 @@

241

-         if attr == 'string':

242

-             return self

243

-         else:

244

--            raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr)

245

-+            raise AttributeError("'%s' object has no attribute '%s'" % (self.__class__.__name__, attr))

246

-

247

-     def encode(self, encoding=DEFAULT_OUTPUT_ENCODING):

248

-         return self.decode().encode(encoding)

249

-@@ -435,23 +435,23 @@

250

- class CData(NavigableString):

251

-

252

-     def decodeGivenEventualEncoding(self, eventualEncoding):

253

--        return u'<![CDATA[' + self + u']]>'

254

-+        return '<![CDATA[' + self + ']]>'

255

-

256

- class ProcessingInstruction(NavigableString):

257

-

258

-     def decodeGivenEventualEncoding(self, eventualEncoding):

259

-         output = self

260

--        if u'%SOUP-ENCODING%' in output:

261

-+        if '%SOUP-ENCODING%' in output:

262

-             output = self.substituteEncoding(output, eventualEncoding)

263

--        return u'<?' + output + u'?>'

264

-+        return '<?' + output + '?>'

265

-

266

- class Comment(NavigableString):

267

-     def decodeGivenEventualEncoding(self, eventualEncoding):

268

--        return u'<!--' + self + u'-->'

269

-+        return '<!--' + self + '-->'

270

-

271

- class Declaration(NavigableString):

272

-     def decodeGivenEventualEncoding(self, eventualEncoding):

273

--        return u'<!' + self + u'>'

274

-+        return '<!' + self + '>'

275

-

276

- class Tag(PageElement):

277

-

278

-@@ -460,7 +460,7 @@

279

-     def _invert(h):

280

-         "Cheap function to invert a hash."

281

-         i = {}

282

--        for k,v in h.items():

283

-+        for k,v in list(h.items()):

284

-             i[v] = k

285

-         return i

286

-

287

-@@ -479,23 +479,23 @@

288

-         escaped."""

289

-         x = match.group(1)

290

-         if self.convertHTMLEntities and x in name2codepoint:

291

--            return unichr(name2codepoint[x])

292

-+            return chr(name2codepoint[x])

293

-         elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS:

294

-             if self.convertXMLEntities:

295

-                 return self.XML_ENTITIES_TO_SPECIAL_CHARS[x]

296

-             else:

297

--                return u'&%s;' % x

298

-+                return '&%s;' % x

299

-         elif len(x) > 0 and x[0] == '#':

300

-             # Handle numeric entities

301

-             if len(x) > 1 and x[1] == 'x':

302

--                return unichr(int(x[2:], 16))

303

-+                return chr(int(x[2:], 16))

304

-             else:

305

--                return unichr(int(x[1:]))

306

-+                return chr(int(x[1:]))

307

-

308

-         elif self.escapeUnrecognizedEntities:

309

--            return u'&amp;%s;' % x

310

-+            return '&amp;%s;' % x

311

-         else:

312

--            return u'&%s;' % x

313

-+            return '&%s;' % x

314

-

315

-     def __init__(self, parser, name, attrs=None, parent=None,

316

-                  previous=None):

317

-@@ -524,7 +524,7 @@

318

-                 return kval

319

-             return (k, re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);",

320

-                               self._convertEntities, val))

321

--        self.attrs = map(convert, self.attrs)

322

-+        self.attrs = list(map(convert, self.attrs))

323

-

324

-     def get(self, key, default=None):

325

-         """Returns the value of the 'key' attribute for the tag, or

326

-@@ -533,7 +533,7 @@

327

-         return self._getAttrMap().get(key, default)

328

-

329

-     def has_key(self, key):

330

--        return self._getAttrMap().has_key(key)

331

-+        return key in self._getAttrMap()

332

-

333

-     def __getitem__(self, key):

334

-         """tag[key] returns the value of the 'key' attribute for the tag,

335

-@@ -551,7 +551,7 @@

336

-     def __contains__(self, x):

337

-         return x in self.contents

338

-

339

--    def __nonzero__(self):

340

-+    def __bool__(self):

341

-         "A tag is non-None even if it has no contents."

342

-         return True

343

-

344

-@@ -577,14 +577,14 @@

345

-                 #We don't break because bad HTML can define the same

346

-                 #attribute multiple times.

347

-             self._getAttrMap()

348

--            if self.attrMap.has_key(key):

349

-+            if key in self.attrMap:

350

-                 del self.attrMap[key]

351

-

352

-     def __call__(self, *args, **kwargs):

353

-         """Calling a tag like a function is the same as calling its

354

-         findAll() method. Eg. tag('a') returns a list of all the A tags

355

-         found within this tag."""

356

--        return apply(self.findAll, args, kwargs)

357

-+        return self.findAll(*args, **kwargs)

358

-

359

-     def __getattr__(self, tag):

360

-         #print "Getattr %s.%s" % (self.__class__, tag)

361

-@@ -592,7 +592,7 @@

362

-             return self.find(tag[:-3])

363

-         elif tag.find('__') != 0:

364

-             return self.find(tag)

365

--        raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag)

366

-+        raise AttributeError("'%s' object has no attribute '%s'" % (self.__class__, tag))

367

-

368

-     def __eq__(self, other):

369

-         """Returns true iff this tag has the same name, the same attributes,

370

-@@ -868,7 +868,7 @@

371

-         if isinstance(markupName, Tag):

372

-             markup = markupName

373

-             markupAttrs = markup

374

--        callFunctionWithTagData = callable(self.name) \

375

-+        callFunctionWithTagData = hasattr(self.name, '__call__') \

376

-                                 and not isinstance(markupName, Tag)

377

-

378

-         if (not self.name) \

379

-@@ -880,7 +880,7 @@

380

-             else:

381

-                 match = True

382

-                 markupAttrMap = None

383

--                for attr, matchAgainst in self.attrs.items():

384

-+                for attr, matchAgainst in list(self.attrs.items()):

385

-                     if not markupAttrMap:

386

-                          if hasattr(markupAttrs, 'get'):

387

-                             markupAttrMap = markupAttrs

388

-@@ -921,16 +921,16 @@

389

-             if self._matches(markup, self.text):

390

-                 found = markup

391

-         else:

392

--            raise Exception, "I don't know how to match against a %s" \

393

--                  % markup.__class__

394

-+            raise Exception("I don't know how to match against a %s" \

395

-+                  % markup.__class__)

396

-         return found

397

-

398

-     def _matches(self, markup, matchAgainst):

399

-         #print "Matching %s against %s" % (markup, matchAgainst)

400

-         result = False

401

--        if matchAgainst == True and type(matchAgainst) == types.BooleanType:

402

-+        if matchAgainst == True and type(matchAgainst) == bool:

403

-             result = markup != None

404

--        elif callable(matchAgainst):

405

-+        elif hasattr(matchAgainst, '__call__'):

406

-             result = matchAgainst(markup)

407

-         else:

408

-             #Custom match methods take the tag as an argument, but all

409

-@@ -938,7 +938,7 @@

410

-             if isinstance(markup, Tag):

411

-                 markup = markup.name

412

-             if markup is not None and not isString(markup):

413

--                markup = unicode(markup)

414

-+                markup = str(markup)

415

-             #Now we know that chunk is either a string, or None.

416

-             if hasattr(matchAgainst, 'match'):

417

-                 # It's a regexp object.

418

-@@ -947,10 +947,10 @@

419

-                   and (markup is not None or not isString(matchAgainst))):

420

-                 result = markup in matchAgainst

421

-             elif hasattr(matchAgainst, 'items'):

422

--                result = markup.has_key(matchAgainst)

423

-+                result = matchAgainst in markup

424

-             elif matchAgainst and isString(markup):

425

--                if isinstance(markup, unicode):

426

--                    matchAgainst = unicode(matchAgainst)

427

-+                if isinstance(markup, str):

428

-+                    matchAgainst = str(matchAgainst)

429

-                 else:

430

-                     matchAgainst = str(matchAgainst)

431

-

432

-@@ -971,13 +971,13 @@

433

-     """Convenience method that works with all 2.x versions of Python

434

-     to determine whether or not something is listlike."""

435

-     return ((hasattr(l, '__iter__') and not isString(l))

436

--            or (type(l) in (types.ListType, types.TupleType)))

437

-+            or (type(l) in (list, tuple)))

438

-

439

- def isString(s):

440

-     """Convenience method that works with all 2.x versions of Python

441

-     to determine whether or not something is stringlike."""

442

-     try:

443

--        return isinstance(s, unicode) or isinstance(s, basestring)

444

-+        return isinstance(s, str) or isinstance(s, str)

445

-     except NameError:

446

-         return isinstance(s, str)

447

-

448

-@@ -989,7 +989,7 @@

449

-     for portion in args:

450

-         if hasattr(portion, 'items'):

451

-             #It's a map. Merge it.

452

--            for k,v in portion.items():

453

-+            for k,v in list(portion.items()):

454

-                 built[k] = v

455

-         elif isList(portion) and not isString(portion):

456

-             #It's a list. Map each item to the default.

457

-@@ -1034,7 +1034,7 @@

458

-         object, possibly one with a %SOUP-ENCODING% slot into which an

459

-         encoding will be plugged later."""

460

-         if text[:3] == "xml":

461

--            text = u"xml version='1.0' encoding='%SOUP-ENCODING%'"

462

-+            text = "xml version='1.0' encoding='%SOUP-ENCODING%'"

463

-         self._toStringSubclass(text, ProcessingInstruction)

464

-

465

-     def handle_comment(self, text):

466

-@@ -1044,7 +1044,7 @@

467

-     def handle_charref(self, ref):

468

-         "Handle character references as data."

469

-         if self.soup.convertEntities:

470

--            data = unichr(int(ref))

471

-+            data = chr(int(ref))

472

-         else:

473

-             data = '&#%s;' % ref

474

-         self.handle_data(data)

475

-@@ -1056,7 +1056,7 @@

476

-         data = None

477

-         if self.soup.convertHTMLEntities:

478

-             try:

479

--                data = unichr(name2codepoint[ref])

480

-+                data = chr(name2codepoint[ref])

481

-             except KeyError:

482

-                 pass

483

-

484

-@@ -1147,7 +1147,7 @@

485

-                        lambda x: '<!' + x.group(1) + '>')

486

-                       ]

487

-

488

--    ROOT_TAG_NAME = u'[document]'

489

-+    ROOT_TAG_NAME = '[document]'

490

-

491

-     HTML_ENTITIES = "html"

492

-     XML_ENTITIES = "xml"

493

-@@ -1236,14 +1236,14 @@

494

-     def _feed(self, inDocumentEncoding=None, isHTML=False):

495

-         # Convert the document to Unicode.

496

-         markup = self.markup

497

--        if isinstance(markup, unicode):

498

-+        if isinstance(markup, str):

499

-             if not hasattr(self, 'originalEncoding'):

500

-                 self.originalEncoding = None

501

-         else:

502

-             dammit = UnicodeDammit\

503

-                      (markup, [self.fromEncoding, inDocumentEncoding],

504

-                       smartQuotesTo=self.smartQuotesTo, isHTML=isHTML)

505

--            markup = dammit.unicode

506

-+            markup = dammit.str

507

-             self.originalEncoding = dammit.originalEncoding

508

-             self.declaredHTMLEncoding = dammit.declaredHTMLEncoding

509

-         if markup:

510

-@@ -1269,8 +1269,8 @@

511

-     def isSelfClosingTag(self, name):

512

-         """Returns true iff the given string is the name of a

513

-         self-closing tag according to this parser."""

514

--        return self.SELF_CLOSING_TAGS.has_key(name) \

515

--               or self.instanceSelfClosingTags.has_key(name)

516

-+        return name in self.SELF_CLOSING_TAGS \

517

-+               or name in self.instanceSelfClosingTags

518

-

519

-     def reset(self):

520

-         Tag.__init__(self, self, self.ROOT_TAG_NAME)

521

-@@ -1305,7 +1305,7 @@

522

-

523

-     def endData(self, containerClass=NavigableString):

524

-         if self.currentData:

525

--            currentData = u''.join(self.currentData)

526

-+            currentData = ''.join(self.currentData)

527

-             if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and

528

-                 not set([tag.name for tag in self.tagStack]).intersection(

529

-                     self.PRESERVE_WHITESPACE_TAGS)):

530

-@@ -1368,7 +1368,7 @@

531

-

532

-         nestingResetTriggers = self.NESTABLE_TAGS.get(name)

533

-         isNestable = nestingResetTriggers != None

534

--        isResetNesting = self.RESET_NESTING_TAGS.has_key(name)

535

-+        isResetNesting = name in self.RESET_NESTING_TAGS

536

-         popTo = None

537

-         inclusive = True

538

-         for i in range(len(self.tagStack)-1, 0, -1):

539

-@@ -1381,7 +1381,7 @@

540

-             if (nestingResetTriggers != None

541

-                 and p.name in nestingResetTriggers) \

542

-                 or (nestingResetTriggers == None and isResetNesting

543

--                    and self.RESET_NESTING_TAGS.has_key(p.name)):

544

-+                    and p.name in self.RESET_NESTING_TAGS):

545

-

546

-                 #If we encounter one of the nesting reset triggers

547

-                 #peculiar to this tag, or we encounter another tag

548

-@@ -1399,7 +1399,7 @@

549

-         if self.quoteStack:

550

-             #This is not a real tag.

551

-             #print "<%s> is not real!" % name

552

--            attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs))

553

-+            attrs = ''.join([' %s="%s"' % (x_y[0], x_y[1]) for x_y in attrs])

554

-             self.handle_data('<%s%s>' % (name, attrs))

555

-             return

556

-         self.endData()

557

-@@ -1493,7 +1493,7 @@

558

-     BeautifulStoneSoup before writing your own subclass."""

559

-

560

-     def __init__(self, *args, **kwargs):

561

--        if not kwargs.has_key('smartQuotesTo'):

562

-+        if 'smartQuotesTo' not in kwargs:

563

-             kwargs['smartQuotesTo'] = self.HTML_ENTITIES

564

-         kwargs['isHTML'] = True

565

-         BeautifulStoneSoup.__init__(self, *args, **kwargs)

566

-@@ -1677,7 +1677,7 @@

567

-             parent._getAttrMap()

568

-             if (isinstance(tag, Tag) and len(tag.contents) == 1 and

569

-                 isinstance(tag.contents[0], NavigableString) and

570

--                not parent.attrMap.has_key(tag.name)):

571

-+                tag.name not in parent.attrMap):

572

-                 parent[tag.name] = tag.contents[0]

573

-         BeautifulStoneSoup.popTag(self)

574

-

575

-@@ -1751,9 +1751,9 @@

576

-                      self._detectEncoding(markup, isHTML)

577

-         self.smartQuotesTo = smartQuotesTo

578

-         self.triedEncodings = []

579

--        if markup == '' or isinstance(markup, unicode):

580

-+        if markup == '' or isinstance(markup, str):

581

-             self.originalEncoding = None

582

--            self.unicode = unicode(markup)

583

-+            self.str = str(markup)

584

-             return

585

-

586

-         u = None

587

-@@ -1766,7 +1766,7 @@

588

-                 if u: break

589

-

590

-         # If no luck and we have auto-detection library, try that:

591

--        if not u and chardet and not isinstance(self.markup, unicode):

592

-+        if not u and chardet and not isinstance(self.markup, str):

593

-             u = self._convertFrom(chardet.detect(self.markup)['encoding'])

594

-

595

-         # As a last resort, try utf-8 and windows-1252:

596

-@@ -1775,7 +1775,7 @@

597

-                 u = self._convertFrom(proposed_encoding)

598

-                 if u: break

599

-

600

--        self.unicode = u

601

-+        self.str = u

602

-         if not u: self.originalEncoding = None

603

-

604

-     def _subMSChar(self, match):

605

-@@ -1783,7 +1783,7 @@

606

-         entity."""

607

-         orig = match.group(1)

608

-         sub = self.MS_CHARS.get(orig)

609

--        if type(sub) == types.TupleType:

610

-+        if type(sub) == tuple:

611

-             if self.smartQuotesTo == 'xml':

612

-                 sub = '&#x'.encode() + sub[1].encode() + ';'.encode()

613

-             else:

614

-@@ -1804,7 +1804,7 @@

615

-         if self.smartQuotesTo and proposed.lower() in("windows-1252",

616

-                                                       "iso-8859-1",

617

-                                                       "iso-8859-2"):

618

--            smart_quotes_re = "([\x80-\x9f])"

619

-+            smart_quotes_re = b"([\x80-\x9f])"

620

-             smart_quotes_compiled = re.compile(smart_quotes_re)

621

-             markup = smart_quotes_compiled.sub(self._subMSChar, markup)

622

-

623

-@@ -1813,7 +1813,7 @@

624

-             u = self._toUnicode(markup, proposed)

625

-             self.markup = u

626

-             self.originalEncoding = proposed

627

--        except Exception, e:

628

-+        except Exception as e:

629

-             # print "That didn't work!"

630

-             # print e

631

-             return None

632

-@@ -1842,7 +1842,7 @@

633

-         elif data[:4] == '\xff\xfe\x00\x00':

634

-             encoding = 'utf-32le'

635

-             data = data[4:]

636

--        newdata = unicode(data, encoding)

637

-+        newdata = str(data, encoding)

638

-         return newdata

639

-

640

-     def _detectEncoding(self, xml_data, isHTML=False):

641

-@@ -1855,41 +1855,41 @@

642

-             elif xml_data[:4] == '\x00\x3c\x00\x3f':

643

-                 # UTF-16BE

644

-                 sniffed_xml_encoding = 'utf-16be'

645

--                xml_data = unicode(xml_data, 'utf-16be').encode('utf-8')

646

-+                xml_data = str(xml_data, 'utf-16be').encode('utf-8')

647

-             elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \

648

-                      and (xml_data[2:4] != '\x00\x00'):

649

-                 # UTF-16BE with BOM

650

-                 sniffed_xml_encoding = 'utf-16be'

651

--                xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8')

652

-+                xml_data = str(xml_data[2:], 'utf-16be').encode('utf-8')

653

-             elif xml_data[:4] == '\x3c\x00\x3f\x00':

654

-                 # UTF-16LE

655

-                 sniffed_xml_encoding = 'utf-16le'

656

--                xml_data = unicode(xml_data, 'utf-16le').encode('utf-8')

657

-+                xml_data = str(xml_data, 'utf-16le').encode('utf-8')

658

-             elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \

659

-                      (xml_data[2:4] != '\x00\x00'):

660

-                 # UTF-16LE with BOM

661

-                 sniffed_xml_encoding = 'utf-16le'

662

--                xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8')

663

-+                xml_data = str(xml_data[2:], 'utf-16le').encode('utf-8')

664

-             elif xml_data[:4] == '\x00\x00\x00\x3c':

665

-                 # UTF-32BE

666

-                 sniffed_xml_encoding = 'utf-32be'

667

--                xml_data = unicode(xml_data, 'utf-32be').encode('utf-8')

668

-+                xml_data = str(xml_data, 'utf-32be').encode('utf-8')

669

-             elif xml_data[:4] == '\x3c\x00\x00\x00':

670

-                 # UTF-32LE

671

-                 sniffed_xml_encoding = 'utf-32le'

672

--                xml_data = unicode(xml_data, 'utf-32le').encode('utf-8')

673

-+                xml_data = str(xml_data, 'utf-32le').encode('utf-8')

674

-             elif xml_data[:4] == '\x00\x00\xfe\xff':

675

-                 # UTF-32BE with BOM

676

-                 sniffed_xml_encoding = 'utf-32be'

677

--                xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8')

678

-+                xml_data = str(xml_data[4:], 'utf-32be').encode('utf-8')

679

-             elif xml_data[:4] == '\xff\xfe\x00\x00':

680

-                 # UTF-32LE with BOM

681

-                 sniffed_xml_encoding = 'utf-32le'

682

--                xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8')

683

-+                xml_data = str(xml_data[4:], 'utf-32le').encode('utf-8')

684

-             elif xml_data[:3] == '\xef\xbb\xbf':

685

-                 # UTF-8 with BOM

686

-                 sniffed_xml_encoding = 'utf-8'

687

--                xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8')

688

-+                xml_data = str(xml_data[3:], 'utf-8').encode('utf-8')

689

-             else:

690

-                 sniffed_xml_encoding = 'ascii'

691

-                 pass

692

-@@ -1954,41 +1954,41 @@

693

-                     250,251,252,253,254,255)

694

-             import string

695

-             c.EBCDIC_TO_ASCII_MAP = string.maketrans( \

696

--            ''.join(map(chr, range(256))), ''.join(map(chr, emap)))

697

-+            ''.join(map(chr, list(range(256)))), ''.join(map(chr, emap)))

698

-         return s.translate(c.EBCDIC_TO_ASCII_MAP)

699

-

700

--    MS_CHARS = { '\x80' : ('euro', '20AC'),

701

--                 '\x81' : ' ',

702

--                 '\x82' : ('sbquo', '201A'),

703

--                 '\x83' : ('fnof', '192'),

704

--                 '\x84' : ('bdquo', '201E'),

705

--                 '\x85' : ('hellip', '2026'),

706

--                 '\x86' : ('dagger', '2020'),

707

--                 '\x87' : ('Dagger', '2021'),

708

--                 '\x88' : ('circ', '2C6'),

709

--                 '\x89' : ('permil', '2030'),

710

--                 '\x8A' : ('Scaron', '160'),

711

--                 '\x8B' : ('lsaquo', '2039'),

712

--                 '\x8C' : ('OElig', '152'),

713

--                 '\x8D' : '?',

714

--                 '\x8E' : ('#x17D', '17D'),

715

--                 '\x8F' : '?',

716

--                 '\x90' : '?',

717

--                 '\x91' : ('lsquo', '2018'),

718

--                 '\x92' : ('rsquo', '2019'),

719

--                 '\x93' : ('ldquo', '201C'),

720

--                 '\x94' : ('rdquo', '201D'),

721

--                 '\x95' : ('bull', '2022'),

722

--                 '\x96' : ('ndash', '2013'),

723

--                 '\x97' : ('mdash', '2014'),

724

--                 '\x98' : ('tilde', '2DC'),

725

--                 '\x99' : ('trade', '2122'),

726

--                 '\x9a' : ('scaron', '161'),

727

--                 '\x9b' : ('rsaquo', '203A'),

728

--                 '\x9c' : ('oelig', '153'),

729

--                 '\x9d' : '?',

730

--                 '\x9e' : ('#x17E', '17E'),

731

--                 '\x9f' : ('Yuml', ''),}

732

-+    MS_CHARS = { b'\x80' : ('euro', '20AC'),

733

-+                 b'\x81' : ' ',

734

-+                 b'\x82' : ('sbquo', '201A'),

735

-+                 b'\x83' : ('fnof', '192'),

736

-+                 b'\x84' : ('bdquo', '201E'),

737

-+                 b'\x85' : ('hellip', '2026'),

738

-+                 b'\x86' : ('dagger', '2020'),

739

-+                 b'\x87' : ('Dagger', '2021'),

740

-+                 b'\x88' : ('circ', '2C6'),

741

-+                 b'\x89' : ('permil', '2030'),

742

-+                 b'\x8A' : ('Scaron', '160'),

743

-+                 b'\x8B' : ('lsaquo', '2039'),

744

-+                 b'\x8C' : ('OElig', '152'),

745

-+                 b'\x8D' : '?',

746

-+                 b'\x8E' : ('#x17D', '17D'),

747

-+                 b'\x8F' : '?',

748

-+                 b'\x90' : '?',

749

-+                 b'\x91' : ('lsquo', '2018'),

750

-+                 b'\x92' : ('rsquo', '2019'),

751

-+                 b'\x93' : ('ldquo', '201C'),

752

-+                 b'\x94' : ('rdquo', '201D'),

753

-+                 b'\x95' : ('bull', '2022'),

754

-+                 b'\x96' : ('ndash', '2013'),

755

-+                 b'\x97' : ('mdash', '2014'),

756

-+                 b'\x98' : ('tilde', '2DC'),

757

-+                 b'\x99' : ('trade', '2122'),

758

-+                 b'\x9a' : ('scaron', '161'),

759

-+                 b'\x9b' : ('rsaquo', '203A'),

760

-+                 b'\x9c' : ('oelig', '153'),

761

-+                 b'\x9d' : '?',

762

-+                 b'\x9e' : ('#x17E', '17E'),

763

-+                 b'\x9f' : ('Yuml', ''),}

764

-

765

- #######################################################################

766

-

767

-@@ -1997,4 +1997,4 @@

768

- if __name__ == '__main__':

769

-     import sys

770

-     soup = BeautifulSoup(sys.stdin)

771

--    print soup.prettify()

772

-+    print(soup.prettify())

773

---- BeautifulSoupTests.py

774

-+++ BeautifulSoupTests.py

775

-@@ -82,7 +82,7 @@

776

-     def testFindAllText(self):

777

-         soup = BeautifulSoup("<html>\xbb</html>")

778

-         self.assertEqual(soup.findAll(text=re.compile('.*')),

779

--                         [u'\xbb'])

780

-+                         ['\xbb'])

781

-

782

-     def testFindAllByRE(self):

783

-         import re

784

-@@ -215,7 +215,7 @@

785

-         soup = BeautifulSoup(self.x, parseOnlyThese=strainer)

786

-         self.assertEquals(len(soup), 10)

787

-

788

--        strainer = SoupStrainer(text=lambda(x):x[8]=='3')

789

-+        strainer = SoupStrainer(text=lambda x:x[8]=='3')

790

-         soup = BeautifulSoup(self.x, parseOnlyThese=strainer)

791

-         self.assertEquals(len(soup), 3)

792

-

793

-@@ -256,7 +256,7 @@

794

-         self.assertEqual(copied.decode(), self.soup.decode())

795

-

796

-     def testUnicodePickle(self):

797

--        import cPickle as pickle

798

-+        import pickle as pickle

799

-         html = "<b>" + chr(0xc3) + "</b>"

800

-         soup = BeautifulSoup(html)

801

-         dumped = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL)

802

-@@ -586,23 +586,23 @@

803

-         self.assertEquals(soup.decode(), "<<sacr&eacute; bleu!>>")

804

-

805

-         soup = BeautifulStoneSoup(text, convertEntities=htmlEnt)

806

--        self.assertEquals(soup.decode(), u"<<sacr\xe9 bleu!>>")

807

-+        self.assertEquals(soup.decode(), "<<sacr\xe9 bleu!>>")

808

-

809

-         # Make sure the "XML", "HTML", and "XHTML" settings work.

810

-         text = "&lt;&trade;&apos;"

811

-         soup = BeautifulStoneSoup(text, convertEntities=xmlEnt)

812

--        self.assertEquals(soup.decode(), u"<&trade;'")

813

-+        self.assertEquals(soup.decode(), "<&trade;'")

814

-

815

-         soup = BeautifulStoneSoup(text, convertEntities=htmlEnt)

816

--        self.assertEquals(soup.decode(), u"<\u2122&apos;")

817

-+        self.assertEquals(soup.decode(), "<\u2122&apos;")

818

-

819

-         soup = BeautifulStoneSoup(text, convertEntities=xhtmlEnt)

820

--        self.assertEquals(soup.decode(), u"<\u2122'")

821

-+        self.assertEquals(soup.decode(), "<\u2122'")

822

-

823

-     def testNonBreakingSpaces(self):

824

-         soup = BeautifulSoup("<a>&nbsp;&nbsp;</a>",

825

-                              convertEntities=BeautifulStoneSoup.HTML_ENTITIES)

826

--        self.assertEquals(soup.decode(), u"<a>\xa0\xa0</a>")

827

-+        self.assertEquals(soup.decode(), "<a>\xa0\xa0</a>")

828

-

829

-     def testWhitespaceInDeclaration(self):

830

-         self.assertSoupEquals('<! DOCTYPE>', '<!DOCTYPE>')

831

-@@ -617,27 +617,27 @@

832

-         self.assertSoupEquals('<b>hello&nbsp;there</b>')

833

-

834

-     def testEntitiesInAttributeValues(self):

835

--        self.assertSoupEquals('<x t="x&#241;">', '<x t="x\xc3\xb1"></x>',

836

-+        self.assertSoupEquals('<x t="x&#241;">', b'<x t="x\xc3\xb1"></x>',

837

-                               encoding='utf-8')

838

--        self.assertSoupEquals('<x t="x&#xf1;">', '<x t="x\xc3\xb1"></x>',

839

-+        self.assertSoupEquals('<x t="x&#xf1;">', b'<x t="x\xc3\xb1"></x>',

840

-                               encoding='utf-8')

841

-

842

-         soup = BeautifulSoup('<x t="&gt;&trade;">',

843

-                              convertEntities=BeautifulStoneSoup.HTML_ENTITIES)

844

--        self.assertEquals(soup.decode(), u'<x t="&gt;\u2122"></x>')

845

-+        self.assertEquals(soup.decode(), '<x t="&gt;\u2122"></x>')

846

-

847

-         uri = "http://crummy.com?sacr&eacute;&amp;bleu"

848

-         link = '<a href="%s"></a>' % uri

849

-

850

-         soup = BeautifulSoup(link, convertEntities=BeautifulSoup.HTML_ENTITIES)

851

-         self.assertEquals(soup.decode(),

852

--                          link.replace("&eacute;", u"\xe9"))

853

-+                          link.replace("&eacute;", "\xe9"))

854

-

855

-         uri = "http://crummy.com?sacr&eacute;&bleu"

856

-         link = '<a href="%s"></a>' % uri

857

-         soup = BeautifulSoup(link, convertEntities=BeautifulSoup.HTML_ENTITIES)

858

-         self.assertEquals(soup.a['href'],

859

--                          uri.replace("&eacute;", u"\xe9"))

860

-+                          uri.replace("&eacute;", "\xe9"))

861

-

862

-     def testNakedAmpersands(self):

863

-         html = {'convertEntities':BeautifulStoneSoup.HTML_ENTITIES}

864

-@@ -663,13 +663,13 @@

865

-     smart quote fixes."""

866

-

867

-     def testUnicodeDammitStandalone(self):

868

--        markup = "<foo>\x92</foo>"

869

-+        markup = b"<foo>\x92</foo>"

870

-         dammit = UnicodeDammit(markup)

871

--        self.assertEquals(dammit.unicode, "<foo>&#x2019;</foo>")

872

-+        self.assertEquals(dammit.str, "<foo>&#x2019;</foo>")

873

-

874

--        hebrew = "\xed\xe5\xec\xf9"

875

-+        hebrew = b"\xed\xe5\xec\xf9"

876

-         dammit = UnicodeDammit(hebrew, ["iso-8859-8"])

877

--        self.assertEquals(dammit.unicode, u'\u05dd\u05d5\u05dc\u05e9')

878

-+        self.assertEquals(dammit.str, '\u05dd\u05d5\u05dc\u05e9')

879

-         self.assertEquals(dammit.originalEncoding, 'iso-8859-8')

880

-

881

-     def testGarbageInGarbageOut(self):

882

-@@ -677,13 +677,13 @@

883

-         asciiSoup = BeautifulStoneSoup(ascii)

884

-         self.assertEquals(ascii, asciiSoup.decode())

885

-

886

--        unicodeData = u"<foo>\u00FC</foo>"

887

-+        unicodeData = "<foo>\u00FC</foo>"

888

-         utf8 = unicodeData.encode("utf-8")

889

--        self.assertEquals(utf8, '<foo>\xc3\xbc</foo>')

890

-+        self.assertEquals(utf8, b'<foo>\xc3\xbc</foo>')

891

-

892

-         unicodeSoup = BeautifulStoneSoup(unicodeData)

893

-         self.assertEquals(unicodeData, unicodeSoup.decode())

894

--        self.assertEquals(unicodeSoup.foo.string, u'\u00FC')

895

-+        self.assertEquals(unicodeSoup.foo.string, '\u00FC')

896

-

897

-         utf8Soup = BeautifulStoneSoup(utf8, fromEncoding='utf-8')

898

-         self.assertEquals(utf8, utf8Soup.encode('utf-8'))

899

-@@ -696,18 +696,18 @@

900

-

901

-     def testHandleInvalidCodec(self):

902

-         for bad_encoding in ['.utf8', '...', 'utF---16.!']:

903

--            soup = BeautifulSoup(u"Räksmörgås".encode("utf-8"),

904

-+            soup = BeautifulSoup("Räksmörgås".encode("utf-8"),

905

-                                  fromEncoding=bad_encoding)

906

-             self.assertEquals(soup.originalEncoding, 'utf-8')

907

-

908

-     def testUnicodeSearch(self):

909

--        html = u'<html><body><h1>Räksmörgås</h1></body></html>'

910

-+        html = '<html><body><h1>Räksmörgås</h1></body></html>'

911

-         soup = BeautifulSoup(html)

912

--        self.assertEqual(soup.find(text=u'Räksmörgås'),u'Räksmörgås')

913

-+        self.assertEqual(soup.find(text='Räksmörgås'),'Räksmörgås')

914

-

915

-     def testRewrittenXMLHeader(self):

916

--        euc_jp = '<?xml version="1.0 encoding="euc-jp"?>\n<foo>\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n</foo>\n'

917

--        utf8 = "<?xml version='1.0' encoding='utf-8'?>\n<foo>\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n</foo>\n"

918

-+        euc_jp = b'<?xml version="1.0 encoding="euc-jp"?>\n<foo>\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n</foo>\n'

919

-+        utf8 = b"<?xml version='1.0' encoding='utf-8'?>\n<foo>\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n</foo>\n"

920

-         soup = BeautifulStoneSoup(euc_jp)

921

-         if soup.originalEncoding != "euc-jp":

922

-             raise Exception("Test failed when parsing euc-jp document. "

923

-@@ -718,12 +718,12 @@

924

-         self.assertEquals(soup.originalEncoding, "euc-jp")

925

-         self.assertEquals(soup.renderContents('utf-8'), utf8)

926

-

927

--        old_text = "<?xml encoding='windows-1252'><foo>\x92</foo>"

928

-+        old_text = b"<?xml encoding='windows-1252'><foo>\x92</foo>"

929

-         new_text = "<?xml version='1.0' encoding='utf-8'?><foo>&rsquo;</foo>"

930

-         self.assertSoupEquals(old_text, new_text)

931

-

932

-     def testRewrittenMetaTag(self):

933

--        no_shift_jis_html = '''<html><head>\n<meta http-equiv="Content-language" content="ja" /></head><body><pre>\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n</pre></body></html>'''

934

-+        no_shift_jis_html = b'''<html><head>\n<meta http-equiv="Content-language" content="ja" /></head><body><pre>\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n</pre></body></html>'''

935

-         soup = BeautifulSoup(no_shift_jis_html)

936

-

937

-         # Beautiful Soup used to try to rewrite the meta tag even if the

938

-@@ -733,16 +733,16 @@

939

-         soup = BeautifulSoup(no_shift_jis_html, parseOnlyThese=strainer)

940

-         self.assertEquals(soup.contents[0].name, 'pre')

941

-

942

--        meta_tag = ('<meta content="text/html; charset=x-sjis" '

943

--                    'http-equiv="Content-type" />')

944

-+        meta_tag = (b'<meta content="text/html; charset=x-sjis" '

945

-+                    b'http-equiv="Content-type" />')

946

-         shift_jis_html = (

947

--            '<html><head>\n%s\n'

948

--            '<meta http-equiv="Content-language" content="ja" />'

949

--            '</head><body><pre>\n'

950

--            '\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'

951

--            '\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'

952

--            '\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n'

953

--            '</pre></body></html>') % meta_tag

954

-+            b'<html><head>\n' + meta_tag + b'\n'

955

-+            b'<meta http-equiv="Content-language" content="ja" />'

956

-+            b'</head><body><pre>\n'

957

-+            b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'

958

-+            b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'

959

-+            b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n'

960

-+            b'</pre></body></html>')

961

-         soup = BeautifulSoup(shift_jis_html)

962

-         if soup.originalEncoding != "shift-jis":

963

-             raise Exception("Test failed when parsing shift-jis document "

964

-@@ -755,59 +755,59 @@

965

-         content_type_tag = soup.meta['content']

966

-         self.assertEquals(content_type_tag[content_type_tag.find('charset='):],

967

-                           'charset=%SOUP-ENCODING%')

968

--        content_type = str(soup.meta)

969

-+        content_type = soup.meta.decode()

970

-         index = content_type.find('charset=')

971

-         self.assertEqual(content_type[index:index+len('charset=utf8')+1],

972

-                          'charset=utf-8')

973

-         content_type = soup.meta.encode('shift-jis')

974

--        index = content_type.find('charset=')

975

-+        index = content_type.find(b'charset=')

976

-         self.assertEqual(content_type[index:index+len('charset=shift-jis')],

977

-                          'charset=shift-jis'.encode())

978

-

979

-         self.assertEquals(soup.encode('utf-8'), (

980

--                '<html><head>\n'

981

--                '<meta content="text/html; charset=utf-8" '

982

--                'http-equiv="Content-type" />\n'

983

--                '<meta http-equiv="Content-language" content="ja" />'

984

--                '</head><body><pre>\n'

985

--                '\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3'

986

--                '\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3'

987

--                '\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6'

988

--                '\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3'

989

--                '\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n'

990

--                '</pre></body></html>'))

991

-+                b'<html><head>\n'

992

-+                b'<meta content="text/html; charset=utf-8" '

993

-+                b'http-equiv="Content-type" />\n'

994

-+                b'<meta http-equiv="Content-language" content="ja" />'

995

-+                b'</head><body><pre>\n'

996

-+                b'\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3'

997

-+                b'\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3'

998

-+                b'\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6'

999

-+                b'\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3'

1000

-+                b'\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n'

1001

-+                b'</pre></body></html>'))

1002

-         self.assertEquals(soup.encode("shift-jis"),

1003

-                           shift_jis_html.replace('x-sjis'.encode(),

1004

-                                                  'shift-jis'.encode()))

1005

-

1006

--        isolatin = """<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>"""

1007

-+        isolatin = b"""<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>"""

1008

-         soup = BeautifulSoup(isolatin)

1009

-

1010

-         utf8 = isolatin.replace("ISO-Latin-1".encode(), "utf-8".encode())

1011

--        utf8 = utf8.replace("\xe9", "\xc3\xa9")

1012

-+        utf8 = utf8.replace(b"\xe9", b"\xc3\xa9")

1013

-         self.assertSoupEquals(soup.encode("utf-8"), utf8, encoding='utf-8')

1014

-

1015

-     def testHebrew(self):

1016

--        iso_8859_8= '<HEAD>\n<TITLE>Hebrew (ISO 8859-8) in Visual Directionality</TITLE>\n\n\n\n</HEAD>\n<BODY>\n<H1>Hebrew (ISO 8859-8) in Visual Directionality</H1>\n\xed\xe5\xec\xf9\n</BODY>\n'

1017

--        utf8 = '<head>\n<title>Hebrew (ISO 8859-8) in Visual Directionality</title>\n</head>\n<body>\n<h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n</body>\n'

1018

-+        iso_8859_8= b'<HEAD>\n<TITLE>Hebrew (ISO 8859-8) in Visual Directionality</TITLE>\n\n\n\n</HEAD>\n<BODY>\n<H1>Hebrew (ISO 8859-8) in Visual Directionality</H1>\n\xed\xe5\xec\xf9\n</BODY>\n'

1019

-+        utf8 = b'<head>\n<title>Hebrew (ISO 8859-8) in Visual Directionality</title>\n</head>\n<body>\n<h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n</body>\n'

1020

-         soup = BeautifulStoneSoup(iso_8859_8, fromEncoding="iso-8859-8")

1021

-         self.assertEquals(soup.encode('utf-8'), utf8)

1022

-

1023

-     def testSmartQuotesNotSoSmartAnymore(self):

1024

--        self.assertSoupEquals("\x91Foo\x92 <!--blah-->",

1025

-+        self.assertSoupEquals(b"\x91Foo\x92 <!--blah-->",

1026

-                               '&lsquo;Foo&rsquo; <!--blah-->')

1027

-

1028

-     def testDontConvertSmartQuotesWhenAlsoConvertingEntities(self):

1029

--        smartQuotes = "Il a dit, \x8BSacr&eacute; bl&#101;u!\x9b"

1030

-+        smartQuotes = b"Il a dit, \x8BSacr&eacute; bl&#101;u!\x9b"

1031

-         soup = BeautifulSoup(smartQuotes)

1032

-         self.assertEquals(soup.decode(),

1033

-                           'Il a dit, &lsaquo;Sacr&eacute; bl&#101;u!&rsaquo;')

1034

-         soup = BeautifulSoup(smartQuotes, convertEntities="html")

1035

-         self.assertEquals(soup.encode('utf-8'),

1036

--                          'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba')

1037

-+                          b'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba')

1038

-

1039

-     def testDontSeeSmartQuotesWhereThereAreNone(self):

1040

--        utf_8 = "\343\202\261\343\203\274\343\202\277\343\202\244 Watch"

1041

-+        utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch"

1042

-         self.assertSoupEquals(utf_8, encoding='utf-8')

1043

-

1044

-

1045

---- setup.py

1046

-+++ setup.py

1047

-@@ -19,19 +19,19 @@

1048

- suite = loader.loadTestsFromModule(BeautifulSoupTests)

1049

- suite.run(result)

1050

- if not result.wasSuccessful():

1051

--    print "Unit tests have failed!"

1052

-+    print("Unit tests have failed!")

1053

-     for l in result.errors, result.failures:

1054

-         for case, error in l:

1055

--            print "-" * 80

1056

-+            print("-" * 80)

1057

-             desc = case.shortDescription()

1058

-             if desc:

1059

--                print desc

1060

--            print error        

1061

--    print '''If you see an error like: "'ascii' codec can't encode character...", see\nthe Beautiful Soup documentation:\n http://www.crummy.com/software/BeautifulSoup/documentation.html#Why%20can't%20Beautiful%20Soup%20print%20out%20the%20non-ASCII%20characters%20I%20gave%20it?'''

1062

--    print "This might or might not be a problem depending on what you plan to do with\nBeautiful Soup."

1063

-+                print(desc)

1064

-+            print(error)        

1065

-+    print('''If you see an error like: "'ascii' codec can't encode character...", see\nthe Beautiful Soup documentation:\n http://www.crummy.com/software/BeautifulSoup/documentation.html#Why%20can't%20Beautiful%20Soup%20print%20out%20the%20non-ASCII%20characters%20I%20gave%20it?''')

1066

-+    print("This might or might not be a problem depending on what you plan to do with\nBeautiful Soup.")

1067

-     if sys.argv[1] == 'sdist':

1068

--        print

1069

--        print "I'm not going to make a source distribution since the tests don't pass."

1070

-+        print()

1071

-+        print("I'm not going to make a source distribution since the tests don't pass.")

1072

-         sys.exit(1)

1073

-

1074

- setup(name="BeautifulSoup",

1075

1076

diff --git a/profiles/package.mask b/profiles/package.mask

1077

index 6d5d46bffac..81ca42e20b7 100644

1078

--- a/profiles/package.mask

1079

+++ b/profiles/package.mask

1080

@@ -481,11 +481,6 @@ media-plugins/vdr-skinnopacity

1081

 # #623706. Removal in a month.

1082

 net-libs/dhcpcd-dbus

1083

1084

-# Pacho Ramos <pacho@g.o> (14 Jul 2017)

1085

-# Not compatible with python >= 3.5 but neither needed by anything in the

1086

-# tree anymore, bug #624670. Removal in a month.

1087

-=dev-python/beautifulsoup-3.1.0.1-r2

1088

-

1089

 # Lars Wendler <polynomial-c@g.o> (07 Jul 2017)

1090

 # Masked until >=net-fs/samba-4.7 is in the tree and 

1091

 # unmasked. (bug #624106)

Gentoo Archives: gentoo-commits