Gentoo Archives: gentoo-commits

From: Brian Dolbec <dolsen@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/portage:repoman commit in: pym/repoman/modules/scan/metadata/
Date: Tue, 03 May 2016 09:33:53
Message-Id: 1462266750.75b897d91874c505e14dc2c0808c4cad4268ad76.dolsen@gentoo
1 commit: 75b897d91874c505e14dc2c0808c4cad4268ad76
2 Author: Brian Dolbec <dolsen <AT> gentoo <DOT> org>
3 AuthorDate: Tue May 3 07:18:05 2016 +0000
4 Commit: Brian Dolbec <dolsen <AT> gentoo <DOT> org>
5 CommitDate: Tue May 3 09:12:30 2016 +0000
6 URL: https://gitweb.gentoo.org/proj/portage.git/commit/?id=75b897d9
7
8 repoman: Use lxml for parsing of metadata
9
10 Note that we no longer throw a QA error for a missing XML prolog, as long as
11 the encoding matches the default ('UTF-8'; lowercase is also allowed).
12
13 pym/repoman/modules/scan/metadata/pkgmetadata.py | 44 +++++++-----------------
14 1 file changed, 12 insertions(+), 32 deletions(-)
15
16 diff --git a/pym/repoman/modules/scan/metadata/pkgmetadata.py b/pym/repoman/modules/scan/metadata/pkgmetadata.py
17 index e8db92f..22afddf 100644
18 --- a/pym/repoman/modules/scan/metadata/pkgmetadata.py
19 +++ b/pym/repoman/modules/scan/metadata/pkgmetadata.py
20 @@ -7,8 +7,8 @@ import sys
21 from itertools import chain
22
23 try:
24 - import xml.etree.ElementTree
25 - from xml.parsers.expat import ExpatError
26 + from lxml import etree
27 + from lxml.etree import ParserError
28 except (SystemExit, KeyboardInterrupt):
29 raise
30 except (ImportError, SystemError, RuntimeError, Exception):
31 @@ -26,12 +26,11 @@ from repoman._portage import portage
32 from repoman.metadata import metadata_dtd_uri
33 from repoman.checks.herds.herdbase import get_herd_base
34 from repoman.checks.herds.metadata import check_metadata, UnknownHerdsError
35 -from repoman._xml import _XMLParser, _MetadataTreeBuilder, XmlLint
36 +from repoman._xml import XmlLint
37 from repoman.modules.scan.scanbase import ScanBase
38
39 from portage.exception import InvalidAtom
40 from portage import os
41 -from portage import _encodings, _unicode_encode
42 from portage import exception
43 from portage.dep import Atom
44
45 @@ -141,50 +140,31 @@ class PkgMetadata(ScanBase, USEFlagChecks):
46
47 # metadata.xml parse check
48 metadata_bad = False
49 - xml_info = {}
50 - xml_parser = _XMLParser(xml_info, target=_MetadataTreeBuilder())
51
52 # read metadata.xml into memory
53 try:
54 - _metadata_xml = xml.etree.ElementTree.parse(
55 - _unicode_encode(
56 - os.path.join(checkdir, "metadata.xml"),
57 - encoding=_encodings['fs'], errors='strict'),
58 - parser=xml_parser)
59 - except (ExpatError, SyntaxError, EnvironmentError) as e:
60 + _metadata_xml = etree.parse(os.path.join(checkdir, 'metadata.xml'))
61 + except (ParserError, SyntaxError, EnvironmentError) as e:
62 metadata_bad = True
63 self.qatracker.add_error("metadata.bad", "%s/metadata.xml: %s" % (xpkg, e))
64 del e
65 self.muselist = frozenset(self.musedict)
66 return False
67
68 - if "XML_DECLARATION" not in xml_info:
69 + xml_encoding = _metadata_xml.docinfo.encoding
70 + if xml_encoding.upper() != metadata_xml_encoding:
71 self.qatracker.add_error(
72 "metadata.bad", "%s/metadata.xml: "
73 - "xml declaration is missing on first line, "
74 - "should be '%s'" % (xpkg, metadata_xml_declaration))
75 - else:
76 - xml_version, xml_encoding, xml_standalone = \
77 - xml_info["XML_DECLARATION"]
78 - if xml_encoding is None or \
79 - xml_encoding.upper() != metadata_xml_encoding:
80 - if xml_encoding is None:
81 - encoding_problem = "but it is undefined"
82 - else:
83 - encoding_problem = "not '%s'" % xml_encoding
84 - self.qatracker.add_error(
85 - "metadata.bad", "%s/metadata.xml: "
86 - "xml declaration encoding should be '%s', %s" %
87 - (xpkg, metadata_xml_encoding, encoding_problem))
88 + "xml declaration encoding should be '%s', not '%s'" %
89 + (xpkg, metadata_xml_encoding, xml_encoding))
90
91 - if "DOCTYPE" not in xml_info:
92 + if not _metadata_xml.docinfo:
93 metadata_bad = True
94 self.qatracker.add_error(
95 "metadata.bad",
96 "%s/metadata.xml: %s" % (xpkg, "DOCTYPE is missing"))
97 else:
98 - doctype_name, doctype_system, doctype_pubid = \
99 - xml_info["DOCTYPE"]
100 + doctype_system = _metadata_xml.docinfo.system_url
101 if doctype_system != metadata_dtd_uri:
102 if doctype_system is None:
103 system_problem = "but it is undefined"
104 @@ -194,7 +174,7 @@ class PkgMetadata(ScanBase, USEFlagChecks):
105 "metadata.bad", "%s/metadata.xml: "
106 "DOCTYPE: SYSTEM should refer to '%s', %s" %
107 (xpkg, metadata_dtd_uri, system_problem))
108 -
109 + doctype_name = _metadata_xml.docinfo.doctype.split(' ')[1]
110 if doctype_name != metadata_doctype_name:
111 self.qatracker.add_error(
112 "metadata.bad", "%s/metadata.xml: "