1 |
commit: 75b897d91874c505e14dc2c0808c4cad4268ad76 |
2 |
Author: Brian Dolbec <dolsen <AT> gentoo <DOT> org> |
3 |
AuthorDate: Tue May 3 07:18:05 2016 +0000 |
4 |
Commit: Brian Dolbec <dolsen <AT> gentoo <DOT> org> |
5 |
CommitDate: Tue May 3 09:12:30 2016 +0000 |
6 |
URL: https://gitweb.gentoo.org/proj/portage.git/commit/?id=75b897d9 |
7 |
|
8 |
repoman: Use lxml for parsing of metadata |
9 |
|
10 |
Note that we no longer throw a QA error for a missing XML prolog, as long as |
11 |
the encoding matches the default ('UTF-8'; lowercase is also allowed). |
12 |
|
13 |
pym/repoman/modules/scan/metadata/pkgmetadata.py | 44 +++++++----------------- |
14 |
1 file changed, 12 insertions(+), 32 deletions(-) |
15 |
|
16 |
diff --git a/pym/repoman/modules/scan/metadata/pkgmetadata.py b/pym/repoman/modules/scan/metadata/pkgmetadata.py |
17 |
index e8db92f..22afddf 100644 |
18 |
--- a/pym/repoman/modules/scan/metadata/pkgmetadata.py |
19 |
+++ b/pym/repoman/modules/scan/metadata/pkgmetadata.py |
20 |
@@ -7,8 +7,8 @@ import sys |
21 |
from itertools import chain |
22 |
|
23 |
try: |
24 |
- import xml.etree.ElementTree |
25 |
- from xml.parsers.expat import ExpatError |
26 |
+ from lxml import etree |
27 |
+ from lxml.etree import ParserError |
28 |
except (SystemExit, KeyboardInterrupt): |
29 |
raise |
30 |
except (ImportError, SystemError, RuntimeError, Exception): |
31 |
@@ -26,12 +26,11 @@ from repoman._portage import portage |
32 |
from repoman.metadata import metadata_dtd_uri |
33 |
from repoman.checks.herds.herdbase import get_herd_base |
34 |
from repoman.checks.herds.metadata import check_metadata, UnknownHerdsError |
35 |
-from repoman._xml import _XMLParser, _MetadataTreeBuilder, XmlLint |
36 |
+from repoman._xml import XmlLint |
37 |
from repoman.modules.scan.scanbase import ScanBase |
38 |
|
39 |
from portage.exception import InvalidAtom |
40 |
from portage import os |
41 |
-from portage import _encodings, _unicode_encode |
42 |
from portage import exception |
43 |
from portage.dep import Atom |
44 |
|
45 |
@@ -141,50 +140,31 @@ class PkgMetadata(ScanBase, USEFlagChecks): |
46 |
|
47 |
# metadata.xml parse check |
48 |
metadata_bad = False |
49 |
- xml_info = {} |
50 |
- xml_parser = _XMLParser(xml_info, target=_MetadataTreeBuilder()) |
51 |
|
52 |
# read metadata.xml into memory |
53 |
try: |
54 |
- _metadata_xml = xml.etree.ElementTree.parse( |
55 |
- _unicode_encode( |
56 |
- os.path.join(checkdir, "metadata.xml"), |
57 |
- encoding=_encodings['fs'], errors='strict'), |
58 |
- parser=xml_parser) |
59 |
- except (ExpatError, SyntaxError, EnvironmentError) as e: |
60 |
+ _metadata_xml = etree.parse(os.path.join(checkdir, 'metadata.xml')) |
61 |
+ except (ParserError, SyntaxError, EnvironmentError) as e: |
62 |
metadata_bad = True |
63 |
self.qatracker.add_error("metadata.bad", "%s/metadata.xml: %s" % (xpkg, e)) |
64 |
del e |
65 |
self.muselist = frozenset(self.musedict) |
66 |
return False |
67 |
|
68 |
- if "XML_DECLARATION" not in xml_info: |
69 |
+ xml_encoding = _metadata_xml.docinfo.encoding |
70 |
+ if xml_encoding.upper() != metadata_xml_encoding: |
71 |
self.qatracker.add_error( |
72 |
"metadata.bad", "%s/metadata.xml: " |
73 |
- "xml declaration is missing on first line, " |
74 |
- "should be '%s'" % (xpkg, metadata_xml_declaration)) |
75 |
- else: |
76 |
- xml_version, xml_encoding, xml_standalone = \ |
77 |
- xml_info["XML_DECLARATION"] |
78 |
- if xml_encoding is None or \ |
79 |
- xml_encoding.upper() != metadata_xml_encoding: |
80 |
- if xml_encoding is None: |
81 |
- encoding_problem = "but it is undefined" |
82 |
- else: |
83 |
- encoding_problem = "not '%s'" % xml_encoding |
84 |
- self.qatracker.add_error( |
85 |
- "metadata.bad", "%s/metadata.xml: " |
86 |
- "xml declaration encoding should be '%s', %s" % |
87 |
- (xpkg, metadata_xml_encoding, encoding_problem)) |
88 |
+ "xml declaration encoding should be '%s', not '%s'" % |
89 |
+ (xpkg, metadata_xml_encoding, xml_encoding)) |
90 |
|
91 |
- if "DOCTYPE" not in xml_info: |
92 |
+ if not _metadata_xml.docinfo: |
93 |
metadata_bad = True |
94 |
self.qatracker.add_error( |
95 |
"metadata.bad", |
96 |
"%s/metadata.xml: %s" % (xpkg, "DOCTYPE is missing")) |
97 |
else: |
98 |
- doctype_name, doctype_system, doctype_pubid = \ |
99 |
- xml_info["DOCTYPE"] |
100 |
+ doctype_system = _metadata_xml.docinfo.system_url |
101 |
if doctype_system != metadata_dtd_uri: |
102 |
if doctype_system is None: |
103 |
system_problem = "but it is undefined" |
104 |
@@ -194,7 +174,7 @@ class PkgMetadata(ScanBase, USEFlagChecks): |
105 |
"metadata.bad", "%s/metadata.xml: " |
106 |
"DOCTYPE: SYSTEM should refer to '%s', %s" % |
107 |
(xpkg, metadata_dtd_uri, system_problem)) |
108 |
- |
109 |
+ doctype_name = _metadata_xml.docinfo.doctype.split(' ')[1] |
110 |
if doctype_name != metadata_doctype_name: |
111 |
self.qatracker.add_error( |
112 |
"metadata.bad", "%s/metadata.xml: " |