1 |
commit: dac532df96cb16626f4f1656b5aa2f82b8383c8d |
2 |
Author: Mart Raudsepp <leio <AT> gentoo <DOT> org> |
3 |
AuthorDate: Sun Dec 4 07:59:39 2016 +0000 |
4 |
Commit: Mart Raudsepp <leio <AT> gentoo <DOT> org> |
5 |
CommitDate: Sun Dec 4 07:59:39 2016 +0000 |
6 |
URL: https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=dac532df |
7 |
|
8 |
sync: Fix UTF-8 handling for projects.xml import |
9 |
|
10 |
Need to feed response.content bytestring into ElementTree, not response.text. |
11 |
With the latter, ET seems to assume the text is already decoded and ends up treating it as latin-1. |
12 |
From the response.content bytestring, it notices the UTF-8 encoding declared in the XML markup and does |
13 |
things right. |
14 |
|
15 |
Diagnosed-by: Doug Freed <dwfreed <AT> mtu.edu> |
16 |
|
17 |
backend/lib/sync.py | 2 +- |
18 |
1 file changed, 1 insertion(+), 1 deletion(-) |
19 |
|
20 |
diff --git a/backend/lib/sync.py b/backend/lib/sync.py |
21 |
index 4894315..22419bf 100644 |
22 |
--- a/backend/lib/sync.py |
23 |
+++ b/backend/lib/sync.py |
24 |
@@ -13,7 +13,7 @@ def get_project_data(): |
25 |
if not data: |
26 |
print("Failed retrieving projects.xml") |
27 |
return |
28 |
- root = ET.fromstring(data.text) |
29 |
+ root = ET.fromstring(data.content) |
30 |
projects = {} |
31 |
# Parsing is based on http://www.gentoo.org/dtd/projects.dtd as of 2016-11-10 |
32 |
if root.tag.lower() != 'projects': |