1 |
commit: df4ddb601efbef157147fcfd6057afd01636acab |
2 |
Author: Mart Raudsepp <leio <AT> gentoo <DOT> org> |
3 |
AuthorDate: Sun Dec 4 05:26:10 2016 +0000 |
4 |
Commit: Mart Raudsepp <leio <AT> gentoo <DOT> org> |
5 |
CommitDate: Sun Dec 4 05:26:10 2016 +0000 |
6 |
URL: https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=df4ddb60 |
7 |
|
8 |
sync: Initial projects syncing to DB without members |
9 |
|
10 |
backend/lib/sync.py | 35 ++++++++++++++++++++++++++++------- |
11 |
1 file changed, 28 insertions(+), 7 deletions(-) |
12 |
|
13 |
diff --git a/backend/lib/sync.py b/backend/lib/sync.py |
14 |
index fbc653a..6ed8e01 100644 |
15 |
--- a/backend/lib/sync.py |
16 |
+++ b/backend/lib/sync.py |
17 |
@@ -2,19 +2,19 @@ import xml.etree.ElementTree as ET |
18 |
from flask import json |
19 |
import requests |
20 |
from .. import app, db |
21 |
-from .models import Category, Package, PackageVersion |
22 |
+from .models import Category, Maintainer, Package, PackageVersion |
23 |
|
24 |
proj_url = "https://api.gentoo.org/metastructure/projects.xml" |
25 |
pkg_url_base = "https://packages.gentoo.org/" |
26 |
http_session = requests.session() |
27 |
|
28 |
-def sync_projects(): |
29 |
+def get_project_data(): |
30 |
data = http_session.get(proj_url) |
31 |
if not data: |
32 |
print("Failed retrieving projects.xml") |
33 |
return |
34 |
root = ET.fromstring(data.text) |
35 |
- projects = [] |
36 |
+ projects = {} |
37 |
# Parsing is based on http://www.gentoo.org/dtd/projects.dtd as of 2016-11-10 |
38 |
if root.tag.lower() != 'projects': |
39 |
print("Downloaded projects.xml root tag isn't 'projects'") |
40 |
@@ -53,12 +53,33 @@ def sync_projects(): |
41 |
else: |
42 |
print("Skipping unknown <project> subtag <%s>" % tag) |
43 |
if 'email' in proj: |
44 |
- projects.append(proj) |
45 |
+ projects[proj['email']] = proj |
46 |
else: |
47 |
print("Skipping incomplete project data due to lack of required email identifier: %s" % (proj,)) |
48 |
- from pprint import pprint |
49 |
- print("Found the following projects and data:") |
50 |
- pprint(projects) |
51 |
+ return projects |
52 |
+ |
53 |
def sync_projects():
    """Sync Gentoo projects from projects.xml into the Maintainer table.

    Fetches the project mapping from get_project_data() (keyed by project
    email). Maintainer rows whose email is already present are flagged as
    projects and have their description/name/url refreshed from whichever
    of those fields the project data contains; projects without a matching
    row are added as new Maintainer rows. All changes are committed in a
    single transaction at the end. Project members are not synced.

    Returns None. When no project data is available nothing is written.
    """
    projects = get_project_data()
    if not projects:
        # get_project_data() returns None when the download fails (it prints
        # the reason itself); there is nothing to iterate or commit then.
        return

    # TODO: Use UPSERT instead (on_conflict_do_update) if we can rely on postgresql:9.5
    existing_maintainers = {
        maintainer.email: maintainer for maintainer in Maintainer.query.all()
    }

    for email, data in projects.items():
        maintainer = existing_maintainers.get(email)
        if maintainer is not None:
            print("Updating project %s" % email)
            maintainer.is_project = True
            # Only overwrite fields the project data actually provides.
            for field in ('description', 'name', 'url'):
                if field in data:
                    setattr(maintainer, field, data[field])
        else:
            print("Adding project %s" % email)
            # Only 'email' is guaranteed by the parser, so the remaining
            # fields are read with .get() instead of raising KeyError and
            # aborting the whole sync.
            # NOTE(review): assumes the Maintainer columns accept None for
            # description/name/url - confirm against the model.
            new_maintainer = Maintainer(
                email=email,
                is_project=True,
                description=data.get('description'),
                name=data.get('name'),
                url=data.get('url'),
            )
            db.session.add(new_maintainer)
    db.session.commit()
74 |
+ |
75 |
|
76 |
def sync_categories(): |
77 |
url = pkg_url_base + "categories.json" |