1 |
commit: eabccfc8eeef5d0c76a444545fa390f400a9d4ed |
2 |
Author: Brian Dolbec <dolsen <AT> gentoo <DOT> org> |
3 |
AuthorDate: Sun Oct 20 05:20:39 2013 +0000 |
4 |
Commit: Brian Dolbec <brian.dolbec <AT> gmail <DOT> com> |
5 |
CommitDate: Sun Oct 20 18:14:21 2013 +0000 |
6 |
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/mirrorselect.git;a=commit;h=eabccfc8 |
7 |
|
8 |
Work in progress for adding SSL support for downloading the mirror lists. |
9 |
|
10 |
add the request code in its own file and class. |
11 |
|
12 |
--- |
13 |
mirrorselect/connections.py | 182 ++++++++++++++++++++++++++++++++++++++++++++ |
14 |
mirrorselect/extractor.py | 36 ++++----- |
15 |
2 files changed, 200 insertions(+), 18 deletions(-) |
16 |
|
17 |
diff --git a/mirrorselect/connections.py b/mirrorselect/connections.py |
18 |
new file mode 100644 |
19 |
index 0000000..ca4aa88 |
20 |
--- /dev/null |
21 |
+++ b/mirrorselect/connections.py |
22 |
@@ -0,0 +1,182 @@ |
23 |
+#-*- coding:utf-8 -*- |
24 |
+ |
25 |
+"""Mirrorselect 2.x |
26 |
+ Tool for selecting Gentoo source and rsync mirrors. |
27 |
+ |
28 |
+Copyright 2005-2012 Gentoo Foundation |
29 |
+ |
30 |
+ Copyright (C) 2005 Colin Kingsley <tercel@g.o> |
31 |
+ Copyright (C) 2008 Zac Medico <zmedico@g.o> |
32 |
+ Copyright (C) 2009 Sebastian Pipping <sebastian@×××××××.org> |
33 |
+ Copyright (C) 2009 Christian Ruppert <idl0r@g.o> |
34 |
+ Copyright (C) 2012 Brian Dolbec <dolsen@g.o> |
35 |
+ |
36 |
+Distributed under the terms of the GNU General Public License v2 |
37 |
+ This program is free software; you can redistribute it and/or modify |
38 |
+ it under the terms of the GNU General Public License as published by |
39 |
+ the Free Software Foundation, version 2 of the License. |
40 |
+ |
41 |
+ This program is distributed in the hope that it will be useful, |
42 |
+ but WITHOUT ANY WARRANTY; without even the implied warranty of |
43 |
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
44 |
+ GNU General Public License for more details. |
45 |
+ |
46 |
+ You should have received a copy of the GNU General Public License |
47 |
+ along with this program; if not, write to the Free Software |
48 |
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. |
49 |
+ |
50 |
+""" |
51 |
+ |
52 |
+import sys |
53 |
+import os |
54 |
+ |
55 |
+VERIFY_SSL = False |
56 |
+VERIFY_MSGS = [] |
57 |
+ |
58 |
+import requests |
59 |
+from requests.exceptions import SSLError |
60 |
+ |
61 |
+# py3.2 |
62 |
+if sys.hexversion >= 0x30200f0: |
63 |
+ VERIFY_SSL = True |
64 |
+else: |
65 |
+ try: # import and enable SNI support for py2 |
66 |
+ from requests.packages.urllib3.contrib import pyopenssl |
67 |
+ pyopenssl.inject_into_urllib3() |
68 |
+ VERIFY_SSL = True |
69 |
+ VERIFY_MSGS = ["Successfully enabled ssl certificate verification."] |
70 |
+ except ImportError as e: |
71 |
+ VERIFY_MSGS = [ |
72 |
+ "Failed to import and inject pyopenssl/SNI support into urllib3", |
73 |
+ "Disabling certificate verification", |
74 |
+ "Error was:" + e |
75 |
+ ] |
76 |
+ VERIFY_SSL = False |
77 |
+ |
78 |
+ |
79 |
+from mirrorselect.version import version |
80 |
+ |
81 |
+ |
82 |
+class Connector(object): |
83 |
+ """Primary connection interface using the dev-python/requests package |
84 |
+ """ |
85 |
+ |
86 |
+ def __init__(self, output, proxies): |
87 |
+ self.output = output |
88 |
+ self.proxies = proxies |
89 |
+ self.headers = {'Accept-Charset': 'utf-8', |
90 |
+ 'User-Agent': 'Mirrorselect-' + version} |
91 |
+ |
92 |
+ if VERIFY_MSGS: |
93 |
+ for msg in VERIFY_MSGS: |
94 |
+ self.output.write(msg + '\n', 2) |
95 |
+ |
96 |
+ |
97 |
+ def add_timestamp(self, headers, tpath=None, timestamp=None): |
98 |
+ """for possilble future caching of the list""" |
99 |
+ if tpath and os.path.exists(tpath): |
100 |
+ # fileopen is a layman compatibility function not yet implemented here |
101 |
+ with fileopen(tpath,'r') as previous: |
102 |
+ timestamp = previous.read() |
103 |
+ if timestamp: |
104 |
+ headers['If-Modified-Since'] = timestamp |
105 |
+ self.output.write('Current-modified: %s\n' % timestamp, 2) |
106 |
+ return headers |
107 |
+ |
108 |
+ |
109 |
+ def fetch_url(self, url, headers=None, timestamp=None): |
110 |
+ """Fetches the url |
111 |
+ |
112 |
+ @param url: string |
113 |
+ @param headers: dictionary, optional headers to use |
114 |
+ @param tpath: string, optional filepath to a timestamp file |
115 |
+ to use in the headers |
116 |
+ @param timestamp: string, optional timestamp to use in the headers |
117 |
+ |
118 |
+ """ |
119 |
+ |
120 |
+ if not headers: |
121 |
+ headers = self.headers |
122 |
+ |
123 |
+ if timestamp: |
124 |
+ self.add_timestamp(headers, timestamp=timestamp) |
125 |
+ |
126 |
+ verify = 'https' in url and VERIFY_SSL |
127 |
+ self.output.write("Enabled ssl certificate verification: %s, for: %s\n" |
128 |
+ %(str(verify), url), 3) |
129 |
+ |
130 |
+ self.output.write('Connector.fetch_url(); headers = %s\n' %str(headers), 4) |
131 |
+ self.output.write('Connector.fetch_url(); connecting to opener\n', 2) |
132 |
+ |
133 |
+ try: |
134 |
+ connection = requests.get( |
135 |
+ url, |
136 |
+ headers=headers, |
137 |
+ verify=verify, |
138 |
+ proxies=self.proxies, |
139 |
+ ) |
140 |
+ except SSLError as error: |
141 |
+ self.output.print_err('Connector.fetch_url(); Failed to update the ' |
142 |
+ 'mirror list from: %s\nSSLError was:%s\n' |
143 |
+ % (url, str(error))) |
144 |
+ except Exception as error: |
145 |
+ self.output.print_err('Connector.fetch_url(); Failed to retrieve ' |
146 |
+ 'the content from: %s\nError was: %s\n' |
147 |
+ % (url, str(error))) |
148 |
+ |
149 |
+ self.output.write('Connector.fetch_url() HEADERS = %s\n' %str(connection.headers), 4) |
150 |
+ self.output.write('Connector.fetch_url() Status_code = %i\n' % connection.status_code, 2) |
151 |
+ return connection |
152 |
+ |
153 |
+ |
154 |
+ @staticmethod |
155 |
+ def normalize_headers(headers, to_lower=True): |
156 |
+ """ py2, py3 compatibility function, since only py2 returns keys as lower() |
157 |
+ """ |
158 |
+ if to_lower: |
159 |
+ return dict((x.lower(), x) for x in list(headers)) |
160 |
+ return dict((x.upper(), x) for x in list(headers)) |
161 |
+ |
162 |
+ |
163 |
+ def fetch_content(self, url, tpath=None): |
164 |
+ """Fetch the mirror list |
165 |
+ |
166 |
+ @param url: string of the content to fetch |
167 |
+ @param headers: dictionary, optional headers to use |
168 |
+ @param tpath: string, optional filepath to a timestamp file |
169 |
+ to use in the headers |
170 |
+ @returns (success bool, content fetched , timestamp of fetched content, |
171 |
+ content headers returned) |
172 |
+ """ |
173 |
+ |
174 |
+ fheaders = self.headers |
175 |
+ |
176 |
+ if tpath: |
177 |
+ fheaders = self.add_timestamp(fheaders, tpath) |
178 |
+ |
179 |
+ connection = self.fetch_url(url, fheaders) |
180 |
+ |
181 |
+ headers = self.normalize_headers(connection.headers) |
182 |
+ |
183 |
+ if 'last-modified' in headers: |
184 |
+ timestamp = headers['last-modified'] |
185 |
+ elif 'date' in headers: |
186 |
+ timestamp = headers['date'] |
187 |
+ else: |
188 |
+ timestamp = None |
189 |
+ |
190 |
+ if connection.status_code in [304]: |
191 |
+ self.output.write('Content already up to date: %s\n' |
192 |
+ % url, 4) |
193 |
+ self.output.write('Last-modified: %s\n' % timestamp, 4) |
194 |
+ elif connection.status_code not in [200]: |
195 |
+ self.output.print_err('Connector.fetch_content(); HTTP Status-Code was:\n' |
196 |
+ 'url: %s\n%s' |
197 |
+ % (url, str(connection.status_code))) |
198 |
+ |
199 |
+ if connection.status_code in [200]: |
200 |
+ self.output.write('New content downloaded for: %s\n' |
201 |
+ % url, 4) |
202 |
+ return (True, connection.content, timestamp) |
203 |
+ return (False, '', '') |
204 |
+ |
205 |
|
206 |
diff --git a/mirrorselect/extractor.py b/mirrorselect/extractor.py |
207 |
index 3a113fb..c8d5bd5 100644 |
208 |
--- a/mirrorselect/extractor.py |
209 |
+++ b/mirrorselect/extractor.py |
210 |
@@ -27,20 +27,10 @@ Distributed under the terms of the GNU General Public License v2 |
211 |
|
212 |
""" |
213 |
|
214 |
-import sys |
215 |
- |
216 |
-if sys.version_info[0] >= 3: |
217 |
- import urllib.request, urllib.parse, urllib.error |
218 |
- url_parse = urllib.parse |
219 |
- url_open = urllib.request.urlopen |
220 |
-else: |
221 |
- import urllib |
222 |
- import urlparse |
223 |
- url_parse = urlparse.urlparse |
224 |
- url_open = urllib.urlopen |
225 |
- |
226 |
+import os |
227 |
|
228 |
from mirrorselect.mirrorparser3 import MirrorParser3 |
229 |
+from mirrorselect.connections import Connector |
230 |
|
231 |
|
232 |
class Extractor(object): |
233 |
@@ -50,6 +40,7 @@ class Extractor(object): |
234 |
|
235 |
def __init__(self, list_url, options, output): |
236 |
self.output = output |
237 |
+ self.output.print_info('Using url: %s\n' % list_url) |
238 |
filters = {} |
239 |
for opt in ["country", "region"]: |
240 |
value = getattr(options, opt) |
241 |
@@ -61,6 +52,15 @@ class Extractor(object): |
242 |
if getattr(options, opt): |
243 |
filters["proto"] = opt |
244 |
self.output.print_info('Limiting test to %s hosts. \n' % opt ) |
245 |
+ |
246 |
+ self.proxies = {} |
247 |
+ |
248 |
+ for proxy in ['http_proxy', 'https_proxy']: |
249 |
+ if options.proxy: |
250 |
+ self.proxies[proxy.split('_')[0]] = options.proxy |
251 |
+ elif os.getenv(proxy): |
252 |
+ self.proxies[proxy.split('_')[0]] = os.getenv(proxy) |
253 |
+ |
254 |
parser = MirrorParser3() |
255 |
self.hosts = [] |
256 |
|
257 |
@@ -99,14 +99,13 @@ class Extractor(object): |
258 |
|
259 |
self.output.write('getlist(): fetching ' + url + '\n', 2) |
260 |
|
261 |
- self.output.print_info('Downloading a list of mirrors...') |
262 |
+ self.output.print_info('Downloading a list of mirrors...\n') |
263 |
|
264 |
- try: |
265 |
- parser.parse(url_open(url).read()) |
266 |
- except EnvironmentError: |
267 |
- pass |
268 |
+ fetcher = Connector(self.output, self.proxies) |
269 |
+ success, mirrorlist, timestamp = fetcher.fetch_content(url) |
270 |
+ parser.parse(mirrorlist) |
271 |
|
272 |
- if len(parser.tuples()) == 0: |
273 |
+ if (not mirrorlist) or len(parser.tuples()) == 0: |
274 |
self.output.print_err('Could not get mirror list. ' |
275 |
'Check your internet connection.') |
276 |
|
277 |
@@ -114,3 +113,4 @@ class Extractor(object): |
278 |
|
279 |
return parser.tuples() |
280 |
|
281 |
+ |