Gentoo Archives: gentoo-commits

From: Brian Dolbec <brian.dolbec@×××××.com>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/mirrorselect:master commit in: mirrorselect/
Date: Sun, 02 Mar 2014 07:44:14
Message-Id: 1382292861.eabccfc8eeef5d0c76a444545fa390f400a9d4ed.dol-sen@gentoo
1 commit: eabccfc8eeef5d0c76a444545fa390f400a9d4ed
2 Author: Brian Dolbec <dolsen <AT> gentoo <DOT> org>
3 AuthorDate: Sun Oct 20 05:20:39 2013 +0000
4 Commit: Brian Dolbec <brian.dolbec <AT> gmail <DOT> com>
5 CommitDate: Sun Oct 20 18:14:21 2013 +0000
6 URL: http://git.overlays.gentoo.org/gitweb/?p=proj/mirrorselect.git;a=commit;h=eabccfc8
7
8 Work in progress on adding SSL support for downloading the mirror lists.
9
10 Add the request code in its own file and class.
11
12 ---
13 mirrorselect/connections.py | 182 ++++++++++++++++++++++++++++++++++++++++++++
14 mirrorselect/extractor.py | 36 ++++-----
15 2 files changed, 200 insertions(+), 18 deletions(-)
16
17 diff --git a/mirrorselect/connections.py b/mirrorselect/connections.py
18 new file mode 100644
19 index 0000000..ca4aa88
20 --- /dev/null
21 +++ b/mirrorselect/connections.py
22 @@ -0,0 +1,182 @@
23 +#-*- coding:utf-8 -*-
24 +
25 +"""Mirrorselect 2.x
26 + Tool for selecting Gentoo source and rsync mirrors.
27 +
28 +Copyright 2005-2012 Gentoo Foundation
29 +
30 + Copyright (C) 2005 Colin Kingsley <tercel@g.o>
31 + Copyright (C) 2008 Zac Medico <zmedico@g.o>
32 + Copyright (C) 2009 Sebastian Pipping <sebastian@×××××××.org>
33 + Copyright (C) 2009 Christian Ruppert <idl0r@g.o>
34 + Copyright (C) 2012 Brian Dolbec <dolsen@g.o>
35 +
36 +Distributed under the terms of the GNU General Public License v2
37 + This program is free software; you can redistribute it and/or modify
38 + it under the terms of the GNU General Public License as published by
39 + the Free Software Foundation, version 2 of the License.
40 +
41 + This program is distributed in the hope that it will be useful,
42 + but WITHOUT ANY WARRANTY; without even the implied warranty of
43 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 + GNU General Public License for more details.
45 +
46 + You should have received a copy of the GNU General Public License
47 + along with this program; if not, write to the Free Software
48 + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
49 +
50 +"""
51 +
52 +import sys
53 +import os
54 +
55 +VERIFY_SSL = False
56 +VERIFY_MSGS = []
57 +
58 +import requests
59 +from requests.exceptions import SSLError
60 +
# Decide at import time whether ssl certificates can be verified.
# py3.2 and newer: verification is enabled without the pyopenssl
# injection used below.
if sys.hexversion >= 0x30200f0:
    VERIFY_SSL = True
else:
    try:  # import and enable SNI support for py2
        from requests.packages.urllib3.contrib import pyopenssl
        pyopenssl.inject_into_urllib3()
        VERIFY_SSL = True
        VERIFY_MSGS = ["Successfully enabled ssl certificate verification."]
    except ImportError as e:
        # The exception must be converted explicitly: concatenating a str
        # with the ImportError object itself raises TypeError, which would
        # turn this graceful fallback into an import-time crash.
        VERIFY_MSGS = [
            "Failed to import and inject pyopenssl/SNI support into urllib3",
            "Disabling certificate verification",
            "Error was:" + str(e)
        ]
        VERIFY_SSL = False
77 +
78 +
79 +from mirrorselect.version import version
80 +
81 +
class Connector(object):
    """Primary connection interface using the dev-python/requests package.

    Keeps the output handler, the proxy mapping and the default request
    headers, and offers helpers to fetch a url and to fetch a url's content
    together with its timestamp.
    """

    def __init__(self, output, proxies):
        """Store the collaborators and report the ssl verification setup.

        @param output: object providing write(message, level) and
                       print_err(message); used for all reporting
        @param proxies: dictionary handed to requests.get() as proxies
        """
        self.output = output
        self.proxies = proxies
        self.headers = {'Accept-Charset': 'utf-8',
                        'User-Agent': 'Mirrorselect-' + version}

        # An empty message list simply produces no output.
        for msg in VERIFY_MSGS:
            self.output.write(msg + '\n', 2)

    def add_timestamp(self, headers, tpath=None, timestamp=None):
        """Add an If-Modified-Since header, for possible future caching
        of the list.

        @param headers: dictionary of headers, updated in place
        @param tpath: string, optional filepath to a timestamp file; when
                      the file exists its content replaces timestamp
        @param timestamp: string, optional timestamp to use in the headers
        @returns the headers dictionary that was passed in
        """
        if tpath and os.path.exists(tpath):
            # The builtin open() is used to read the stored timestamp.
            # Surrounding whitespace (e.g. a trailing newline) is stripped
            # because it is not valid inside a header value.
            with open(tpath, 'r') as previous:
                timestamp = previous.read().strip()
        if timestamp:
            headers['If-Modified-Since'] = timestamp
            self.output.write('Current-modified: %s\n' % timestamp, 2)
        return headers

    def fetch_url(self, url, headers=None, timestamp=None):
        """Fetches the url

        @param url: string
        @param headers: dictionary, optional headers to use; defaults to
                        this Connector's own headers
        @param timestamp: string, optional timestamp to use in the headers
        @returns the response object from requests.get(), or None when the
                 request raised and output.print_err() returned control
        """
        # Work on a copy so that adding a timestamp never leaks into the
        # shared default headers or into the caller's dictionary.
        headers = dict(headers) if headers else dict(self.headers)

        if timestamp:
            self.add_timestamp(headers, timestamp=timestamp)

        # Look at the scheme only; a bare substring test would also match
        # 'https' appearing in the host, path or query of an http url.
        verify = url.lower().startswith('https://') and VERIFY_SSL
        self.output.write("Enabled ssl certificate verification: %s, for: %s\n"
            % (str(verify), url), 3)

        self.output.write('Connector.fetch_url(); headers = %s\n'
            % str(headers), 4)
        self.output.write('Connector.fetch_url(); connecting to opener\n', 2)

        try:
            connection = requests.get(
                url,
                headers=headers,
                verify=verify,
                proxies=self.proxies,
                )
        except SSLError as error:
            self.output.print_err('Connector.fetch_url(); Failed to update the '
                'mirror list from: %s\nSSLError was:%s\n'
                % (url, str(error)))
            # No response exists; do not fall through to the reporting
            # below, which would fail on the unbound name.
            return None
        except Exception as error:
            # Deliberately broad: any failure to connect is reported the
            # same way and signalled to the caller with None.
            self.output.print_err('Connector.fetch_url(); Failed to retrieve '
                'the content from: %s\nError was: %s\n'
                % (url, str(error)))
            return None

        self.output.write('Connector.fetch_url() HEADERS = %s\n'
            % str(connection.headers), 4)
        self.output.write('Connector.fetch_url() Status_code = %i\n'
            % connection.status_code, 2)
        return connection

    @staticmethod
    def normalize_headers(headers, to_lower=True):
        """ py2, py3 compatibility function, since only py2 returns keys as lower()

        @param headers: mapping (or other iterable) of header names
        @param to_lower: bool, normalize to lower case when True,
                         to upper case otherwise
        @returns dictionary mapping each normalized header name to the
                 original header name (NOT to the header's value)
        """
        if to_lower:
            return dict((x.lower(), x) for x in list(headers))
        return dict((x.upper(), x) for x in list(headers))

    def fetch_content(self, url, tpath=None):
        """Fetch the mirror list

        @param url: string of the content to fetch
        @param tpath: string, optional filepath to a timestamp file
                      to use in the headers
        @returns (success bool, content fetched, timestamp of fetched content)
                 or (False, '', '') when nothing new was downloaded
        """
        # Copy the defaults so a timestamp from tpath is not stored
        # permanently on this Connector.
        fheaders = dict(self.headers)

        if tpath:
            fheaders = self.add_timestamp(fheaders, tpath)

        connection = self.fetch_url(url, fheaders)
        if connection is None:
            # fetch_url() already reported the failure.
            return (False, '', '')

        # names maps normalized header names to the names actually used in
        # the response, so the value has to be looked up in the response.
        names = self.normalize_headers(connection.headers)

        if 'last-modified' in names:
            timestamp = connection.headers[names['last-modified']]
        elif 'date' in names:
            timestamp = connection.headers[names['date']]
        else:
            timestamp = None

        if connection.status_code in [304]:
            self.output.write('Content already up to date: %s\n'
                % url, 4)
            self.output.write('Last-modified: %s\n' % timestamp, 4)
        elif connection.status_code not in [200]:
            self.output.print_err('Connector.fetch_content(); HTTP Status-Code was:\n'
                'url: %s\n%s'
                % (url, str(connection.status_code)))

        if connection.status_code in [200]:
            self.output.write('New content downloaded for: %s\n'
                % url, 4)
            return (True, connection.content, timestamp)
        return (False, '', '')
204 +
205
206 diff --git a/mirrorselect/extractor.py b/mirrorselect/extractor.py
207 index 3a113fb..c8d5bd5 100644
208 --- a/mirrorselect/extractor.py
209 +++ b/mirrorselect/extractor.py
210 @@ -27,20 +27,10 @@ Distributed under the terms of the GNU General Public License v2
211
212 """
213
214 -import sys
215 -
216 -if sys.version_info[0] >= 3:
217 - import urllib.request, urllib.parse, urllib.error
218 - url_parse = urllib.parse
219 - url_open = urllib.request.urlopen
220 -else:
221 - import urllib
222 - import urlparse
223 - url_parse = urlparse.urlparse
224 - url_open = urllib.urlopen
225 -
226 +import os
227
228 from mirrorselect.mirrorparser3 import MirrorParser3
229 +from mirrorselect.connections import Connector
230
231
232 class Extractor(object):
233 @@ -50,6 +40,7 @@ class Extractor(object):
234
235 def __init__(self, list_url, options, output):
236 self.output = output
237 + self.output.print_info('Using url: %s\n' % list_url)
238 filters = {}
239 for opt in ["country", "region"]:
240 value = getattr(options, opt)
241 @@ -61,6 +52,15 @@ class Extractor(object):
242 if getattr(options, opt):
243 filters["proto"] = opt
244 self.output.print_info('Limiting test to %s hosts. \n' % opt )
245 +
246 + self.proxies = {}
247 +
248 + for proxy in ['http_proxy', 'https_proxy']:
249 + if options.proxy:
250 + self.proxies[proxy.split('_')[0]] = options.proxy
251 + elif os.getenv(proxy):
252 + self.proxies[proxy.split('_')[0]] = os.getenv(proxy)
253 +
254 parser = MirrorParser3()
255 self.hosts = []
256
257 @@ -99,14 +99,13 @@ class Extractor(object):
258
259 self.output.write('getlist(): fetching ' + url + '\n', 2)
260
261 - self.output.print_info('Downloading a list of mirrors...')
262 + self.output.print_info('Downloading a list of mirrors...\n')
263
264 - try:
265 - parser.parse(url_open(url).read())
266 - except EnvironmentError:
267 - pass
268 + fetcher = Connector(self.output, self.proxies)
269 + success, mirrorlist, timestamp = fetcher.fetch_content(url)
270 + parser.parse(mirrorlist)
271
272 - if len(parser.tuples()) == 0:
273 + if (not mirrorlist) or len(parser.tuples()) == 0:
274 self.output.print_err('Could not get mirror list. '
275 'Check your internet connection.')
276
277 @@ -114,3 +113,4 @@ class Extractor(object):
278
279 return parser.tuples()
280
281 +