Gentoo Archives: gentoo-commits

From: Zac Medico <zmedico@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/portage:master commit in: pym/portage/dbapi/, pym/portage/util/
Date: Thu, 02 Aug 2012 00:57:30
Message-Id: 1343868701.e06cb6d66db37ac7ab77acf65038b1f770c13c96.zmedico@gentoo
1 commit: e06cb6d66db37ac7ab77acf65038b1f770c13c96
2 Author: W-Mark Kubacki <wmark <AT> hurrikane <DOT> de>
3 AuthorDate: Wed Aug 1 17:49:34 2012 +0000
4 Commit: Zac Medico <zmedico <AT> gentoo <DOT> org>
5 CommitDate: Thu Aug 2 00:51:41 2012 +0000
6 URL: http://git.overlays.gentoo.org/gitweb/?p=proj/portage.git;a=commit;h=e06cb6d6
7
8 Use the If-Modified-Since HTTP header to avoid downloading a remote index if the local copy is recent enough.
9
10 ---
11 pym/portage/dbapi/bintree.py | 24 +++++++++++++++++++--
12 pym/portage/util/_urlopen.py | 45 ++++++++++++++++++++++++++++++++++++++---
13 2 files changed, 62 insertions(+), 7 deletions(-)
14
15 diff --git a/pym/portage/dbapi/bintree.py b/pym/portage/dbapi/bintree.py
16 index 9527b07..16ae8ec 100644
17 --- a/pym/portage/dbapi/bintree.py
18 +++ b/pym/portage/dbapi/bintree.py
19 @@ -54,6 +54,11 @@ if sys.hexversion >= 0x3000000:
20 else:
21 _unicode = unicode
22
23 +class UseCachedCopyOfRemoteIndex(Exception):
24 + # If the local copy is recent enough
25 + # then fetching the remote index can be skipped.
26 + pass
27 +
28 class bindbapi(fakedbapi):
29 _known_keys = frozenset(list(fakedbapi._known_keys) + \
30 ["CHOST", "repository", "USE"])
31 @@ -852,6 +857,7 @@ class binarytree(object):
32 if e.errno != errno.ENOENT:
33 raise
34 local_timestamp = pkgindex.header.get("TIMESTAMP", None)
35 + remote_timestamp = None
36 rmt_idx = self._new_pkgindex()
37 proc = None
38 tmp_filename = None
39 @@ -861,8 +867,13 @@ class binarytree(object):
40 # slash, so join manually...
41 url = base_url.rstrip("/") + "/Packages"
42 try:
43 - f = _urlopen(url)
44 - except IOError:
45 + f = _urlopen(url, if_modified_since=local_timestamp)
46 + if hasattr(f, 'headers') and f.headers.get('timestamp', ''):
47 + remote_timestamp = f.headers.get('timestamp')
48 + except IOError as err:
49 + if hasattr(err, 'code') and err.code == 304: # not modified (since local_timestamp)
50 + raise UseCachedCopyOfRemoteIndex()
51 +
52 path = parsed_url.path.rstrip("/") + "/Packages"
53
54 if parsed_url.scheme == 'sftp':
55 @@ -903,7 +914,8 @@ class binarytree(object):
56 _encodings['repo.content'], errors='replace')
57 try:
58 rmt_idx.readHeader(f_dec)
59 - remote_timestamp = rmt_idx.header.get("TIMESTAMP", None)
60 + if not remote_timestamp: # in case it had not been read from HTTP header
61 + remote_timestamp = rmt_idx.header.get("TIMESTAMP", None)
62 if not remote_timestamp:
63 # no timestamp in the header, something's wrong
64 pkgindex = None
65 @@ -931,6 +943,12 @@ class binarytree(object):
66 writemsg("\n\n!!! %s\n" % \
67 _("Timed out while closing connection to binhost"),
68 noiselevel=-1)
69 + except UseCachedCopyOfRemoteIndex:
70 + writemsg_stdout("\n")
71 + writemsg_stdout(
72 + colorize("GOOD", _("Local copy of remote index is up-to-date and will be used.")) + \
73 + "\n")
74 + rmt_idx = pkgindex
75 except EnvironmentError as e:
76 writemsg(_("\n\n!!! Error fetching binhost package" \
77 " info from '%s'\n") % _hide_url_passwd(base_url))
78
79 diff --git a/pym/portage/util/_urlopen.py b/pym/portage/util/_urlopen.py
80 index 307624b..4296188 100644
81 --- a/pym/portage/util/_urlopen.py
82 +++ b/pym/portage/util/_urlopen.py
83 @@ -2,6 +2,9 @@
84 # Distributed under the terms of the GNU General Public License v2
85
86 import sys
87 +from datetime import datetime
88 +from time import mktime
89 +from email.utils import formatdate, parsedate
90
91 try:
92 from urllib.request import urlopen as _urlopen
93 @@ -14,15 +17,39 @@ except ImportError:
94 import urllib2 as urllib_request
95 from urllib import splituser as urllib_parse_splituser
96
97 -def urlopen(url):
98 +if sys.hexversion >= 0x3000000:
99 + long = int
100 +
101 +# to account for the difference between TIMESTAMP of the index' contents
102 +# and the file-'mtime'
103 +TIMESTAMP_TOLERANCE=5
104 +
105 +def urlopen(url, if_modified_since=None):
106 + parse_result = urllib_parse.urlparse(url)
107 try:
108 - return _urlopen(url)
109 + if parse_result.scheme not in ("http", "https"):
110 + return _urlopen(url)
111 + request = urllib_request.Request(url)
112 + request.add_header('User-Agent', 'Gentoo Portage')
113 + if if_modified_since:
114 + request.add_header('If-Modified-Since', _timestamp_to_http(if_modified_since))
115 + opener = urllib_request.build_opener()
116 + hdl = opener.open(request)
117 + if hdl.headers.get('last-modified', ''):
118 + try:
119 + add_header = hdl.headers.add_header
120 + except AttributeError:
121 + # Python 2
122 + add_header = hdl.headers.addheader
123 + add_header('timestamp', _http_to_timestamp(hdl.headers.get('last-modified')))
124 + return hdl
125 except SystemExit:
126 raise
127 - except Exception:
128 + except Exception as e:
129 + if hasattr(e, 'code') and e.code == 304: # HTTPError 304: not modified
130 + raise
131 if sys.hexversion < 0x3000000:
132 raise
133 - parse_result = urllib_parse.urlparse(url)
134 if parse_result.scheme not in ("http", "https") or \
135 not parse_result.username:
136 raise
137 @@ -40,3 +67,13 @@ def _new_urlopen(url):
138 auth_handler = urllib_request.HTTPBasicAuthHandler(password_manager)
139 opener = urllib_request.build_opener(auth_handler)
140 return opener.open(url)
141 +
142 +def _timestamp_to_http(timestamp):
143 + dt = datetime.fromtimestamp(float(long(timestamp)+TIMESTAMP_TOLERANCE))
144 + stamp = mktime(dt.timetuple())
145 + return formatdate(timeval=stamp, localtime=False, usegmt=True)
146 +
147 +def _http_to_timestamp(http_datetime_string):
148 + tuple = parsedate(http_datetime_string)
149 + timestamp = mktime(tuple)
150 + return str(long(timestamp))