1 |
commit: e06cb6d66db37ac7ab77acf65038b1f770c13c96 |
2 |
Author: W-Mark Kubacki <wmark <AT> hurrikane <DOT> de> |
3 |
AuthorDate: Wed Aug 1 17:49:34 2012 +0000 |
4 |
Commit: Zac Medico <zmedico <AT> gentoo <DOT> org> |
5 |
CommitDate: Thu Aug 2 00:51:41 2012 +0000 |
6 |
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/portage.git;a=commit;h=e06cb6d6 |
7 |
|
8 |
Use the If-Modified-Since HTTP header and avoid downloading a remote index if the local copy is recent enough. |
9 |
|
10 |
--- |
11 |
pym/portage/dbapi/bintree.py | 24 +++++++++++++++++++-- |
12 |
pym/portage/util/_urlopen.py | 45 ++++++++++++++++++++++++++++++++++++++--- |
13 |
2 files changed, 62 insertions(+), 7 deletions(-) |
14 |
|
15 |
diff --git a/pym/portage/dbapi/bintree.py b/pym/portage/dbapi/bintree.py |
16 |
index 9527b07..16ae8ec 100644 |
17 |
--- a/pym/portage/dbapi/bintree.py |
18 |
+++ b/pym/portage/dbapi/bintree.py |
19 |
@@ -54,6 +54,11 @@ if sys.hexversion >= 0x3000000: |
20 |
else: |
21 |
_unicode = unicode |
22 |
|
23 |
+class UseCachedCopyOfRemoteIndex(Exception): |
24 |
+ # If the local copy is recent enough |
25 |
+ # then fetching the remote index can be skipped. |
26 |
+ pass |
27 |
+ |
28 |
class bindbapi(fakedbapi): |
29 |
_known_keys = frozenset(list(fakedbapi._known_keys) + \ |
30 |
["CHOST", "repository", "USE"]) |
31 |
@@ -852,6 +857,7 @@ class binarytree(object): |
32 |
if e.errno != errno.ENOENT: |
33 |
raise |
34 |
local_timestamp = pkgindex.header.get("TIMESTAMP", None) |
35 |
+ remote_timestamp = None |
36 |
rmt_idx = self._new_pkgindex() |
37 |
proc = None |
38 |
tmp_filename = None |
39 |
@@ -861,8 +867,13 @@ class binarytree(object): |
40 |
# slash, so join manually... |
41 |
url = base_url.rstrip("/") + "/Packages" |
42 |
try: |
43 |
- f = _urlopen(url) |
44 |
- except IOError: |
45 |
+ f = _urlopen(url, if_modified_since=local_timestamp) |
46 |
+ if hasattr(f, 'headers') and f.headers.get('timestamp', ''): |
47 |
+ remote_timestamp = f.headers.get('timestamp') |
48 |
+ except IOError as err: |
49 |
+ if hasattr(err, 'code') and err.code == 304: # not modified (since local_timestamp) |
50 |
+ raise UseCachedCopyOfRemoteIndex() |
51 |
+ |
52 |
path = parsed_url.path.rstrip("/") + "/Packages" |
53 |
|
54 |
if parsed_url.scheme == 'sftp': |
55 |
@@ -903,7 +914,8 @@ class binarytree(object): |
56 |
_encodings['repo.content'], errors='replace') |
57 |
try: |
58 |
rmt_idx.readHeader(f_dec) |
59 |
- remote_timestamp = rmt_idx.header.get("TIMESTAMP", None) |
60 |
+ if not remote_timestamp: # in case it had not been read from HTTP header |
61 |
+ remote_timestamp = rmt_idx.header.get("TIMESTAMP", None) |
62 |
if not remote_timestamp: |
63 |
# no timestamp in the header, something's wrong |
64 |
pkgindex = None |
65 |
@@ -931,6 +943,12 @@ class binarytree(object): |
66 |
writemsg("\n\n!!! %s\n" % \ |
67 |
_("Timed out while closing connection to binhost"), |
68 |
noiselevel=-1) |
69 |
+ except UseCachedCopyOfRemoteIndex: |
70 |
+ writemsg_stdout("\n") |
71 |
+ writemsg_stdout( |
72 |
+ colorize("GOOD", _("Local copy of remote index is up-to-date and will be used.")) + \ |
73 |
+ "\n") |
74 |
+ rmt_idx = pkgindex |
75 |
except EnvironmentError as e: |
76 |
writemsg(_("\n\n!!! Error fetching binhost package" \ |
77 |
" info from '%s'\n") % _hide_url_passwd(base_url)) |
78 |
|
79 |
diff --git a/pym/portage/util/_urlopen.py b/pym/portage/util/_urlopen.py |
80 |
index 307624b..4296188 100644 |
81 |
--- a/pym/portage/util/_urlopen.py |
82 |
+++ b/pym/portage/util/_urlopen.py |
83 |
@@ -2,6 +2,9 @@ |
84 |
# Distributed under the terms of the GNU General Public License v2 |
85 |
|
86 |
import sys |
87 |
+from datetime import datetime |
88 |
+from time import mktime |
89 |
+from email.utils import formatdate, parsedate |
90 |
|
91 |
try: |
92 |
from urllib.request import urlopen as _urlopen |
93 |
@@ -14,15 +17,39 @@ except ImportError: |
94 |
import urllib2 as urllib_request |
95 |
from urllib import splituser as urllib_parse_splituser |
96 |
|
97 |
-def urlopen(url): |
98 |
+if sys.hexversion >= 0x3000000: |
99 |
+ long = int |
100 |
+ |
101 |
+# to account for the difference between TIMESTAMP of the index' contents |
102 |
+# and the file-'mtime' |
103 |
+TIMESTAMP_TOLERANCE=5 |
104 |
+ |
105 |
+def urlopen(url, if_modified_since=None): |
106 |
+ parse_result = urllib_parse.urlparse(url) |
107 |
try: |
108 |
- return _urlopen(url) |
109 |
+ if parse_result.scheme not in ("http", "https"): |
110 |
+ return _urlopen(url) |
111 |
+ request = urllib_request.Request(url) |
112 |
+ request.add_header('User-Agent', 'Gentoo Portage') |
113 |
+ if if_modified_since: |
114 |
+ request.add_header('If-Modified-Since', _timestamp_to_http(if_modified_since)) |
115 |
+ opener = urllib_request.build_opener() |
116 |
+ hdl = opener.open(request) |
117 |
+ if hdl.headers.get('last-modified', ''): |
118 |
+ try: |
119 |
+ add_header = hdl.headers.add_header |
120 |
+ except AttributeError: |
121 |
+ # Python 2 |
122 |
+ add_header = hdl.headers.addheader |
123 |
+ add_header('timestamp', _http_to_timestamp(hdl.headers.get('last-modified'))) |
124 |
+ return hdl |
125 |
except SystemExit: |
126 |
raise |
127 |
- except Exception: |
128 |
+ except Exception as e: |
129 |
+ if hasattr(e, 'code') and e.code == 304: # HTTPError 304: not modified |
130 |
+ raise |
131 |
if sys.hexversion < 0x3000000: |
132 |
raise |
133 |
- parse_result = urllib_parse.urlparse(url) |
134 |
if parse_result.scheme not in ("http", "https") or \ |
135 |
not parse_result.username: |
136 |
raise |
137 |
@@ -40,3 +67,13 @@ def _new_urlopen(url): |
138 |
auth_handler = urllib_request.HTTPBasicAuthHandler(password_manager) |
139 |
opener = urllib_request.build_opener(auth_handler) |
140 |
return opener.open(url) |
141 |
+ |
142 |
+def _timestamp_to_http(timestamp): |
143 |
+ dt = datetime.fromtimestamp(float(long(timestamp)+TIMESTAMP_TOLERANCE)) |
144 |
+ stamp = mktime(dt.timetuple()) |
145 |
+ return formatdate(timeval=stamp, localtime=False, usegmt=True) |
146 |
+ |
147 |
+def _http_to_timestamp(http_datetime_string): |
148 |
+ tuple = parsedate(http_datetime_string) |
149 |
+ timestamp = mktime(tuple) |
150 |
+ return str(long(timestamp)) |