1 |
On Sun, May 31, 2020 at 3:36 PM Zac Medico <zmedico@g.o> wrote: |
2 |
> |
3 |
> On 5/31/20 12:21 PM, Mike Gilbert wrote: |
4 |
> > On Sun, May 31, 2020 at 2:01 PM Zac Medico <zmedico@g.o> wrote: |
5 |
> >> |
6 |
> >> On 5/31/20 6:17 AM, Mike Gilbert wrote: |
7 |
> >>> Unquote the URL basename when fetching from upstream. |
8 |
> >>> Quote the filename when fetching from mirrors. |
9 |
> >>> |
10 |
> >>> Bug: https://bugs.gentoo.org/719810 |
11 |
> >>> Signed-off-by: Mike Gilbert <floppym@g.o> |
12 |
> >>> --- |
13 |
> >>> lib/portage/dbapi/porttree.py | 7 ++++++- |
14 |
> >>> lib/portage/package/ebuild/fetch.py | 9 +++++++-- |
15 |
> >>> 2 files changed, 13 insertions(+), 3 deletions(-) |
16 |
> >>> |
17 |
> >>> diff --git a/lib/portage/dbapi/porttree.py b/lib/portage/dbapi/porttree.py |
18 |
> >>> index 08af17bcd..984263039 100644 |
19 |
> >>> --- a/lib/portage/dbapi/porttree.py |
20 |
> >>> +++ b/lib/portage/dbapi/porttree.py |
21 |
> >>> @@ -55,6 +55,11 @@ try: |
22 |
> >>> except ImportError: |
23 |
> >>> from urlparse import urlparse |
24 |
> >>> |
25 |
> >>> +try: |
26 |
> >>> + from urllib.parse import unquote as urlunquote |
27 |
> >>> +except ImportError: |
28 |
> >>> + from urllib import unquote as urlunquote |
29 |
> >>> + |
30 |
> >>> if sys.hexversion >= 0x3000000: |
31 |
> >>> # pylint: disable=W0622 |
32 |
> >>> basestring = str |
33 |
> >>> @@ -1547,7 +1552,7 @@ def _parse_uri_map(cpv, metadata, use=None): |
34 |
> >>> myuris.pop() |
35 |
> >>> distfile = myuris.pop() |
36 |
> >>> else: |
37 |
> >>> - distfile = os.path.basename(uri) |
38 |
> >>> + distfile = urlunquote(os.path.basename(uri)) |
39 |
> >>> if not distfile: |
40 |
> >>> raise portage.exception.InvalidDependString( |
41 |
> >>> ("getFetchMap(): '%s' SRC_URI has no file " + \ |
42 |
> >>> diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py |
43 |
> >>> index 28e7caf53..47c3ad28f 100644 |
44 |
> >>> --- a/lib/portage/package/ebuild/fetch.py |
45 |
> >>> +++ b/lib/portage/package/ebuild/fetch.py |
46 |
> >>> @@ -26,6 +26,11 @@ try: |
47 |
> >>> except ImportError: |
48 |
> >>> from urlparse import urlparse |
49 |
> >>> |
50 |
> >>> +try: |
51 |
> >>> + from urllib.parse import quote as urlquote |
52 |
> >>> +except ImportError: |
53 |
> >>> + from urllib import quote as urlquote |
54 |
> >>> + |
55 |
> >>> import portage |
56 |
> >>> portage.proxy.lazyimport.lazyimport(globals(), |
57 |
> >>> 'portage.package.ebuild.config:check_config_instance,config', |
58 |
> >>> @@ -351,7 +356,7 @@ _size_suffix_map = { |
59 |
> >>> |
60 |
> >>> class FlatLayout(object): |
61 |
> >>> def get_path(self, filename): |
62 |
> >>> - return filename |
63 |
> >>> + return urlquote(filename) |
64 |
> >>> |
65 |
> >>> def get_filenames(self, distdir): |
66 |
> >>> for dirpath, dirnames, filenames in os.walk(distdir, |
67 |
> >>> @@ -382,7 +387,7 @@ class FilenameHashLayout(object): |
68 |
> >>> c = c // 4 |
69 |
> >>> ret += fnhash[:c] + '/' |
70 |
> >>> fnhash = fnhash[c:] |
71 |
> >>> - return ret + filename |
72 |
> >>> + return ret + urlquote(filename) |
73 |
> >>> |
74 |
> >>> def get_filenames(self, distdir): |
75 |
> >>> pattern = '' |
76 |
> >>> |
77 |
> >> |
78 |
> >> We've also got these other basename calls in fetch.py: |
79 |
> >> |
80 |
> >>> diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py |
81 |
> >>> index 28e7caf53..56b375d58 100644 |
82 |
> >>> --- a/lib/portage/package/ebuild/fetch.py |
83 |
> >>> +++ b/lib/portage/package/ebuild/fetch.py |
84 |
> >>> @@ -730,9 +730,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0, |
85 |
> >>> else: |
86 |
> >>> for myuri in myuris: |
87 |
> >>> if urlparse(myuri).scheme: |
88 |
> >>> - file_uri_tuples.append((os.path.basename(myuri), myuri)) |
89 |
> >>> + file_uri_tuples.append((urlunquote(os.path.basename(myuri)), myuri)) |
90 |
> >>> else: |
91 |
> >>> - file_uri_tuples.append((os.path.basename(myuri), None)) |
92 |
> >>> + file_uri_tuples.append((urlunquote(os.path.basename(myuri)), None)) |
93 |
> >> |
94 |
> >> The case with no scheme is not a URI, so we need to decide whether |
95 |
> >> or not to unquote, and we should make _parse_uri_map behavior |
96 |
> >> consistent for this case. |
97 |
> > |
98 |
> > Backing up, I think unquoting the basename is really a separate issue |
99 |
> > from quoting the filename in Layout.get_path(). The latter fixes a |
100 |
> > known bug when fetching from mirrors, whereas the former seems like a |
101 |
> > cosmetic issue. I don't think we should mix these two up into the same |
102 |
> > commit. |
103 |
> > |
104 |
> > Could I get your approval to push my original patch (Escape |
105 |
> > percent-signs in filename when fetching from mirrors)? |
106 |
> |
107 |
> Separate patches are fine, but both patches really should be merged at |
108 |
> the same time since the quoting and unquoting are inherently coupled. |
109 |
|
110 |
I think that they are not actually coupled at all. It's two sets of |
111 |
code used in different contexts. |
112 |
|
113 |
My original patch could and should be pushed independently of any |
114 |
subsequent changes. |
115 |
|
116 |
The unquoting code in this subsequent discussion only affects how a |
117 |
URL is translated to a filename on disk when fetching from upstream |
118 |
and generating the Manifest. Ebuild developers can always override |
119 |
this translation with an arrow operator in SRC_URI, as happens in |
120 |
go-module.eclass. |
121 |
|
122 |
The quoting code deals with escaping whatever on-disk filename portage |
123 |
happens to be using (based on the URL basename or the right-hand side |
124 |
of the SRC_URI arrow) so that it can be fetched from a mirror. It |
125 |
doesn't matter if that disk filename has some ugly percent signs or |
126 |
not, so long as we can send an appropriate request to the mirror web |
127 |
server. |
128 |
|
129 |
If you have a counter-example where things will fail with my original |
130 |
patch, please elaborate. |