1 |
commit: 5b6cf172e378f6da88e9634aa4e89f2f34390659 |
2 |
Author: Zac Medico <zmedico <AT> gentoo <DOT> org> |
3 |
AuthorDate: Mon Oct 23 01:55:36 2017 +0000 |
4 |
Commit: Zac Medico <zmedico <AT> gentoo <DOT> org> |
5 |
CommitDate: Fri Oct 27 18:38:02 2017 +0000 |
6 |
URL: https://gitweb.gentoo.org/proj/portage.git/commit/?id=5b6cf172 |
7 |
|
8 |
file_copy: use sendfile return value to measure bytes copied (bug 635126) |
9 |
|
10 |
The sendfile *offset parameter refers to the input file offset, so |
11 |
it cannot be used in the same way as the copy_file_range *off_out |
12 |
parameter. Therefore, add sf_wrapper function which implements the |
13 |
*off_out behavior for sendfile. |
14 |
|
15 |
Also update cfr_wrapper so that it does not rely on the fd_in file |
16 |
offset, and remove corresponding fd_in lseek calls which are no |
17 |
longer needed. |
18 |
|
19 |
The file offset of fd_in is now completely unused, except in the |
20 |
plain read/write loop, where lseek is called prior to entering |
21 |
the loop. |
22 |
|
23 |
Bug: https://bugs.gentoo.org/635126 |
24 |
|
25 |
src/portage_util_file_copy_reflink_linux.c | 99 ++++++++++++++++++------------ |
26 |
1 file changed, 59 insertions(+), 40 deletions(-) |
27 |
|
28 |
diff --git a/src/portage_util_file_copy_reflink_linux.c b/src/portage_util_file_copy_reflink_linux.c |
29 |
index 4be9e0568..c3ce26b2b 100644 |
30 |
--- a/src/portage_util_file_copy_reflink_linux.c |
31 |
+++ b/src/portage_util_file_copy_reflink_linux.c |
32 |
@@ -56,12 +56,18 @@ initreflink_linux(void) |
33 |
|
34 |
/** |
35 |
* cfr_wrapper - A copy_file_range syscall wrapper function, having a |
36 |
- * function signature that is compatible with sendfile. |
37 |
+ * function signature that is compatible with sf_wrapper. |
38 |
* @fd_out: output file descriptor |
39 |
* @fd_in: input file descriptor |
40 |
- * @off_out: offset of the output file |
41 |
+ * @off_out: must point to a buffer that specifies the starting offset |
42 |
+ * where bytes will be copied to fd_out, and this buffer is adjusted by |
43 |
+ * the number of bytes copied. |
44 |
* @len: number of bytes to copy between the file descriptors |
45 |
* |
46 |
+ * Bytes are copied from fd_in starting from *off_out, and the file |
47 |
+ * offset of fd_in is not changed. Effects on the file offset of |
48 |
+ * fd_out are undefined. |
49 |
+ * |
50 |
* Return: Number of bytes written to out_fd on success, -1 on failure |
51 |
* (errno is set appropriately). |
52 |
*/ |
53 |
@@ -69,7 +75,8 @@ static ssize_t |
54 |
cfr_wrapper(int fd_out, int fd_in, off_t *off_out, size_t len) |
55 |
{ |
56 |
#ifdef __NR_copy_file_range |
57 |
- return syscall(__NR_copy_file_range, fd_in, NULL, fd_out, |
58 |
+ off_t off_in = *off_out; |
59 |
+ return syscall(__NR_copy_file_range, fd_in, &off_in, fd_out, |
60 |
off_out, len, 0); |
61 |
#else |
62 |
/* This is how it fails at runtime when the syscall is not supported. */ |
63 |
@@ -79,18 +86,50 @@ cfr_wrapper(int fd_out, int fd_in, off_t *off_out, size_t len) |
64 |
} |
65 |
|
66 |
/** |
67 |
+ * sf_wrapper - A sendfile wrapper function, having a function signature |
68 |
+ * that is compatible with cfr_wrapper. |
69 |
+ * @fd_out: output file descriptor |
70 |
+ * @fd_in: input file descriptor |
71 |
+ * @off_out: must point to a buffer that specifies the starting offset |
72 |
+ * where bytes will be copied to fd_out, and this buffer is adjusted by |
73 |
+ * the number of bytes copied. |
74 |
+ * @len: number of bytes to copy between the file descriptors |
75 |
+ * |
76 |
+ * Bytes are copied from fd_in starting from *off_out, and the file |
77 |
+ * offset of fd_in is not changed. Effects on the file offset of |
78 |
+ * fd_out are undefined. |
79 |
+ * |
80 |
+ * Return: Number of bytes written to out_fd on success, -1 on failure |
81 |
+ * (errno is set appropriately). |
82 |
+ */ |
83 |
+static ssize_t |
84 |
+sf_wrapper(int fd_out, int fd_in, off_t *off_out, size_t len) |
85 |
+{ |
86 |
+ ssize_t ret; |
87 |
+ off_t off_in = *off_out; |
88 |
+ /* The sendfile docs do not specify behavior of the output file |
89 |
+ * offset, therefore it must be adjusted with lseek. |
90 |
+ */ |
91 |
+ if (lseek(fd_out, *off_out, SEEK_SET) < 0) |
92 |
+ return -1; |
93 |
+ ret = sendfile(fd_out, fd_in, &off_in, len); |
94 |
+ if (ret > 0) |
95 |
+ *off_out += ret; |
96 |
+ return ret; |
97 |
+} |
98 |
+ |
99 |
+ |
100 |
+/** |
101 |
* do_lseek_data - Adjust file offsets to the next location containing |
102 |
* data, creating sparse empty blocks in the output file as needed. |
103 |
* @fd_in: input file descriptor |
104 |
* @fd_out: output file descriptor |
105 |
* @off_out: offset of the output file |
106 |
* |
107 |
- * Use lseek SEEK_DATA to adjust the fd_in file offset to the next |
108 |
- * location containing data, and adjust the fd_in file offset and |
109 |
- * off_out to the same location (creating sparse empty blocks as |
110 |
- * needed). On success, both fd_in and fd_out file offsets are |
111 |
- * guaranteed to be exactly equal to the value that off_out points to. |
112 |
- * |
113 |
+ * Use lseek SEEK_DATA to adjust off_out to the next location from fd_in |
114 |
+ * containing data (creates sparse empty blocks when appropriate). Effects |
115 |
+ * on file offsets are undefined. |
116 |
+ * |
117 |
* Return: On success, the number of bytes to copy before the next hole, |
118 |
* and -1 on failure (errno is set appropriately). Returns 0 when fd_in |
119 |
* reaches EOF. |
120 |
@@ -145,13 +184,6 @@ do_lseek_data(int fd_out, int fd_in, off_t *off_out) { |
121 |
return -1; |
122 |
} |
123 |
|
124 |
- /* Revert SEEK_HOLE offset change, since we're going |
125 |
- * to copy the data that comes before the hole. |
126 |
- */ |
127 |
- if (lseek(fd_in, offset_data, SEEK_SET) < 0) { |
128 |
- return -1; |
129 |
- } |
130 |
- |
131 |
return offset_hole - offset_data; |
132 |
#else |
133 |
/* This is how it fails at runtime when lseek SEEK_DATA is not supported. */ |
134 |
@@ -232,10 +264,6 @@ _reflink_linux_file_copy(PyObject *self, PyObject *args) |
135 |
break; |
136 |
} |
137 |
|
138 |
- /* For the copyfunc call, the fd_in file offset must be |
139 |
- * exactly equal to offset_out. The above do_lseek_data |
140 |
- * function guarantees correct state. |
141 |
- */ |
142 |
copyfunc_ret = copyfunc(fd_out, |
143 |
fd_in, |
144 |
&offset_out, |
145 |
@@ -250,7 +278,7 @@ _reflink_linux_file_copy(PyObject *self, PyObject *args) |
146 |
* syscall is not available (less than Linux 4.5). |
147 |
*/ |
148 |
error = 0; |
149 |
- copyfunc = sendfile; |
150 |
+ copyfunc = sf_wrapper; |
151 |
copyfunc_ret = copyfunc(fd_out, |
152 |
fd_in, |
153 |
&offset_out, |
154 |
@@ -284,27 +312,18 @@ _reflink_linux_file_copy(PyObject *self, PyObject *args) |
155 |
} else { |
156 |
stat_in_acquired = 1; |
157 |
|
158 |
- /* For the sendfile call, the fd_in file offset must be |
159 |
- * exactly equal to offset_out. Use lseek to ensure |
160 |
- * correct state, in case an EINTR retry caused it to |
161 |
- * get out of sync somewhow. |
162 |
- */ |
163 |
- if (lseek(fd_in, offset_out, SEEK_SET) < 0) { |
164 |
- error = errno; |
165 |
- } else { |
166 |
- while (offset_out < stat_in.st_size) { |
167 |
- copyfunc_ret = sendfile(fd_out, |
168 |
- fd_in, |
169 |
- &offset_out, |
170 |
- stat_in.st_size - offset_out); |
171 |
+ while (offset_out < stat_in.st_size) { |
172 |
+ copyfunc_ret = sf_wrapper(fd_out, |
173 |
+ fd_in, |
174 |
+ &offset_out, |
175 |
+ stat_in.st_size - offset_out); |
176 |
|
177 |
- if (copyfunc_ret < 0) { |
178 |
- error = errno; |
179 |
- if (errno == EINVAL && !offset_out) { |
180 |
- sendfile_works = 0; |
181 |
- } |
182 |
- break; |
183 |
+ if (copyfunc_ret < 0) { |
184 |
+ error = errno; |
185 |
+ if (errno == EINVAL && !offset_out) { |
186 |
+ sendfile_works = 0; |
187 |
} |
188 |
+ break; |
189 |
} |
190 |
} |
191 |
} |