1 |
W dniu 02.03.2017, czw o godzinie 17∶09 -0800, użytkownik Zac Medico |
2 |
napisał: |
3 |
> Perform in-kernel file copying when possible, and also support |
4 |
> reflinks and sparse files. If the optimized implementation |
5 |
> fails at runtime, gracefully fallback to a plain read/write |
6 |
> loop. |
7 |
> |
8 |
> Compile-time and run-time fallbacks are implemented, so that |
9 |
> any incompatiblities will be handled gracefully. For example, |
10 |
> if the code is compiled on a system that supports the |
11 |
> copy_file_range syscall, but at run-time an older kernel that |
12 |
> does not support this syscall is detected, it will be handled |
13 |
> gracefully. There are similar fallbacks for lack of lseek |
14 |
> SEEK_DATA and sendfile support. |
15 |
> |
16 |
> X-Gentoo-Bug: 607868 |
17 |
> X-Gentoo-Bug-Url: https://bugs.gentoo.org/show_bug.cgi?id=607868 |
18 |
> --- |
19 |
> [PATCH v2] adds a native fallback that will work on any kernel, |
20 |
> using a plain read/write loop. |
21 |
> |
22 |
> pym/portage/tests/util/file_copy/__init__.py | 0 |
23 |
> pym/portage/tests/util/file_copy/__test__.py | 0 |
24 |
> pym/portage/tests/util/file_copy/test_copyfile.py | 68 +++++ |
25 |
> pym/portage/util/file_copy/__init__.py | 45 ++++ |
26 |
> pym/portage/util/movefile.py | 4 +- |
27 |
> setup.py | 9 + |
28 |
> src/portage_util_file_copy_reflink_linux.c | 298 ++++++++++++++++++++++ |
29 |
> 7 files changed, 423 insertions(+), 1 deletion(-) |
30 |
> create mode 100644 pym/portage/tests/util/file_copy/__init__.py |
31 |
> create mode 100644 pym/portage/tests/util/file_copy/__test__.py |
32 |
> create mode 100644 pym/portage/tests/util/file_copy/test_copyfile.py |
33 |
> create mode 100644 pym/portage/util/file_copy/__init__.py |
34 |
> create mode 100644 src/portage_util_file_copy_reflink_linux.c |
35 |
> |
36 |
> diff --git a/pym/portage/tests/util/file_copy/__init__.py b/pym/portage/tests/util/file_copy/__init__.py |
37 |
> new file mode 100644 |
38 |
> index 0000000..e69de29 |
39 |
> diff --git a/pym/portage/tests/util/file_copy/__test__.py b/pym/portage/tests/util/file_copy/__test__.py |
40 |
> new file mode 100644 |
41 |
> index 0000000..e69de29 |
42 |
> diff --git a/pym/portage/tests/util/file_copy/test_copyfile.py b/pym/portage/tests/util/file_copy/test_copyfile.py |
43 |
> new file mode 100644 |
44 |
> index 0000000..94fb4d7 |
45 |
> --- /dev/null |
46 |
> +++ b/pym/portage/tests/util/file_copy/test_copyfile.py |
47 |
> @@ -0,0 +1,68 @@ |
48 |
> +# Copyright 2017 Gentoo Foundation |
49 |
> +# Distributed under the terms of the GNU General Public License v2 |
50 |
> + |
51 |
> +import shutil |
52 |
> +import tempfile |
53 |
> + |
54 |
> +from portage import os |
55 |
> +from portage.tests import TestCase |
56 |
> +from portage.checksum import perform_md5 |
57 |
> +from portage.util.file_copy import copyfile |
58 |
> + |
59 |
> + |
60 |
> +class CopyFileTestCase(TestCase): |
61 |
> + |
62 |
> + def testCopyFile(self): |
63 |
> + |
64 |
> + tempdir = tempfile.mkdtemp() |
65 |
> + try: |
66 |
> + src_path = os.path.join(tempdir, 'src') |
67 |
> + dest_path = os.path.join(tempdir, 'dest') |
68 |
> + content = b'foo' |
69 |
> + |
70 |
> + with open(src_path, 'wb') as f: |
71 |
> + f.write(content) |
72 |
> + |
73 |
> + copyfile(src_path, dest_path) |
74 |
> + |
75 |
> + self.assertEqual(perform_md5(src_path), perform_md5(dest_path)) |
76 |
> + finally: |
77 |
> + shutil.rmtree(tempdir) |
78 |
> + |
79 |
> + |
80 |
> +class CopyFileSparseTestCase(TestCase): |
81 |
> + |
82 |
> + def testCopyFileSparse(self): |
83 |
> + |
84 |
> + tempdir = tempfile.mkdtemp() |
85 |
> + try: |
86 |
> + src_path = os.path.join(tempdir, 'src') |
87 |
> + dest_path = os.path.join(tempdir, 'dest') |
88 |
> + content = b'foo' |
89 |
> + |
90 |
> + # Use seek to create some sparse blocks. Don't make these |
91 |
> + # files too big, in case the filesystem doesn't support |
92 |
> + # sparse files. |
93 |
> + with open(src_path, 'wb') as f: |
94 |
> + f.write(content) |
95 |
> + f.seek(2**18, 1) |
96 |
> + f.write(content) |
97 |
> + f.seek(2**19, 1) |
98 |
> + f.write(content) |
99 |
> + |
100 |
> + copyfile(src_path, dest_path) |
101 |
> + |
102 |
> + self.assertEqual(perform_md5(src_path), perform_md5(dest_path)) |
103 |
> + |
104 |
> + # This last part of the test is expected to fail when sparse |
105 |
> + # copy is not implemented, so set the todo flag in order |
106 |
> + # to tolerate failures. |
107 |
> + self.todo = True |
108 |
> + |
109 |
> + # If sparse blocks were preserved, then both files should |
110 |
> + # consume the same number of blocks. |
111 |
> + self.assertEqual( |
112 |
> + os.stat(src_path).st_blocks, |
113 |
> + os.stat(dest_path).st_blocks) |
114 |
> + finally: |
115 |
> + shutil.rmtree(tempdir) |
116 |
> diff --git a/pym/portage/util/file_copy/__init__.py b/pym/portage/util/file_copy/__init__.py |
117 |
> new file mode 100644 |
118 |
> index 0000000..34c46ad |
119 |
> --- /dev/null |
120 |
> +++ b/pym/portage/util/file_copy/__init__.py |
121 |
> @@ -0,0 +1,45 @@ |
122 |
> +# Copyright 2017 Gentoo Foundation |
123 |
> +# Distributed under the terms of the GNU General Public License v2 |
124 |
> + |
125 |
> +import os |
126 |
> +import shutil |
127 |
> +import tempfile |
128 |
> + |
129 |
> +try: |
130 |
> + from portage.util.file_copy.reflink_linux import file_copy as _file_copy |
131 |
> +except ImportError: |
132 |
> + _file_copy = None |
133 |
> + |
134 |
> + |
135 |
> +_copyfile = None |
136 |
> + |
137 |
> + |
138 |
> +def _optimized_copyfile(src, dst): |
139 |
> + with open(src, 'rb', buffering=0) as src_file, \ |
140 |
> + open(dst, 'wb', buffering=0) as dst_file: |
141 |
> + _file_copy(src_file.fileno(), dst_file.fileno()) |
142 |
> + |
143 |
> + |
144 |
> +def copyfile(src, dst): |
145 |
> + """ |
146 |
> + Copy the contents (no metadata) of the file named src to a file |
147 |
> + named dst. |
148 |
> + |
149 |
> + If possible, copying is done within the kernel, and uses |
150 |
> + "copy acceleration" techniques (such as reflinks). This also |
151 |
> + supports sparse files. |
152 |
> + |
153 |
> + @param src: path of source file |
154 |
> + @type src: str |
155 |
> + @param dst: path of destination file |
156 |
> + @type dst: str |
157 |
> + """ |
158 |
> + global _copyfile |
159 |
> + |
160 |
> + if _copyfile is None: |
161 |
> + if _file_copy is None: |
162 |
> + _copyfile = shutil.copyfile |
163 |
> + else: |
164 |
> + _copyfile = _optimized_copyfile |
165 |
|
166 |
I dare say the caching logic takes more time than the actual logic. |
167 |
However, this could be made even simpler -- you could just check |
168 |
the condition in global scope and set _copyfile value there. |
169 |
|
170 |
> + |
171 |
> + _copyfile(src, dst) |
172 |
> diff --git a/pym/portage/util/movefile.py b/pym/portage/util/movefile.py |
173 |
> index 4be1c3b..88b35d3 100644 |
174 |
> --- a/pym/portage/util/movefile.py |
175 |
> +++ b/pym/portage/util/movefile.py |
176 |
> @@ -23,6 +23,8 @@ from portage.localization import _ |
177 |
> from portage.process import spawn |
178 |
> from portage.util import writemsg |
179 |
> from portage.util._xattr import xattr |
180 |
> +from portage.util.file_copy import copyfile |
181 |
> + |
182 |
> |
183 |
> def _apply_stat(src_stat, dest): |
184 |
> _os.chown(dest, src_stat.st_uid, src_stat.st_gid) |
185 |
> @@ -114,7 +116,7 @@ def movefile(src, dest, newmtime=None, sstat=None, mysettings=None, |
186 |
> _copyfile = selinux.copyfile |
187 |
> _rename = selinux.rename |
188 |
> else: |
189 |
> - _copyfile = _shutil.copyfile |
190 |
> + _copyfile = copyfile |
191 |
> _rename = _os.rename |
192 |
> |
193 |
> lchown = _unicode_func_wrapper(portage.data.lchown, encoding=encoding) |
194 |
> diff --git a/setup.py b/setup.py |
195 |
> index a346bd4..b624767 100755 |
196 |
> --- a/setup.py |
197 |
> +++ b/setup.py |
198 |
> @@ -23,6 +23,7 @@ import collections |
199 |
> import glob |
200 |
> import os |
201 |
> import os.path |
202 |
> +import platform |
203 |
> import re |
204 |
> import subprocess |
205 |
> import sys |
206 |
> @@ -54,6 +55,14 @@ x_c_helpers = { |
207 |
> ], |
208 |
> } |
209 |
> |
210 |
> +if platform.system() == 'Linux': |
211 |
> + x_c_helpers.update({ |
212 |
> + 'portage.util.file_copy.reflink_linux': [ |
213 |
> + 'src/portage_util_file_copy_reflink_linux.c', |
214 |
> + ], |
215 |
> + }) |
216 |
> + |
217 |
> + |
218 |
> class x_build(build): |
219 |
> """ Build command with extra build_man call. """ |
220 |
> |
221 |
> diff --git a/src/portage_util_file_copy_reflink_linux.c b/src/portage_util_file_copy_reflink_linux.c |
222 |
> new file mode 100644 |
223 |
> index 0000000..fde3cd3 |
224 |
> --- /dev/null |
225 |
> +++ b/src/portage_util_file_copy_reflink_linux.c |
226 |
> @@ -0,0 +1,298 @@ |
227 |
> +/* Copyright 2017 Gentoo Foundation |
228 |
> + * Distributed under the terms of the GNU General Public License v2 |
229 |
> + */ |
230 |
> + |
231 |
> +#include <Python.h> |
232 |
> +#include <errno.h> |
233 |
> +#include <stdlib.h> |
234 |
> +#include <ctype.h> |
235 |
> +#include <sys/sendfile.h> |
236 |
> +#include <sys/stat.h> |
237 |
> +#include <sys/syscall.h> |
238 |
> +#include <sys/types.h> |
239 |
> +#include <unistd.h> |
240 |
> + |
241 |
> +static PyObject * _reflink_linux_file_copy(PyObject *, PyObject *); |
242 |
> + |
243 |
> +static PyMethodDef reflink_linuxMethods[] = { |
244 |
> + { |
245 |
> + "file_copy", |
246 |
> + _reflink_linux_file_copy, |
247 |
> + METH_VARARGS, |
248 |
> + "Copy between two file descriptors, " |
249 |
> + "with reflink and sparse file support." |
250 |
> + }, |
251 |
> + {NULL, NULL, 0, NULL} |
252 |
> +}; |
253 |
> + |
254 |
> +#if PY_MAJOR_VERSION >= 3 |
255 |
> +static struct PyModuleDef moduledef = { |
256 |
> + PyModuleDef_HEAD_INIT, |
257 |
> + "reflink_linux", /* m_name */ |
258 |
> + "Module for reflink_linux copy operations", /* m_doc */ |
259 |
> + -1, /* m_size */ |
260 |
> + reflink_linuxMethods, /* m_methods */ |
261 |
> + NULL, /* m_reload */ |
262 |
> + NULL, /* m_traverse */ |
263 |
> + NULL, /* m_clear */ |
264 |
> + NULL, /* m_free */ |
265 |
> +}; |
266 |
> + |
267 |
> +PyMODINIT_FUNC |
268 |
> +PyInit_reflink_linux(void) |
269 |
> +{ |
270 |
> + PyObject *m; |
271 |
> + m = PyModule_Create(&moduledef); |
272 |
> + return m; |
273 |
> +} |
274 |
> +#else |
275 |
> +PyMODINIT_FUNC |
276 |
> +initreflink_linux(void) |
277 |
> +{ |
278 |
> + Py_InitModule("reflink_linux", reflink_linuxMethods); |
279 |
> +} |
280 |
> +#endif |
281 |
> + |
282 |
> + |
283 |
> +static ssize_t |
284 |
> +cfr_wrapper(int fd_out, int fd_in, loff_t *off_out, size_t len) |
285 |
> +{ |
286 |
> +#ifdef __NR_copy_file_range |
287 |
> + return syscall(__NR_copy_file_range, fd_in, NULL, fd_out, |
288 |
> + off_out, len, 0); |
289 |
> +#else |
290 |
> + /* This is how it fails at runtime when the syscall is not supported. */ |
291 |
> + errno = ENOSYS; |
292 |
> + return -1; |
293 |
> +#endif |
294 |
> +} |
295 |
> + |
296 |
> + |
297 |
> +static off_t |
298 |
> +do_lseek(int fd_out, int fd_in, loff_t *off_out) { |
299 |
|
300 |
Maybe name it do_lseek_data() to make the goal clearer? Some function |
301 |
docs in comments would also be helpful. |
302 |
|
303 |
> +#ifdef SEEK_DATA |
304 |
> + /* Use lseek SEEK_DATA/SEEK_HOLE for sparse file support, |
305 |
> + * as suggested in the copy_file_range man page. |
306 |
> + */ |
307 |
> + off_t offset_data, offset_hole; |
308 |
> + |
309 |
> + offset_data = lseek(fd_in, *off_out, SEEK_DATA); |
310 |
> + if (offset_data < 0) { |
311 |
> + if (errno == ENXIO) { |
312 |
> + return 0; |
313 |
> + } |
314 |
> + return -1; |
315 |
> + } |
316 |
> + |
317 |
> + /* Create sparse empty blocks in the output file, up |
318 |
> + * until the next location that will contain data. |
319 |
> + */ |
320 |
> + if (offset_data > *off_out) { |
321 |
> + if (lseek(fd_out, offset_data, SEEK_SET) < 0) { |
322 |
> + return -1; |
323 |
> + } |
324 |
> + *off_out = offset_data; |
325 |
> + } |
326 |
> + |
327 |
> + /* Locate the next hole, so that we know when to |
328 |
> + * stop copying. There is an implicit hole at the |
329 |
> + * end of the file. |
330 |
> + */ |
331 |
> + offset_hole = lseek(fd_in, offset_data, SEEK_HOLE); |
332 |
> + if (offset_hole < 0) { |
333 |
> + return -1; |
334 |
> + } |
335 |
> + |
336 |
> + /* Revert SEEK_HOLE offset change, since we're going |
337 |
> + * to copy the data that comes before the hole. |
338 |
> + */ |
339 |
> + if (lseek(fd_in, *off_out, SEEK_SET) < 0) { |
340 |
> + return -1; |
341 |
> + } |
342 |
> + |
343 |
> + return offset_hole - offset_data; |
344 |
> +#else |
345 |
> + /* This is how it fails at runtime when lseek SEEK_DATA is not supported. */ |
346 |
> + errno = EINVAL; |
347 |
> + return -1; |
348 |
> +#endif |
349 |
> +} |
350 |
> + |
351 |
> + |
352 |
> +static PyObject * |
353 |
> +_reflink_linux_file_copy(PyObject *self, PyObject *args) |
354 |
> +{ |
355 |
> + int eintr_retry, error, fd_in, fd_out, stat_in_acquired, stat_out_acquired; |
356 |
> + int lseek_works, sendfile_works; |
357 |
> + off_t offset_out, len; |
358 |
> + ssize_t buf_bytes, buf_offset, copyfunc_ret; |
359 |
> + struct stat stat_in, stat_out; |
360 |
> + char* buf; |
361 |
> + ssize_t (*copyfunc)(int, int, loff_t *, size_t); |
362 |
> + |
363 |
> + if (!PyArg_ParseTuple(args, "ii", &fd_in, &fd_out)) |
364 |
> + return NULL; |
365 |
> + |
366 |
> + eintr_retry = 1; |
367 |
> + offset_out = 0; |
368 |
> + stat_in_acquired = 0; |
369 |
> + stat_out_acquired = 0; |
370 |
> + buf = NULL; |
371 |
> + buf_bytes = 0; |
372 |
> + buf_offset = 0; |
373 |
> + copyfunc = cfr_wrapper; |
374 |
> + lseek_works = 1; |
375 |
> + sendfile_works = 1; |
376 |
> + |
377 |
> + while (eintr_retry) { |
378 |
> + |
379 |
> + Py_BEGIN_ALLOW_THREADS |
380 |
> + |
381 |
> + /* Linux 3.1 and later support SEEK_DATA (for sparse file support). |
382 |
> + * This code uses copy_file_range if possible, and falls back to |
383 |
> + * sendfile for cross-device or when the copy_file_range syscall |
384 |
> + * is not available (less than Linux 4.5). This will fail for |
385 |
> + * Linux less than 3.1, which does not support the lseek SEEK_DATA |
386 |
> + * parameter. |
387 |
> + */ |
388 |
> + if (sendfile_works && lseek_works) { |
389 |
> + error = 0; |
390 |
> + |
391 |
> + while (1) { |
392 |
> + len = do_lseek(fd_out, fd_in, &offset_out); |
393 |
> + if (!len) { |
394 |
> + /* EOF */ |
395 |
> + break; |
396 |
> + } |
397 |
> + else if (len < 0) { |
398 |
> + if (errno == EINVAL && !offset_out) { |
399 |
> + lseek_works = 0; |
400 |
> + } |
401 |
> + error = 1; |
402 |
> + break; |
403 |
> + } |
404 |
> + |
405 |
> + copyfunc_ret = copyfunc(fd_out, |
406 |
> + fd_in, |
407 |
> + &offset_out, |
408 |
> + len); |
409 |
> + |
410 |
> + if (copyfunc_ret < 0) { |
411 |
> + if ((errno == EXDEV || errno == ENOSYS) && |
412 |
> + copyfunc == cfr_wrapper) { |
413 |
> + /* Use sendfile instead of copy_file_range for |
414 |
> + * cross-device copies, or when the copy_file_range |
415 |
> + * syscall is not available (less than Linux 4.5). |
416 |
> + */ |
417 |
> + copyfunc = sendfile; |
418 |
> + copyfunc_ret = copyfunc(fd_out, |
419 |
> + fd_in, |
420 |
> + &offset_out, |
421 |
> + len); |
422 |
> + |
423 |
> + if (copyfunc_ret < 0) { |
424 |
> + /* On Linux, if lseek succeeded above, then |
425 |
> + * sendfile should have worked here too, so |
426 |
> + * don't bother to fallback for EINVAL here. |
427 |
> + */ |
428 |
> + error = 1; |
429 |
> + break; |
430 |
> + } |
431 |
> + } |
432 |
> + else { |
433 |
> + error = 1; |
434 |
> + break; |
435 |
> + } |
436 |
> + } |
437 |
> + } |
438 |
> + } |
439 |
> + |
440 |
> + /* Less than Linux 3.1 does not support SEEK_DATA or copy_file_range, |
441 |
> + * so just use sendfile for in-kernel copy. This will fail for Linux |
442 |
> + * versions from 2.6.0 to 2.6.32, because sendfile does not support |
443 |
> + * writing to regular files. |
444 |
> + */ |
445 |
> + if (sendfile_works && !lseek_works) { |
446 |
> + error = 0; |
447 |
> + |
448 |
> + if (!stat_in_acquired && fstat(fd_in, &stat_in) < 0) { |
449 |
> + error = 1; |
450 |
> + } |
451 |
> + else { |
452 |
> + stat_in_acquired = 1; |
453 |
> + while (offset_out < stat_in.st_size) { |
454 |
> + copyfunc_ret = sendfile(fd_out, |
455 |
> + fd_in, |
456 |
> + &offset_out, |
457 |
> + stat_in.st_size - offset_out); |
458 |
> + |
459 |
> + if (copyfunc_ret < 0) { |
460 |
> + if (errno == EINVAL && !offset_out) { |
461 |
> + sendfile_works = 0; |
462 |
> + } |
463 |
> + error = 1; |
464 |
> + break; |
465 |
> + } |
466 |
> + } |
467 |
> + } |
468 |
> + } |
469 |
> + |
470 |
> + /* This implementation will work on any kernel. */ |
471 |
> + if (!sendfile_works) { |
472 |
> + error = 0; |
473 |
> + |
474 |
> + if (!stat_out_acquired && fstat(fd_in, &stat_out) < 0) { |
475 |
> + error = 1; |
476 |
> + } |
477 |
> + else { |
478 |
> + stat_out_acquired = 1; |
479 |
> + if (buf == NULL) |
480 |
> + buf = malloc(stat_out.st_blksize); |
481 |
> + if (buf == NULL) { |
482 |
> + error = 1; |
483 |
> + } |
484 |
> + else { |
485 |
> + while (1) { |
486 |
> + /* Some bytes may still be buffered from the |
487 |
> + * previous iteration of the outer loop. |
488 |
> + */ |
489 |
> + if (!buf_bytes) { |
490 |
> + buf_offset = 0; |
491 |
> + buf_bytes = read(fd_in, buf, stat_out.st_blksize); |
492 |
> + |
493 |
> + if (!buf_bytes) { |
494 |
> + /* EOF */ |
495 |
> + break; |
496 |
> + } |
497 |
> + else if (buf_bytes < 0) { |
498 |
> + error = 1; |
499 |
> + break; |
500 |
> + } |
501 |
> + } |
502 |
> + |
503 |
> + copyfunc_ret = write(fd_out, buf + buf_offset, buf_bytes); |
504 |
> + if (copyfunc_ret < 0) { |
505 |
> + error = 1; |
506 |
> + break; |
507 |
> + } |
508 |
> + |
509 |
> + buf_bytes -= copyfunc_ret; |
510 |
> + buf_offset += copyfunc_ret; |
511 |
> + } |
512 |
> + } |
513 |
> + } |
514 |
> + } |
515 |
> + |
516 |
> + Py_END_ALLOW_THREADS |
517 |
> + |
518 |
> + if (!(error && errno == EINTR && PyErr_CheckSignals() == 0)) |
519 |
> + eintr_retry = 0; |
520 |
|
521 |
To be honest, I can't do it but it'd be really a good idea if someone |
522 |
could analyze all the code paths where the code above could give 'error |
523 |
= 1' and ensure that: |
524 |
|
525 |
a. there is no case when we error=1 and leave errno unmodified/unset |
526 |
(i.e. can accidentally leave earlier EINTR there), |
527 |
|
528 |
b. we can correctly recover from any EINTR, i.e. if EINTR causes some |
529 |
function to be interrupted in the middle, our next iteration starts |
530 |
safely. |
531 |
|
532 |
> + } |
533 |
> + |
534 |
> + free(buf); |
535 |
|
536 |
if (buf != NULL) ? |
537 |
|
538 |
> + |
539 |
> + if (error) |
540 |
> + return PyErr_SetFromErrno(PyExc_OSError); |
541 |
> + |
542 |
> + return Py_BuildValue("i", offset_out); |
543 |
> +} |
544 |
|
545 |
-- |
546 |
Best regards, |
547 |
Michał Górny |