Gentoo Archives: gentoo-portage-dev

From: Zac Medico <zmedico@g.o>
To: gentoo-portage-dev@l.g.o
Cc: Zac Medico <zmedico@g.o>
Subject: [gentoo-portage-dev] [PATCH v3] movefile: support in-kernel file copying on Linux (bug 607868)
Date: Sat, 04 Mar 2017 02:19:32
Message-Id: 20170304021850.14835-1-zmedico@gentoo.org
In Reply to: [gentoo-portage-dev] [PATCH] movefile: support in-kernel file copying on Linux (bug 607868) by Zac Medico
1 Perform in-kernel file copying when possible, and also support
2 reflinks and sparse files. If the optimized implementation
3 fails at runtime, gracefully fall back to a plain read/write
4 loop.
5
6 Compile-time and run-time fallbacks are implemented, so that
7 any incompatibilities will be handled gracefully. For example,
8 if the code is compiled on a system that supports the
9 copy_file_range syscall, but at run-time an older kernel that
10 does not support this syscall is detected, it will be handled
11 gracefully. There are similar fallbacks for lack of lseek
12 SEEK_DATA and sendfile support.
13
14 X-Gentoo-Bug: 607868
15 X-Gentoo-Bug-Url: https://bugs.gentoo.org/show_bug.cgi?id=607868
16 ---
17 [PATCH v3] Changes:
18 * Add function documentation comments
19 * Rename do_lseek function to do_lseek_data
20 * Fix do_lseek_data to handle sparse blocks at EOF, and
21 fix _reflink_linux_file_copy to call ftruncate
22 * Eliminate indirection in python copyfile function
23 * Cleaned up error variable handling
24 * Added lseek calls to ensure correct recovery from EINTR
25 * Added buf != NULL check before free(buf)
26
27 pym/portage/tests/util/file_copy/__init__.py | 0
28 pym/portage/tests/util/file_copy/__test__.py | 0
29 pym/portage/tests/util/file_copy/test_copyfile.py | 71 ++++
30 pym/portage/util/file_copy/__init__.py | 36 ++
31 pym/portage/util/movefile.py | 4 +-
32 setup.py | 9 +
33 src/portage_util_file_copy_reflink_linux.c | 385 ++++++++++++++++++++++
34 7 files changed, 504 insertions(+), 1 deletion(-)
35 create mode 100644 pym/portage/tests/util/file_copy/__init__.py
36 create mode 100644 pym/portage/tests/util/file_copy/__test__.py
37 create mode 100644 pym/portage/tests/util/file_copy/test_copyfile.py
38 create mode 100644 pym/portage/util/file_copy/__init__.py
39 create mode 100644 src/portage_util_file_copy_reflink_linux.c
40
41 diff --git a/pym/portage/tests/util/file_copy/__init__.py b/pym/portage/tests/util/file_copy/__init__.py
42 new file mode 100644
43 index 0000000..e69de29
44 diff --git a/pym/portage/tests/util/file_copy/__test__.py b/pym/portage/tests/util/file_copy/__test__.py
45 new file mode 100644
46 index 0000000..e69de29
47 diff --git a/pym/portage/tests/util/file_copy/test_copyfile.py b/pym/portage/tests/util/file_copy/test_copyfile.py
48 new file mode 100644
49 index 0000000..b900fde
50 --- /dev/null
51 +++ b/pym/portage/tests/util/file_copy/test_copyfile.py
52 @@ -0,0 +1,71 @@
53 +# Copyright 2017 Gentoo Foundation
54 +# Distributed under the terms of the GNU General Public License v2
55 +
56 +import shutil
57 +import tempfile
58 +
59 +from portage import os
60 +from portage.tests import TestCase
61 +from portage.checksum import perform_md5
62 +from portage.util.file_copy import copyfile
63 +
64 +
65 +class CopyFileTestCase(TestCase):
66 +
67 + def testCopyFile(self):
68 +
69 + tempdir = tempfile.mkdtemp()
70 + try:
71 + src_path = os.path.join(tempdir, 'src')
72 + dest_path = os.path.join(tempdir, 'dest')
73 + content = b'foo'
74 +
75 + with open(src_path, 'wb') as f:
76 + f.write(content)
77 +
78 + copyfile(src_path, dest_path)
79 +
80 + self.assertEqual(perform_md5(src_path), perform_md5(dest_path))
81 + finally:
82 + shutil.rmtree(tempdir)
83 +
84 +
85 +class CopyFileSparseTestCase(TestCase):
86 +
87 + def testCopyFileSparse(self):
88 +
89 + tempdir = tempfile.mkdtemp()
90 + try:
91 + src_path = os.path.join(tempdir, 'src')
92 + dest_path = os.path.join(tempdir, 'dest')
93 + content = b'foo'
94 +
95 + # Use seek to create some sparse blocks. Don't make these
96 + # files too big, in case the filesystem doesn't support
97 + # sparse files.
98 + with open(src_path, 'wb') as f:
99 + f.write(content)
100 + f.seek(2**17, 1)
101 + f.write(content)
102 + f.seek(2**18, 1)
103 + f.write(content)
104 + # Test that sparse blocks are handled correctly at
105 + # the end of the file (involves seek and truncate).
106 + f.seek(2**17, 1)
107 +
108 + copyfile(src_path, dest_path)
109 +
110 + self.assertEqual(perform_md5(src_path), perform_md5(dest_path))
111 +
112 + # This last part of the test is expected to fail when sparse
113 + # copy is not implemented, so set the todo flag in order
114 + # to tolerate failures.
115 + self.todo = True
116 +
117 + # If sparse blocks were preserved, then both files should
118 + # consume the same number of blocks.
119 + self.assertEqual(
120 + os.stat(src_path).st_blocks,
121 + os.stat(dest_path).st_blocks)
122 + finally:
123 + shutil.rmtree(tempdir)
124 diff --git a/pym/portage/util/file_copy/__init__.py b/pym/portage/util/file_copy/__init__.py
125 new file mode 100644
126 index 0000000..3d9b745
127 --- /dev/null
128 +++ b/pym/portage/util/file_copy/__init__.py
129 @@ -0,0 +1,36 @@
130 +# Copyright 2017 Gentoo Foundation
131 +# Distributed under the terms of the GNU General Public License v2
132 +
133 +import os
134 +import shutil
135 +import tempfile
136 +
137 +try:
138 + from portage.util.file_copy.reflink_linux import file_copy as _file_copy
139 +except ImportError:
140 + _file_copy = None
141 +
142 +
143 +def _optimized_copyfile(src, dst):
144 + """
145 + Copy the contents (no metadata) of the file named src to a file
146 + named dst.
147 +
148 + If possible, copying is done within the kernel, and uses
149 + "copy acceleration" techniques (such as reflinks). This also
150 + supports sparse files.
151 +
152 + @param src: path of source file
153 + @type src: str
154 + @param dst: path of destination file
155 + @type dst: str
156 + """
157 + with open(src, 'rb', buffering=0) as src_file, \
158 + open(dst, 'wb', buffering=0) as dst_file:
159 + _file_copy(src_file.fileno(), dst_file.fileno())
160 +
161 +
162 +if _file_copy is None:
163 + copyfile = shutil.copyfile
164 +else:
165 + copyfile = _optimized_copyfile
166 diff --git a/pym/portage/util/movefile.py b/pym/portage/util/movefile.py
167 index 4be1c3b..88b35d3 100644
168 --- a/pym/portage/util/movefile.py
169 +++ b/pym/portage/util/movefile.py
170 @@ -23,6 +23,8 @@ from portage.localization import _
171 from portage.process import spawn
172 from portage.util import writemsg
173 from portage.util._xattr import xattr
174 +from portage.util.file_copy import copyfile
175 +
176
177 def _apply_stat(src_stat, dest):
178 _os.chown(dest, src_stat.st_uid, src_stat.st_gid)
179 @@ -114,7 +116,7 @@ def movefile(src, dest, newmtime=None, sstat=None, mysettings=None,
180 _copyfile = selinux.copyfile
181 _rename = selinux.rename
182 else:
183 - _copyfile = _shutil.copyfile
184 + _copyfile = copyfile
185 _rename = _os.rename
186
187 lchown = _unicode_func_wrapper(portage.data.lchown, encoding=encoding)
188 diff --git a/setup.py b/setup.py
189 index a346bd4..b624767 100755
190 --- a/setup.py
191 +++ b/setup.py
192 @@ -23,6 +23,7 @@ import collections
193 import glob
194 import os
195 import os.path
196 +import platform
197 import re
198 import subprocess
199 import sys
200 @@ -54,6 +55,14 @@ x_c_helpers = {
201 ],
202 }
203
204 +if platform.system() == 'Linux':
205 + x_c_helpers.update({
206 + 'portage.util.file_copy.reflink_linux': [
207 + 'src/portage_util_file_copy_reflink_linux.c',
208 + ],
209 + })
210 +
211 +
212 class x_build(build):
213 """ Build command with extra build_man call. """
214
215 diff --git a/src/portage_util_file_copy_reflink_linux.c b/src/portage_util_file_copy_reflink_linux.c
216 new file mode 100644
217 index 0000000..b031d96
218 --- /dev/null
219 +++ b/src/portage_util_file_copy_reflink_linux.c
220 @@ -0,0 +1,385 @@
221 +/* Copyright 2017 Gentoo Foundation
222 + * Distributed under the terms of the GNU General Public License v2
223 + */
224 +
225 +#include <Python.h>
226 +#include <errno.h>
227 +#include <stdlib.h>
228 +#include <ctype.h>
229 +#include <sys/sendfile.h>
230 +#include <sys/stat.h>
231 +#include <sys/syscall.h>
232 +#include <sys/types.h>
233 +#include <unistd.h>
234 +
235 +static PyObject * _reflink_linux_file_copy(PyObject *, PyObject *);
236 +
237 +static PyMethodDef reflink_linuxMethods[] = {
238 + {
239 + "file_copy",
240 + _reflink_linux_file_copy,
241 + METH_VARARGS,
242 + "Copy between two file descriptors, "
243 + "with reflink and sparse file support."
244 + },
245 + {NULL, NULL, 0, NULL}
246 +};
247 +
248 +#if PY_MAJOR_VERSION >= 3
249 +static struct PyModuleDef moduledef = {
250 + PyModuleDef_HEAD_INIT,
251 + "reflink_linux", /* m_name */
252 + "Module for reflink_linux copy operations", /* m_doc */
253 + -1, /* m_size */
254 + reflink_linuxMethods, /* m_methods */
255 + NULL, /* m_reload */
256 + NULL, /* m_traverse */
257 + NULL, /* m_clear */
258 + NULL, /* m_free */
259 +};
260 +
261 +PyMODINIT_FUNC
262 +PyInit_reflink_linux(void)
263 +{
264 + PyObject *m;
265 + m = PyModule_Create(&moduledef);
266 + return m;
267 +}
268 +#else
269 +PyMODINIT_FUNC
270 +initreflink_linux(void)
271 +{
272 + Py_InitModule("reflink_linux", reflink_linuxMethods);
273 +}
274 +#endif
275 +
276 +
277 +/**
278 + * cfr_wrapper - A copy_file_range syscall wrapper function, having a
279 + * function signature that is compatible with sendfile.
280 + * @fd_out: output file descriptor
281 + * @fd_in: input file descriptor
282 + * @off_out: offset of the output file
283 + * @len: number of bytes to copy between the file descriptors
284 + *
285 + * Return: Number of bytes written to fd_out on success, -1 on failure
286 + * (errno is set appropriately).
287 + */
288 +static ssize_t
289 +cfr_wrapper(int fd_out, int fd_in, loff_t *off_out, size_t len)
290 +{
291 +#ifdef __NR_copy_file_range
292 + return syscall(__NR_copy_file_range, fd_in, NULL, fd_out,
293 + off_out, len, 0);
294 +#else
295 + /* This is how it fails at runtime when the syscall is not supported. */
296 + errno = ENOSYS;
297 + return -1;
298 +#endif
299 +}
300 +
301 +/**
302 + * do_lseek_data - Adjust file offsets to the next location containing
303 + * data, creating sparse empty blocks in the output file as needed.
304 + * @fd_out: output file descriptor
305 + * @fd_in: input file descriptor
306 + * @off_out: offset of the output file
307 + *
308 + * Use lseek SEEK_DATA to adjust the fd_in file offset to the next
309 + * location containing data, and adjust the fd_out file offset and
310 + * off_out to the same location (creating sparse empty blocks as
311 + * needed). On success, both fd_in and fd_out file offsets are
312 + * guaranteed to be exactly equal to the value that off_out points to.
313 + *
314 + * Return: On success, the number of bytes to copy before the next hole,
315 + * and -1 on failure (errno is set appropriately). Returns 0 when fd_in
316 + * reaches EOF.
317 + */
318 +static off_t
319 +do_lseek_data(int fd_out, int fd_in, loff_t *off_out) {
320 +#ifdef SEEK_DATA
321 + /* Use lseek SEEK_DATA/SEEK_HOLE for sparse file support,
322 + * as suggested in the copy_file_range man page.
323 + */
324 + off_t offset_data, offset_hole;
325 +
326 + offset_data = lseek(fd_in, *off_out, SEEK_DATA);
327 + if (offset_data < 0) {
328 + if (errno == ENXIO) {
329 + /* EOF - If the file ends with a hole, then use lseek SEEK_END
330 + * to find the end offset, and create sparse empty blocks in
331 + * the output file. It's the caller's responsibility to
332 + * truncate the file.
333 + */
334 + offset_hole = lseek(fd_in, 0, SEEK_END);
335 + if (offset_hole < 0) {
336 + return -1;
337 + } else if (offset_hole != *off_out) {
338 + if (lseek(fd_out, offset_hole, SEEK_SET) < 0) {
339 + return -1;
340 + }
341 + *off_out = offset_hole;
342 + }
343 + return 0;
344 + }
345 + return -1;
346 + }
347 +
348 + /* Create sparse empty blocks in the output file, up
349 + * until the next location that will contain data.
350 + */
351 + if (offset_data != *off_out) {
352 + if (lseek(fd_out, offset_data, SEEK_SET) < 0) {
353 + return -1;
354 + }
355 + *off_out = offset_data;
356 + }
357 +
358 + /* Locate the next hole, so that we know when to
359 + * stop copying. There is an implicit hole at the
360 + * end of the file. This should never result in ENXIO
361 + * after SEEK_DATA has succeeded above.
362 + */
363 + offset_hole = lseek(fd_in, offset_data, SEEK_HOLE);
364 + if (offset_hole < 0) {
365 + return -1;
366 + }
367 +
368 + /* Revert SEEK_HOLE offset change, since we're going
369 + * to copy the data that comes before the hole.
370 + */
371 + if (lseek(fd_in, offset_data, SEEK_SET) < 0) {
372 + return -1;
373 + }
374 +
375 + return offset_hole - offset_data;
376 +#else
377 + /* This is how it fails at runtime when lseek SEEK_DATA is not supported. */
378 + errno = EINVAL;
379 + return -1;
380 +#endif
381 +}
382 +
383 +
384 +/**
385 + * _reflink_linux_file_copy - Copy between two file descriptors, with
386 + * reflink and sparse file support.
387 + * @fd_in: input file descriptor
388 + * @fd_out: output file descriptor
389 + *
390 + * When supported, this uses copy_file_range for reflink support,
391 + * and lseek SEEK_DATA for sparse file support. It has graceful
392 + * fallbacks when support is unavailable for copy_file_range, lseek
393 + * SEEK_DATA, or sendfile operations. When all else fails, it uses
394 + * a plain read/write loop that works in any kernel version.
395 + *
396 + * If a syscall is interrupted by a signal, then the function will
397 + * automatically resume copying at the appropriate location which is
398 + * tracked internally by the offset_out variable.
399 + *
400 + * Return: The length of the output file on success. Raise OSError
401 + * on failure.
402 + */
403 +static PyObject *
404 +_reflink_linux_file_copy(PyObject *self, PyObject *args)
405 +{
406 + int eintr_retry, error, fd_in, fd_out, stat_in_acquired, stat_out_acquired;
407 + int lseek_works, sendfile_works;
408 + off_t offset_out, len;
409 + ssize_t buf_bytes, buf_offset, copyfunc_ret;
410 + struct stat stat_in, stat_out;
411 + char* buf;
412 + ssize_t (*copyfunc)(int, int, loff_t *, size_t);
413 +
414 + if (!PyArg_ParseTuple(args, "ii", &fd_in, &fd_out))
415 + return NULL;
416 +
417 + eintr_retry = 1;
418 + offset_out = 0;
419 + stat_in_acquired = 0;
420 + stat_out_acquired = 0;
421 + buf = NULL;
422 + buf_bytes = 0;
423 + buf_offset = 0;
424 + copyfunc = cfr_wrapper;
425 + lseek_works = 1;
426 + sendfile_works = 1;
427 +
428 + while (eintr_retry) {
429 +
430 + Py_BEGIN_ALLOW_THREADS
431 +
432 + /* Linux 3.1 and later support SEEK_DATA (for sparse file support).
433 + * This code uses copy_file_range if possible, and falls back to
434 + * sendfile for cross-device or when the copy_file_range syscall
435 + * is not available (less than Linux 4.5). This will fail for
436 + * Linux less than 3.1, which does not support the lseek SEEK_DATA
437 + * parameter.
438 + */
439 + if (sendfile_works && lseek_works) {
440 + error = 0;
441 +
442 + while (1) {
443 + len = do_lseek_data(fd_out, fd_in, &offset_out);
444 + if (!len) {
445 + /* EOF */
446 + break;
447 + } else if (len < 0) {
448 + error = errno;
449 + if (errno == EINVAL && !offset_out) {
450 + lseek_works = 0;
451 + }
452 + break;
453 + }
454 +
455 + /* For the copyfunc call, the fd_in file offset must be
456 + * exactly equal to offset_out. The above do_lseek_data
457 + * function guarantees correct state.
458 + */
459 + copyfunc_ret = copyfunc(fd_out,
460 + fd_in,
461 + &offset_out,
462 + len);
463 +
464 + if (copyfunc_ret < 0) {
465 + error = errno;
466 + if ((errno == EXDEV || errno == ENOSYS) &&
467 + copyfunc == cfr_wrapper) {
468 + /* Use sendfile instead of copy_file_range for
469 + * cross-device copies, or when the copy_file_range
470 + * syscall is not available (less than Linux 4.5).
471 + */
472 + error = 0;
473 + copyfunc = sendfile;
474 + copyfunc_ret = copyfunc(fd_out,
475 + fd_in,
476 + &offset_out,
477 + len);
478 +
479 + if (copyfunc_ret < 0) {
480 + error = errno;
481 + /* On Linux, if lseek succeeded above, then
482 + * sendfile should have worked here too, so
483 + * don't bother to fallback for EINVAL here.
484 + */
485 + break;
486 + }
487 + } else {
488 + break;
489 + }
490 + }
491 + }
492 + }
493 +
494 + /* Less than Linux 3.1 does not support SEEK_DATA or copy_file_range,
495 + * so just use sendfile for in-kernel copy. This will fail for Linux
496 + * versions from 2.6.0 to 2.6.32, because sendfile does not support
497 + * writing to regular files.
498 + */
499 + if (sendfile_works && !lseek_works) {
500 + error = 0;
501 +
502 + if (!stat_in_acquired && fstat(fd_in, &stat_in) < 0) {
503 + error = errno;
504 + } else {
505 + stat_in_acquired = 1;
506 +
507 + /* For the sendfile call, the fd_in file offset must be
508 + * exactly equal to offset_out. Use lseek to ensure
509 + * correct state, in case an EINTR retry caused it to
510 + * get out of sync somehow.
511 + */
512 + if (lseek(fd_in, offset_out, SEEK_SET) < 0) {
513 + error = errno;
514 + } else {
515 + while (offset_out < stat_in.st_size) {
516 + copyfunc_ret = sendfile(fd_out,
517 + fd_in,
518 + &offset_out,
519 + stat_in.st_size - offset_out);
520 +
521 + if (copyfunc_ret < 0) {
522 + error = errno;
523 + if (errno == EINVAL && !offset_out) {
524 + sendfile_works = 0;
525 + }
526 + break;
527 + }
528 + }
529 + }
530 + }
531 + }
532 +
533 + /* This implementation will work on any kernel. */
534 + if (!sendfile_works) {
535 + error = 0;
536 +
537 + if (!stat_out_acquired && fstat(fd_in, &stat_out) < 0) {
538 + error = errno;
539 + } else {
540 + stat_out_acquired = 1;
541 + if (buf == NULL)
542 + buf = malloc(stat_out.st_blksize);
543 + if (buf == NULL) {
544 + error = errno;
545 +
546 + /* For the read call, the fd_in file offset must be
547 + * exactly equal to offset_out. Use lseek to ensure
548 + * correct state, in case an EINTR retry caused it to
549 + * get out of sync somehow.
550 + */
551 + } else if (lseek(fd_in, offset_out, SEEK_SET) < 0) {
552 + error = errno;
553 + } else {
554 + while (1) {
555 + /* Some bytes may still be buffered from the
556 + * previous iteration of the outer loop.
557 + */
558 + if (!buf_bytes) {
559 + buf_offset = 0;
560 + buf_bytes = read(fd_in, buf, stat_out.st_blksize);
561 +
562 + if (!buf_bytes) {
563 + /* EOF */
564 + break;
565 +
566 + } else if (buf_bytes < 0) {
567 + error = errno;
568 + break;
569 + }
570 + }
571 +
572 + copyfunc_ret = write(fd_out,
573 + buf + buf_offset,
574 + buf_bytes);
575 +
576 + if (copyfunc_ret < 0) {
577 + error = errno;
578 + break;
579 + }
580 +
581 + buf_bytes -= copyfunc_ret;
582 + buf_offset += copyfunc_ret;
583 + offset_out += copyfunc_ret;
584 + }
585 + }
586 + }
587 + }
588 +
589 + if (!error && ftruncate(fd_out, offset_out) < 0)
590 + error = errno;
591 +
592 + Py_END_ALLOW_THREADS
593 +
594 + if (!(error == EINTR && PyErr_CheckSignals() == 0))
595 + eintr_retry = 0;
596 + }
597 +
598 + if (buf != NULL)
599 + free(buf);
600 +
601 + if (error)
602 + return PyErr_SetFromErrno(PyExc_OSError);
603 +
604 + return Py_BuildValue("i", offset_out);
605 +}
606 --
607 2.10.2

Replies