Gentoo Archives: gentoo-portage-dev

From: "Michał Górny" <mgorny@g.o>
To: gentoo-portage-dev@l.g.o
Cc: Zac Medico <zmedico@g.o>
Subject: Re: [gentoo-portage-dev] [PATCH v2] movefile: support in-kernel file copying on Linux (bug 607868)
Date: Fri, 03 Mar 2017 20:45:47
Message-Id: 1488573932.12765.2.camel@gentoo.org
In Reply to: [gentoo-portage-dev] [PATCH v2] movefile: support in-kernel file copying on Linux (bug 607868) by Zac Medico
1 W dniu 02.03.2017, czw o godzinie 17∶09 -0800, użytkownik Zac Medico
2 napisał:
3 > Perform in-kernel file copying when possible, and also support
4 > reflinks and sparse files. If the optimized implementation
5 > fails at runtime, gracefully fallback to a plain read/write
6 > loop.
7 >
8 > Compile-time and run-time fallbacks are implemented, so that
9 > any incompatiblities will be handled gracefully. For example,
10 > if the code is compiled on a system that supports the
11 > copy_file_range syscall, but at run-time an older kernel that
12 > does not support this syscall is detected, it will be handled
13 > gracefully. There are similar fallbacks for lack of lseek
14 > SEEK_DATA and sendfile support.
15 >
16 > X-Gentoo-Bug: 607868
17 > X-Gentoo-Bug-Url: https://bugs.gentoo.org/show_bug.cgi?id=607868
18 > ---
19 > [PATCH v2] adds a native fallback that will work on any kernel,
20 > using a plain read/write loop.
21 >
22 > pym/portage/tests/util/file_copy/__init__.py | 0
23 > pym/portage/tests/util/file_copy/__test__.py | 0
24 > pym/portage/tests/util/file_copy/test_copyfile.py | 68 +++++
25 > pym/portage/util/file_copy/__init__.py | 45 ++++
26 > pym/portage/util/movefile.py | 4 +-
27 > setup.py | 9 +
28 > src/portage_util_file_copy_reflink_linux.c | 298 ++++++++++++++++++++++
29 > 7 files changed, 423 insertions(+), 1 deletion(-)
30 > create mode 100644 pym/portage/tests/util/file_copy/__init__.py
31 > create mode 100644 pym/portage/tests/util/file_copy/__test__.py
32 > create mode 100644 pym/portage/tests/util/file_copy/test_copyfile.py
33 > create mode 100644 pym/portage/util/file_copy/__init__.py
34 > create mode 100644 src/portage_util_file_copy_reflink_linux.c
35 >
36 > diff --git a/pym/portage/tests/util/file_copy/__init__.py b/pym/portage/tests/util/file_copy/__init__.py
37 > new file mode 100644
38 > index 0000000..e69de29
39 > diff --git a/pym/portage/tests/util/file_copy/__test__.py b/pym/portage/tests/util/file_copy/__test__.py
40 > new file mode 100644
41 > index 0000000..e69de29
42 > diff --git a/pym/portage/tests/util/file_copy/test_copyfile.py b/pym/portage/tests/util/file_copy/test_copyfile.py
43 > new file mode 100644
44 > index 0000000..94fb4d7
45 > --- /dev/null
46 > +++ b/pym/portage/tests/util/file_copy/test_copyfile.py
47 > @@ -0,0 +1,68 @@
48 > +# Copyright 2017 Gentoo Foundation
49 > +# Distributed under the terms of the GNU General Public License v2
50 > +
51 > +import shutil
52 > +import tempfile
53 > +
54 > +from portage import os
55 > +from portage.tests import TestCase
56 > +from portage.checksum import perform_md5
57 > +from portage.util.file_copy import copyfile
58 > +
59 > +
60 > +class CopyFileTestCase(TestCase):
61 > +
62 > + def testCopyFile(self):
63 > +
64 > + tempdir = tempfile.mkdtemp()
65 > + try:
66 > + src_path = os.path.join(tempdir, 'src')
67 > + dest_path = os.path.join(tempdir, 'dest')
68 > + content = b'foo'
69 > +
70 > + with open(src_path, 'wb') as f:
71 > + f.write(content)
72 > +
73 > + copyfile(src_path, dest_path)
74 > +
75 > + self.assertEqual(perform_md5(src_path), perform_md5(dest_path))
76 > + finally:
77 > + shutil.rmtree(tempdir)
78 > +
79 > +
80 > +class CopyFileSparseTestCase(TestCase):
81 > +
82 > + def testCopyFileSparse(self):
83 > +
84 > + tempdir = tempfile.mkdtemp()
85 > + try:
86 > + src_path = os.path.join(tempdir, 'src')
87 > + dest_path = os.path.join(tempdir, 'dest')
88 > + content = b'foo'
89 > +
90 > + # Use seek to create some sparse blocks. Don't make these
91 > + # files too big, in case the filesystem doesn't support
92 > + # sparse files.
93 > + with open(src_path, 'wb') as f:
94 > + f.write(content)
95 > + f.seek(2**18, 1)
96 > + f.write(content)
97 > + f.seek(2**19, 1)
98 > + f.write(content)
99 > +
100 > + copyfile(src_path, dest_path)
101 > +
102 > + self.assertEqual(perform_md5(src_path), perform_md5(dest_path))
103 > +
104 > + # This last part of the test is expected to fail when sparse
105 > + # copy is not implemented, so set the todo flag in order
106 > + # to tolerate failures.
107 > + self.todo = True
108 > +
109 > + # If sparse blocks were preserved, then both files should
110 > + # consume the same number of blocks.
111 > + self.assertEqual(
112 > + os.stat(src_path).st_blocks,
113 > + os.stat(dest_path).st_blocks)
114 > + finally:
115 > + shutil.rmtree(tempdir)
116 > diff --git a/pym/portage/util/file_copy/__init__.py b/pym/portage/util/file_copy/__init__.py
117 > new file mode 100644
118 > index 0000000..34c46ad
119 > --- /dev/null
120 > +++ b/pym/portage/util/file_copy/__init__.py
121 > @@ -0,0 +1,45 @@
122 > +# Copyright 2017 Gentoo Foundation
123 > +# Distributed under the terms of the GNU General Public License v2
124 > +
125 > +import os
126 > +import shutil
127 > +import tempfile
128 > +
129 > +try:
130 > + from portage.util.file_copy.reflink_linux import file_copy as _file_copy
131 > +except ImportError:
132 > + _file_copy = None
133 > +
134 > +
135 > +_copyfile = None
136 > +
137 > +
138 > +def _optimized_copyfile(src, dst):
139 > + with open(src, 'rb', buffering=0) as src_file, \
140 > + open(dst, 'wb', buffering=0) as dst_file:
141 > + _file_copy(src_file.fileno(), dst_file.fileno())
142 > +
143 > +
144 > +def copyfile(src, dst):
145 > + """
146 > + Copy the contents (no metadata) of the file named src to a file
147 > + named dst.
148 > +
149 > + If possible, copying is done within the kernel, and uses
150 > + "copy acceleration" techniques (such as reflinks). This also
151 > + supports sparse files.
152 > +
153 > + @param src: path of source file
154 > + @type src: str
155 > + @param dst: path of destination file
156 > + @type dst: str
157 > + """
158 > + global _copyfile
159 > +
160 > + if _copyfile is None:
161 > + if _file_copy is None:
162 > + _copyfile = shutil.copyfile
163 > + else:
164 > + _copyfile = _optimized_copyfile
165
166 I dare say the caching logic takes more time than the actual logic.
167 However, this could be made even simpler -- you could just check
168 the condition in global scope and set _copyfile value there.
169
170 > +
171 > + _copyfile(src, dst)
172 > diff --git a/pym/portage/util/movefile.py b/pym/portage/util/movefile.py
173 > index 4be1c3b..88b35d3 100644
174 > --- a/pym/portage/util/movefile.py
175 > +++ b/pym/portage/util/movefile.py
176 > @@ -23,6 +23,8 @@ from portage.localization import _
177 > from portage.process import spawn
178 > from portage.util import writemsg
179 > from portage.util._xattr import xattr
180 > +from portage.util.file_copy import copyfile
181 > +
182 >
183 > def _apply_stat(src_stat, dest):
184 > _os.chown(dest, src_stat.st_uid, src_stat.st_gid)
185 > @@ -114,7 +116,7 @@ def movefile(src, dest, newmtime=None, sstat=None, mysettings=None,
186 > _copyfile = selinux.copyfile
187 > _rename = selinux.rename
188 > else:
189 > - _copyfile = _shutil.copyfile
190 > + _copyfile = copyfile
191 > _rename = _os.rename
192 >
193 > lchown = _unicode_func_wrapper(portage.data.lchown, encoding=encoding)
194 > diff --git a/setup.py b/setup.py
195 > index a346bd4..b624767 100755
196 > --- a/setup.py
197 > +++ b/setup.py
198 > @@ -23,6 +23,7 @@ import collections
199 > import glob
200 > import os
201 > import os.path
202 > +import platform
203 > import re
204 > import subprocess
205 > import sys
206 > @@ -54,6 +55,14 @@ x_c_helpers = {
207 > ],
208 > }
209 >
210 > +if platform.system() == 'Linux':
211 > + x_c_helpers.update({
212 > + 'portage.util.file_copy.reflink_linux': [
213 > + 'src/portage_util_file_copy_reflink_linux.c',
214 > + ],
215 > + })
216 > +
217 > +
218 > class x_build(build):
219 > """ Build command with extra build_man call. """
220 >
221 > diff --git a/src/portage_util_file_copy_reflink_linux.c b/src/portage_util_file_copy_reflink_linux.c
222 > new file mode 100644
223 > index 0000000..fde3cd3
224 > --- /dev/null
225 > +++ b/src/portage_util_file_copy_reflink_linux.c
226 > @@ -0,0 +1,298 @@
227 > +/* Copyright 2017 Gentoo Foundation
228 > + * Distributed under the terms of the GNU General Public License v2
229 > + */
230 > +
231 > +#include <Python.h>
232 > +#include <errno.h>
233 > +#include <stdlib.h>
234 > +#include <ctype.h>
235 > +#include <sys/sendfile.h>
236 > +#include <sys/stat.h>
237 > +#include <sys/syscall.h>
238 > +#include <sys/types.h>
239 > +#include <unistd.h>
240 > +
241 > +static PyObject * _reflink_linux_file_copy(PyObject *, PyObject *);
242 > +
243 > +static PyMethodDef reflink_linuxMethods[] = {
244 > + {
245 > + "file_copy",
246 > + _reflink_linux_file_copy,
247 > + METH_VARARGS,
248 > + "Copy between two file descriptors, "
249 > + "with reflink and sparse file support."
250 > + },
251 > + {NULL, NULL, 0, NULL}
252 > +};
253 > +
254 > +#if PY_MAJOR_VERSION >= 3
255 > +static struct PyModuleDef moduledef = {
256 > + PyModuleDef_HEAD_INIT,
257 > + "reflink_linux", /* m_name */
258 > + "Module for reflink_linux copy operations", /* m_doc */
259 > + -1, /* m_size */
260 > + reflink_linuxMethods, /* m_methods */
261 > + NULL, /* m_reload */
262 > + NULL, /* m_traverse */
263 > + NULL, /* m_clear */
264 > + NULL, /* m_free */
265 > +};
266 > +
267 > +PyMODINIT_FUNC
268 > +PyInit_reflink_linux(void)
269 > +{
270 > + PyObject *m;
271 > + m = PyModule_Create(&moduledef);
272 > + return m;
273 > +}
274 > +#else
275 > +PyMODINIT_FUNC
276 > +initreflink_linux(void)
277 > +{
278 > + Py_InitModule("reflink_linux", reflink_linuxMethods);
279 > +}
280 > +#endif
281 > +
282 > +
283 > +static ssize_t
284 > +cfr_wrapper(int fd_out, int fd_in, loff_t *off_out, size_t len)
285 > +{
286 > +#ifdef __NR_copy_file_range
287 > + return syscall(__NR_copy_file_range, fd_in, NULL, fd_out,
288 > + off_out, len, 0);
289 > +#else
290 > + /* This is how it fails at runtime when the syscall is not supported. */
291 > + errno = ENOSYS;
292 > + return -1;
293 > +#endif
294 > +}
295 > +
296 > +
297 > +static off_t
298 > +do_lseek(int fd_out, int fd_in, loff_t *off_out) {
299
300 Maybe name it do_lseek_data() to make the goal clearer? Some function
301 docs in comments would also be helpful.
302
303 > +#ifdef SEEK_DATA
304 > + /* Use lseek SEEK_DATA/SEEK_HOLE for sparse file support,
305 > + * as suggested in the copy_file_range man page.
306 > + */
307 > + off_t offset_data, offset_hole;
308 > +
309 > + offset_data = lseek(fd_in, *off_out, SEEK_DATA);
310 > + if (offset_data < 0) {
311 > + if (errno == ENXIO) {
312 > + return 0;
313 > + }
314 > + return -1;
315 > + }
316 > +
317 > + /* Create sparse empty blocks in the output file, up
318 > + * until the next location that will contain data.
319 > + */
320 > + if (offset_data > *off_out) {
321 > + if (lseek(fd_out, offset_data, SEEK_SET) < 0) {
322 > + return -1;
323 > + }
324 > + *off_out = offset_data;
325 > + }
326 > +
327 > + /* Locate the next hole, so that we know when to
328 > + * stop copying. There is an implicit hole at the
329 > + * end of the file.
330 > + */
331 > + offset_hole = lseek(fd_in, offset_data, SEEK_HOLE);
332 > + if (offset_hole < 0) {
333 > + return -1;
334 > + }
335 > +
336 > + /* Revert SEEK_HOLE offset change, since we're going
337 > + * to copy the data that comes before the hole.
338 > + */
339 > + if (lseek(fd_in, *off_out, SEEK_SET) < 0) {
340 > + return -1;
341 > + }
342 > +
343 > + return offset_hole - offset_data;
344 > +#else
345 > + /* This is how it fails at runtime when lseek SEEK_DATA is not supported. */
346 > + errno = EINVAL;
347 > + return -1;
348 > +#endif
349 > +}
350 > +
351 > +
352 > +static PyObject *
353 > +_reflink_linux_file_copy(PyObject *self, PyObject *args)
354 > +{
355 > + int eintr_retry, error, fd_in, fd_out, stat_in_acquired, stat_out_acquired;
356 > + int lseek_works, sendfile_works;
357 > + off_t offset_out, len;
358 > + ssize_t buf_bytes, buf_offset, copyfunc_ret;
359 > + struct stat stat_in, stat_out;
360 > + char* buf;
361 > + ssize_t (*copyfunc)(int, int, loff_t *, size_t);
362 > +
363 > + if (!PyArg_ParseTuple(args, "ii", &fd_in, &fd_out))
364 > + return NULL;
365 > +
366 > + eintr_retry = 1;
367 > + offset_out = 0;
368 > + stat_in_acquired = 0;
369 > + stat_out_acquired = 0;
370 > + buf = NULL;
371 > + buf_bytes = 0;
372 > + buf_offset = 0;
373 > + copyfunc = cfr_wrapper;
374 > + lseek_works = 1;
375 > + sendfile_works = 1;
376 > +
377 > + while (eintr_retry) {
378 > +
379 > + Py_BEGIN_ALLOW_THREADS
380 > +
381 > + /* Linux 3.1 and later support SEEK_DATA (for sparse file support).
382 > + * This code uses copy_file_range if possible, and falls back to
383 > + * sendfile for cross-device or when the copy_file_range syscall
384 > + * is not available (less than Linux 4.5). This will fail for
385 > + * Linux less than 3.1, which does not support the lseek SEEK_DATA
386 > + * parameter.
387 > + */
388 > + if (sendfile_works && lseek_works) {
389 > + error = 0;
390 > +
391 > + while (1) {
392 > + len = do_lseek(fd_out, fd_in, &offset_out);
393 > + if (!len) {
394 > + /* EOF */
395 > + break;
396 > + }
397 > + else if (len < 0) {
398 > + if (errno == EINVAL && !offset_out) {
399 > + lseek_works = 0;
400 > + }
401 > + error = 1;
402 > + break;
403 > + }
404 > +
405 > + copyfunc_ret = copyfunc(fd_out,
406 > + fd_in,
407 > + &offset_out,
408 > + len);
409 > +
410 > + if (copyfunc_ret < 0) {
411 > + if ((errno == EXDEV || errno == ENOSYS) &&
412 > + copyfunc == cfr_wrapper) {
413 > + /* Use sendfile instead of copy_file_range for
414 > + * cross-device copies, or when the copy_file_range
415 > + * syscall is not available (less than Linux 4.5).
416 > + */
417 > + copyfunc = sendfile;
418 > + copyfunc_ret = copyfunc(fd_out,
419 > + fd_in,
420 > + &offset_out,
421 > + len);
422 > +
423 > + if (copyfunc_ret < 0) {
424 > + /* On Linux, if lseek succeeded above, then
425 > + * sendfile should have worked here too, so
426 > + * don't bother to fallback for EINVAL here.
427 > + */
428 > + error = 1;
429 > + break;
430 > + }
431 > + }
432 > + else {
433 > + error = 1;
434 > + break;
435 > + }
436 > + }
437 > + }
438 > + }
439 > +
440 > + /* Less than Linux 3.1 does not support SEEK_DATA or copy_file_range,
441 > + * so just use sendfile for in-kernel copy. This will fail for Linux
442 > + * versions from 2.6.0 to 2.6.32, because sendfile does not support
443 > + * writing to regular files.
444 > + */
445 > + if (sendfile_works && !lseek_works) {
446 > + error = 0;
447 > +
448 > + if (!stat_in_acquired && fstat(fd_in, &stat_in) < 0) {
449 > + error = 1;
450 > + }
451 > + else {
452 > + stat_in_acquired = 1;
453 > + while (offset_out < stat_in.st_size) {
454 > + copyfunc_ret = sendfile(fd_out,
455 > + fd_in,
456 > + &offset_out,
457 > + stat_in.st_size - offset_out);
458 > +
459 > + if (copyfunc_ret < 0) {
460 > + if (errno == EINVAL && !offset_out) {
461 > + sendfile_works = 0;
462 > + }
463 > + error = 1;
464 > + break;
465 > + }
466 > + }
467 > + }
468 > + }
469 > +
470 > + /* This implementation will work on any kernel. */
471 > + if (!sendfile_works) {
472 > + error = 0;
473 > +
474 > + if (!stat_out_acquired && fstat(fd_in, &stat_out) < 0) {
475 > + error = 1;
476 > + }
477 > + else {
478 > + stat_out_acquired = 1;
479 > + if (buf == NULL)
480 > + buf = malloc(stat_out.st_blksize);
481 > + if (buf == NULL) {
482 > + error = 1;
483 > + }
484 > + else {
485 > + while (1) {
486 > + /* Some bytes may still be buffered from the
487 > + * previous iteration of the outer loop.
488 > + */
489 > + if (!buf_bytes) {
490 > + buf_offset = 0;
491 > + buf_bytes = read(fd_in, buf, stat_out.st_blksize);
492 > +
493 > + if (!buf_bytes) {
494 > + /* EOF */
495 > + break;
496 > + }
497 > + else if (buf_bytes < 0) {
498 > + error = 1;
499 > + break;
500 > + }
501 > + }
502 > +
503 > + copyfunc_ret = write(fd_out, buf + buf_offset, buf_bytes);
504 > + if (copyfunc_ret < 0) {
505 > + error = 1;
506 > + break;
507 > + }
508 > +
509 > + buf_bytes -= copyfunc_ret;
510 > + buf_offset += copyfunc_ret;
511 > + }
512 > + }
513 > + }
514 > + }
515 > +
516 > + Py_END_ALLOW_THREADS
517 > +
518 > + if (!(error && errno == EINTR && PyErr_CheckSignals() == 0))
519 > + eintr_retry = 0;
520
521 To be honest, I can't do it but it'd be really a good idea if someone
522 could analyze all the code paths where the code above could give 'error
523 = 1' and ensure that:
524
525 a. there is no case when we error=1 and leave errno unmodified/unset
526 (i.e. can accidentally leave earlier EINTR there),
527
528 b. we can correctly recover from any EINTR, i.e. if EINTR causes some
529 function to be interrupted in the middle, our next iteration starts
530 safely.
531
532 > + }
533 > +
534 > + free(buf);
535
536 if (buf != NULL) ?
537
538 > +
539 > + if (error)
540 > + return PyErr_SetFromErrno(PyExc_OSError);
541 > +
542 > + return Py_BuildValue("i", offset_out);
543 > +}
544
545 --
546 Best regards,
547 Michał Górny

Attachments

File name MIME type
signature.asc application/pgp-signature

Replies