1 |
W dniu 01.03.2017, śro o godzinie 19∶03 -0800, użytkownik Zac Medico |
2 |
napisał: |
3 |
> Perform in-kernel file copying when possible, and also support |
4 |
> reflinks and sparse files. If the optimized implementation |
5 |
> fails at runtime, gracefully fallback to shutil.copyfile. |
6 |
> |
7 |
> Compile-time and run-time fallbacks are implemented, so that |
8 |
> any incompatiblities will be handled gracefully. For example, |
9 |
> if the code is compiled on a system that supports the |
10 |
> copy_file_range syscall, but at run-time an older kernel that |
11 |
> does not support this syscall is detected, it will be handled |
12 |
> gracefully. |
13 |
> |
14 |
> X-Gentoo-Bug: 607868 |
15 |
> X-Gentoo-Bug-Url: https://bugs.gentoo.org/show_bug.cgi?id=607868 |
16 |
> --- |
17 |
> pym/portage/tests/util/file_copy/__init__.py | 0 |
18 |
> pym/portage/tests/util/file_copy/__test__.py | 0 |
19 |
> pym/portage/tests/util/file_copy/test_copyfile.py | 68 +++++++ |
20 |
> pym/portage/util/file_copy/__init__.py | 78 ++++++++ |
21 |
> pym/portage/util/movefile.py | 4 +- |
22 |
> setup.py | 9 + |
23 |
> src/portage_util_file_copy_reflink_linux.c | 225 ++++++++++++++++++++++ |
24 |
> 7 files changed, 383 insertions(+), 1 deletion(-) |
25 |
> create mode 100644 pym/portage/tests/util/file_copy/__init__.py |
26 |
> create mode 100644 pym/portage/tests/util/file_copy/__test__.py |
27 |
> create mode 100644 pym/portage/tests/util/file_copy/test_copyfile.py |
28 |
> create mode 100644 pym/portage/util/file_copy/__init__.py |
29 |
> create mode 100644 src/portage_util_file_copy_reflink_linux.c |
30 |
> |
31 |
> diff --git a/pym/portage/tests/util/file_copy/__init__.py b/pym/portage/tests/util/file_copy/__init__.py |
32 |
> new file mode 100644 |
33 |
> index 0000000..e69de29 |
34 |
> diff --git a/pym/portage/tests/util/file_copy/__test__.py b/pym/portage/tests/util/file_copy/__test__.py |
35 |
> new file mode 100644 |
36 |
> index 0000000..e69de29 |
37 |
> diff --git a/pym/portage/tests/util/file_copy/test_copyfile.py b/pym/portage/tests/util/file_copy/test_copyfile.py |
38 |
> new file mode 100644 |
39 |
> index 0000000..987a701 |
40 |
> --- /dev/null |
41 |
> +++ b/pym/portage/tests/util/file_copy/test_copyfile.py |
42 |
> @@ -0,0 +1,68 @@ |
43 |
> +# Copyright 2017 Gentoo Foundation |
44 |
> +# Distributed under the terms of the GNU General Public License v2 |
45 |
> + |
46 |
> +import shutil |
47 |
> +import tempfile |
48 |
> + |
49 |
> +from portage import os |
50 |
> +from portage.tests import TestCase |
51 |
> +from portage.checksum import perform_md5 |
52 |
> +from portage.util.file_copy import copyfile |
53 |
> + |
54 |
> + |
55 |
> +class CopyFileTestCase(TestCase): |
56 |
> + |
57 |
> + def testCopyFile(self): |
58 |
> + |
59 |
> + tempdir = tempfile.mkdtemp() |
60 |
> + try: |
61 |
> + src_path = os.path.join(tempdir, 'src') |
62 |
> + dest_path = os.path.join(tempdir, 'dest') |
63 |
> + content = b'foo' |
64 |
> + |
65 |
> + with open(src_path, 'wb') as f: |
66 |
> + f.write(content) |
67 |
> + |
68 |
> + copyfile(src_path, dest_path) |
69 |
> + |
70 |
> + self.assertEqual(perform_md5(src_path), perform_md5(dest_path)) |
71 |
> + finally: |
72 |
> + shutil.rmtree(tempdir) |
73 |
> + |
74 |
> + |
75 |
> +class CopyFileSparseTestCase(TestCase): |
76 |
> + |
77 |
> + def testCopyFileSparse(self): |
78 |
> + |
79 |
> + # This test is expected to fail on platforms where we have |
80 |
> + # not implemented sparse copy, so set the todo flag in order |
81 |
> + # to tolerate failures. |
82 |
> + self.todo = True |
83 |
> + |
84 |
> + tempdir = tempfile.mkdtemp() |
85 |
> + try: |
86 |
> + src_path = os.path.join(tempdir, 'src') |
87 |
> + dest_path = os.path.join(tempdir, 'dest') |
88 |
> + content = b'foo' |
89 |
> + |
90 |
> + # Use seek to create some sparse blocks. Don't make these |
91 |
> + # files too big, in case the filesystem doesn't support |
92 |
> + # sparse files. |
93 |
> + with open(src_path, 'wb') as f: |
94 |
> + f.write(content) |
95 |
> + f.seek(2**18, 1) |
96 |
> + f.write(content) |
97 |
> + f.seek(2**19, 1) |
98 |
> + f.write(content) |
99 |
> + |
100 |
> + copyfile(src_path, dest_path) |
101 |
> + |
102 |
> + self.assertEqual(perform_md5(src_path), perform_md5(dest_path)) |
103 |
> + |
104 |
> + # If sparse blocks were preserved, then both files should |
105 |
> + # consume the same number of blocks. |
106 |
> + self.assertEqual( |
107 |
> + os.stat(src_path).st_blocks, |
108 |
> + os.stat(dest_path).st_blocks) |
109 |
> + finally: |
110 |
> + shutil.rmtree(tempdir) |
111 |
> diff --git a/pym/portage/util/file_copy/__init__.py b/pym/portage/util/file_copy/__init__.py |
112 |
> new file mode 100644 |
113 |
> index 0000000..5c7aff1 |
114 |
> --- /dev/null |
115 |
> +++ b/pym/portage/util/file_copy/__init__.py |
116 |
> @@ -0,0 +1,78 @@ |
117 |
> +# Copyright 2017 Gentoo Foundation |
118 |
> +# Distributed under the terms of the GNU General Public License v2 |
119 |
> + |
120 |
> +import os |
121 |
> +import shutil |
122 |
> +import tempfile |
123 |
> + |
124 |
> +try: |
125 |
> + from portage.util.file_copy.reflink_linux import file_copy as _file_copy |
126 |
> +except ImportError: |
127 |
> + _file_copy = None |
128 |
> + |
129 |
> + |
130 |
> +_copyfile = None |
131 |
> + |
132 |
> + |
133 |
> +def _optimized_copyfile(src, dst): |
134 |
> + with open(src, 'rb', buffering=0) as src_file, \ |
135 |
> + open(dst, 'wb', buffering=0) as dst_file: |
136 |
> + _file_copy(src_file.fileno(), dst_file.fileno()) |
137 |
> + |
138 |
> + |
139 |
> +def _test_optimized_copyfile(): |
140 |
> + """ |
141 |
> + Test if _optimized_copyfile works. It will fail for Linux versions |
142 |
> + from 2.6.0 to 2.6.32, because sendfile does not support writing |
143 |
> + to regular files. It will also fail for Linux versions less than |
144 |
> + 3.1 if portage was compiled 3.1 or later, due to missing support |
145 |
> + for lseek SEEK_DATA/SEEK_HOLE. |
146 |
|
147 |
I don't really like the idea of relying on a one-time attempt to determine |
148 |
whether a complex mechanism lacking a proper fallback will work. But I |
149 |
guess you're only aiming at catching corner cases here. |
150 |
|
151 |
> + """ |
152 |
> + tempdir = tempfile.mkdtemp() |
153 |
> + try: |
154 |
> + src_path = os.path.join(tempdir, 'src') |
155 |
> + dest_path = os.path.join(tempdir, 'dest') |
156 |
> + content = b'foo' |
157 |
> + |
158 |
> + with open(src_path, 'wb') as f: |
159 |
> + f.write(content) |
160 |
> + |
161 |
> + try: |
162 |
> + _optimized_copyfile(src_path, dest_path) |
163 |
> + except Exception: |
164 |
> + return False |
165 |
> + |
166 |
> + with open(dest_path, 'rb') as dest_file: |
167 |
> + if content != dest_file.read(): |
168 |
> + return False |
169 |
> + finally: |
170 |
> + shutil.rmtree(tempdir) |
171 |
> + |
172 |
> + return True |
173 |
> + |
174 |
> + |
175 |
> +def copyfile(src, dst): |
176 |
> + """ |
177 |
> + Copy the contents (no metadata) of the file named src to a file |
178 |
> + named dst. |
179 |
> + |
180 |
> + If possible, copying is done within the kernel, and uses |
181 |
> + "copy acceleration" techniques (such as reflinks). This also |
182 |
> + supports sparse files. |
183 |
> + |
184 |
> + @param src: path of source file |
185 |
> + @type src: str |
186 |
> + @param dst: path of destination file |
187 |
> + @type dst: str |
188 |
> + """ |
189 |
> + global _copyfile |
190 |
> + |
191 |
> + if _copyfile is None: |
192 |
> + if _file_copy is None: |
193 |
> + _copyfile = shutil.copyfile |
194 |
> + elif _test_optimized_copyfile(): |
195 |
> + _copyfile = _optimized_copyfile |
196 |
> + else: |
197 |
> + _copyfile = shutil.copyfile |
198 |
|
199 |
This logic looks a bit roundabout. Wouldn't it be simpler if |
200 |
_test_optimized_copyfile() just returned False if _file_copy is None? |
201 |
|
202 |
> + |
203 |
> + _copyfile(src, dst) |
204 |
> diff --git a/pym/portage/util/movefile.py b/pym/portage/util/movefile.py |
205 |
> index 4be1c3b..88b35d3 100644 |
206 |
> --- a/pym/portage/util/movefile.py |
207 |
> +++ b/pym/portage/util/movefile.py |
208 |
> @@ -23,6 +23,8 @@ from portage.localization import _ |
209 |
> from portage.process import spawn |
210 |
> from portage.util import writemsg |
211 |
> from portage.util._xattr import xattr |
212 |
> +from portage.util.file_copy import copyfile |
213 |
> + |
214 |
> |
215 |
> def _apply_stat(src_stat, dest): |
216 |
> _os.chown(dest, src_stat.st_uid, src_stat.st_gid) |
217 |
> @@ -114,7 +116,7 @@ def movefile(src, dest, newmtime=None, sstat=None, mysettings=None, |
218 |
> _copyfile = selinux.copyfile |
219 |
> _rename = selinux.rename |
220 |
> else: |
221 |
> - _copyfile = _shutil.copyfile |
222 |
> + _copyfile = copyfile |
223 |
> _rename = _os.rename |
224 |
> |
225 |
> lchown = _unicode_func_wrapper(portage.data.lchown, encoding=encoding) |
226 |
> diff --git a/setup.py b/setup.py |
227 |
> index a346bd4..b624767 100755 |
228 |
> --- a/setup.py |
229 |
> +++ b/setup.py |
230 |
> @@ -23,6 +23,7 @@ import collections |
231 |
> import glob |
232 |
> import os |
233 |
> import os.path |
234 |
> +import platform |
235 |
> import re |
236 |
> import subprocess |
237 |
> import sys |
238 |
> @@ -54,6 +55,14 @@ x_c_helpers = { |
239 |
> ], |
240 |
> } |
241 |
> |
242 |
> +if platform.system() == 'Linux': |
243 |
> + x_c_helpers.update({ |
244 |
> + 'portage.util.file_copy.reflink_linux': [ |
245 |
> + 'src/portage_util_file_copy_reflink_linux.c', |
246 |
> + ], |
247 |
> + }) |
248 |
> + |
249 |
> + |
250 |
> class x_build(build): |
251 |
> """ Build command with extra build_man call. """ |
252 |
> |
253 |
> diff --git a/src/portage_util_file_copy_reflink_linux.c b/src/portage_util_file_copy_reflink_linux.c |
254 |
> new file mode 100644 |
255 |
> index 0000000..7139c7a |
256 |
> --- /dev/null |
257 |
> +++ b/src/portage_util_file_copy_reflink_linux.c |
258 |
> @@ -0,0 +1,225 @@ |
259 |
> +/* Copyright 2017 Gentoo Foundation |
260 |
> + * Distributed under the terms of the GNU General Public License v2 |
261 |
> + */ |
262 |
> + |
263 |
> +#include <Python.h> |
264 |
> +#include <errno.h> |
265 |
> +#include <stdlib.h> |
266 |
> +#include <ctype.h> |
267 |
> +#include <sys/sendfile.h> |
268 |
> +#include <sys/stat.h> |
269 |
> +#include <sys/syscall.h> |
270 |
> +#include <sys/types.h> |
271 |
> +#include <unistd.h> |
272 |
> + |
273 |
> +static PyObject * _reflink_linux_file_copy(PyObject *, PyObject *); |
274 |
> + |
275 |
> +static PyMethodDef reflink_linuxMethods[] = { |
276 |
> + { |
277 |
> + "file_copy", |
278 |
> + _reflink_linux_file_copy, |
279 |
> + METH_VARARGS, |
280 |
> + "Copy between two file descriptors, " |
281 |
> + "with reflink and sparse file support." |
282 |
> + }, |
283 |
> + {NULL, NULL, 0, NULL} |
284 |
> +}; |
285 |
> + |
286 |
> +#if PY_MAJOR_VERSION >= 3 |
287 |
> +static struct PyModuleDef moduledef = { |
288 |
> + PyModuleDef_HEAD_INIT, |
289 |
> + "reflink_linux", /* m_name */ |
290 |
> + "Module for reflink_linux copy operations", /* m_doc */ |
291 |
> + -1, /* m_size */ |
292 |
> + reflink_linuxMethods, /* m_methods */ |
293 |
> + NULL, /* m_reload */ |
294 |
> + NULL, /* m_traverse */ |
295 |
> + NULL, /* m_clear */ |
296 |
> + NULL, /* m_free */ |
297 |
> +}; |
298 |
> + |
299 |
> +PyMODINIT_FUNC |
300 |
> +PyInit_reflink_linux(void) |
301 |
> +{ |
302 |
> + PyObject *m; |
303 |
> + m = PyModule_Create(&moduledef); |
304 |
> + return m; |
305 |
> +} |
306 |
> +#else |
307 |
> +PyMODINIT_FUNC |
308 |
> +initreflink_linux(void) |
309 |
> +{ |
310 |
> + Py_InitModule("reflink_linux", reflink_linuxMethods); |
311 |
> +} |
312 |
> +#endif |
313 |
> + |
314 |
> + |
315 |
> +static ssize_t |
316 |
> +cfr_wrapper(int fd_out, int fd_in, loff_t *off_out, size_t len) |
317 |
> +{ |
318 |
> +#ifdef __NR_copy_file_range |
319 |
> + return syscall(__NR_copy_file_range, fd_in, NULL, fd_out, |
320 |
> + off_out, len, 0); |
321 |
> +#else |
322 |
> + errno = ENOSYS; |
323 |
> + return -1; |
324 |
> +#endif |
325 |
> +} |
326 |
> + |
327 |
> + |
328 |
> +static PyObject * |
329 |
> +_reflink_linux_file_copy(PyObject *self, PyObject *args) |
330 |
> +{ |
331 |
> + int eintr_retry, error, fd_in, fd_out; |
332 |
> + off_t offset_out; |
333 |
> + ssize_t copyfunc_ret; |
334 |
> +#ifdef SEEK_DATA |
335 |
> + /* Linux 3.1 and later support SEEK_DATA (for sparse file support). |
336 |
> + * This code uses copy_file_range if possible, and falls back to |
337 |
> + * sendfile for cross-device or when the copy_file_range syscall |
338 |
> + * is not available (less than Linux 4.5). This will fail for |
339 |
> + * Linux less than 3.1, which does not support the lseek SEEK_DATA |
340 |
> + * parameter. The caller should perform a runtime test to verify |
341 |
> + * that this function works with the running kernel. |
342 |
> + */ |
343 |
> + off_t offset_data, offset_hole; |
344 |
> + ssize_t (*copyfunc)(int, int, loff_t *, size_t); |
345 |
> + |
346 |
> + if (!PyArg_ParseTuple(args, "ii", &fd_in, &fd_out)) |
347 |
> + return NULL; |
348 |
> + |
349 |
> + eintr_retry = 1; |
350 |
> + offset_out = 0; |
351 |
> + offset_data = 0; |
352 |
> + copyfunc = cfr_wrapper; |
353 |
> + |
354 |
> + while (eintr_retry) { |
355 |
> + |
356 |
> + error = 0; |
357 |
> + |
358 |
> + Py_BEGIN_ALLOW_THREADS |
359 |
> + |
360 |
> + while (1) { |
361 |
> + /* Use lseek SEEK_DATA/SEEK_HOLE for sparse file support, |
362 |
> + * as suggested in the copy_file_range man page. |
363 |
> + */ |
364 |
> + offset_data = lseek(fd_in, offset_out, SEEK_DATA); |
365 |
> + if (offset_data < 0) { |
366 |
> + if (errno == ENXIO) { |
367 |
> + /* EOF */ |
368 |
> + break; |
369 |
> + } |
370 |
> + error = 1; |
371 |
> + break; |
372 |
> + } |
373 |
> + |
374 |
> + /* Create sparse empty blocks in the output file, up |
375 |
> + * until the next location that will contain data. |
376 |
> + */ |
377 |
> + if (offset_data > offset_out) { |
378 |
> + offset_out = lseek(fd_out, offset_data, SEEK_SET); |
379 |
> + if (offset_out < 0) { |
380 |
> + error = 1; |
381 |
> + break; |
382 |
> + } |
383 |
> + } |
384 |
> + |
385 |
> + /* Locate the next hole, so that we know when to |
386 |
> + * stop copying. There is an implicit hole at the |
387 |
> + * end of the file. |
388 |
> + */ |
389 |
> + offset_hole = lseek(fd_in, offset_data, SEEK_HOLE); |
390 |
> + if (offset_hole < 0) { |
391 |
> + error = 1; |
392 |
> + break; |
393 |
> + } |
394 |
> + |
395 |
> + /* Revert SEEK_HOLE offset change, since we're going |
396 |
> + * to copy the data that comes before the hole. |
397 |
> + */ |
398 |
> + if (lseek(fd_in, offset_out, SEEK_SET) < 0) { |
399 |
> + error = 1; |
400 |
> + break; |
401 |
> + } |
402 |
> + |
403 |
> + copyfunc_ret = copyfunc(fd_out, |
404 |
> + fd_in, |
405 |
> + &offset_out, |
406 |
> + offset_hole - offset_data); |
407 |
> + |
408 |
> + if (copyfunc_ret < 0) { |
409 |
> + if ((errno == EXDEV || errno == ENOSYS) && |
410 |
> + copyfunc == cfr_wrapper) { |
411 |
> + /* Use sendfile instead of copy_file_range for |
412 |
> + * cross-device copies, or when the copy_file_range |
413 |
> + * syscall is not available (less than Linux 4.5). |
414 |
> + */ |
415 |
> + copyfunc = sendfile; |
416 |
> + copyfunc_ret = copyfunc(fd_out, |
417 |
> + fd_in, |
418 |
> + &offset_out, |
419 |
> + offset_hole - offset_data); |
420 |
> + |
421 |
> + if (copyfunc_ret < 0) { |
422 |
|
423 |
I think you still should have a proper fallback for sendfile() refusing |
424 |
to do its job. |
425 |
|
426 |
> + error = 1; |
427 |
> + break; |
428 |
> + } |
429 |
> + } |
430 |
> + else { |
431 |
> + error = 1; |
432 |
> + break; |
433 |
> + } |
434 |
> + } |
435 |
> + } |
436 |
> +#else |
437 |
> + /* Less than Linux 3.1 does not support SEEK_DATA or copy_file_range, |
438 |
> + * so just use sendfile for in-kernel copy. This will fail for Linux |
439 |
> + * versions from 2.6.0 to 2.6.32, because sendfile does not support |
440 |
> + * writing to regular files. The caller should perform a runtime |
441 |
> + * test to verify that this function works with the running kernel. |
442 |
> + */ |
443 |
> + struct stat sb; |
444 |
> + int stat_acquired; |
445 |
> + |
446 |
> + if (!PyArg_ParseTuple(args, "ii", &fd_in, &fd_out)) |
447 |
> + return NULL; |
448 |
> + |
449 |
> + eintr_retry = 1; |
450 |
> + offset_out = 0; |
451 |
> + stat_acquired = 0; |
452 |
> + |
453 |
> + while (eintr_retry) { |
454 |
> + |
455 |
> + error = 0; |
456 |
> + |
457 |
> + Py_BEGIN_ALLOW_THREADS |
458 |
> + |
459 |
> + if (!stat_acquired && fstat(fd_in, &sb) < 0) { |
460 |
> + error = 1; |
461 |
> + } |
462 |
> + else { |
463 |
> + stat_acquired = 1; |
464 |
> + while (offset_out < sb.st_size) { |
465 |
> + copyfunc_ret = sendfile(fd_out, |
466 |
> + fd_in, |
467 |
> + &offset_out, |
468 |
> + sb.st_size - offset_out); |
469 |
> + |
470 |
> + if (copyfunc_ret < 0) { |
471 |
|
472 |
Likewise, especially since old kernels may refuse file-to-file copies here. |
473 |
|
474 |
> + error = 1; |
475 |
> + break; |
476 |
> + } |
477 |
> + } |
478 |
> + } |
479 |
> +#endif |
480 |
> + Py_END_ALLOW_THREADS |
481 |
> + |
482 |
> + if (!(error && errno == EINTR && PyErr_CheckSignals() == 0)) |
483 |
> + eintr_retry = 0; |
484 |
> + } |
485 |
> + |
486 |
> + if (error) |
487 |
> + return PyErr_SetFromErrno(PyExc_OSError); |
488 |
> + |
489 |
> + return Py_BuildValue("i", offset_out); |
490 |
> +} |
491 |
|
492 |
-- |
493 |
Best regards, |
494 |
Michał Górny |