1 |
Perform in-kernel file copying when possible, and also support |
2 |
reflinks and sparse files. If the optimized implementation |
3 |
fails at runtime, gracefully fall back to shutil.copyfile. |
4 |
|
5 |
Compile-time and run-time fallbacks are implemented, so that |
6 |
any incompatibilities will be handled gracefully. For example, |
7 |
if the code is compiled on a system that supports the |
8 |
copy_file_range syscall, but at run-time an older kernel that |
9 |
does not support this syscall is detected, it will be handled |
10 |
gracefully. |
11 |
|
12 |
X-Gentoo-Bug: 607868 |
13 |
X-Gentoo-Bug-Url: https://bugs.gentoo.org/show_bug.cgi?id=607868 |
14 |
--- |
15 |
pym/portage/tests/util/file_copy/__init__.py | 0 |
16 |
pym/portage/tests/util/file_copy/__test__.py | 0 |
17 |
pym/portage/tests/util/file_copy/test_copyfile.py | 68 +++++++ |
18 |
pym/portage/util/file_copy/__init__.py | 78 ++++++++ |
19 |
pym/portage/util/movefile.py | 4 +- |
20 |
setup.py | 9 + |
21 |
src/portage_util_file_copy_reflink_linux.c | 225 ++++++++++++++++++++++ |
22 |
7 files changed, 383 insertions(+), 1 deletion(-) |
23 |
create mode 100644 pym/portage/tests/util/file_copy/__init__.py |
24 |
create mode 100644 pym/portage/tests/util/file_copy/__test__.py |
25 |
create mode 100644 pym/portage/tests/util/file_copy/test_copyfile.py |
26 |
create mode 100644 pym/portage/util/file_copy/__init__.py |
27 |
create mode 100644 src/portage_util_file_copy_reflink_linux.c |
28 |
|
29 |
diff --git a/pym/portage/tests/util/file_copy/__init__.py b/pym/portage/tests/util/file_copy/__init__.py |
30 |
new file mode 100644 |
31 |
index 0000000..e69de29 |
32 |
diff --git a/pym/portage/tests/util/file_copy/__test__.py b/pym/portage/tests/util/file_copy/__test__.py |
33 |
new file mode 100644 |
34 |
index 0000000..e69de29 |
35 |
diff --git a/pym/portage/tests/util/file_copy/test_copyfile.py b/pym/portage/tests/util/file_copy/test_copyfile.py |
36 |
new file mode 100644 |
37 |
index 0000000..987a701 |
38 |
--- /dev/null |
39 |
+++ b/pym/portage/tests/util/file_copy/test_copyfile.py |
40 |
@@ -0,0 +1,68 @@ |
41 |
+# Copyright 2017 Gentoo Foundation |
42 |
+# Distributed under the terms of the GNU General Public License v2 |
43 |
+ |
44 |
+import shutil |
45 |
+import tempfile |
46 |
+ |
47 |
+from portage import os |
48 |
+from portage.tests import TestCase |
49 |
+from portage.checksum import perform_md5 |
50 |
+from portage.util.file_copy import copyfile |
51 |
+ |
52 |
+ |
53 |
+class CopyFileTestCase(TestCase): |
54 |
+ |
55 |
+ def testCopyFile(self): |
56 |
+ |
57 |
+ tempdir = tempfile.mkdtemp() |
58 |
+ try: |
59 |
+ src_path = os.path.join(tempdir, 'src') |
60 |
+ dest_path = os.path.join(tempdir, 'dest') |
61 |
+ content = b'foo' |
62 |
+ |
63 |
+ with open(src_path, 'wb') as f: |
64 |
+ f.write(content) |
65 |
+ |
66 |
+ copyfile(src_path, dest_path) |
67 |
+ |
68 |
+ self.assertEqual(perform_md5(src_path), perform_md5(dest_path)) |
69 |
+ finally: |
70 |
+ shutil.rmtree(tempdir) |
71 |
+ |
72 |
+ |
73 |
+class CopyFileSparseTestCase(TestCase): |
74 |
+ |
75 |
+ def testCopyFileSparse(self): |
76 |
+ |
77 |
+ # This test is expected to fail on platforms where we have |
78 |
+ # not implemented sparse copy, so set the todo flag in order |
79 |
+ # to tolerate failures. |
80 |
+ self.todo = True |
81 |
+ |
82 |
+ tempdir = tempfile.mkdtemp() |
83 |
+ try: |
84 |
+ src_path = os.path.join(tempdir, 'src') |
85 |
+ dest_path = os.path.join(tempdir, 'dest') |
86 |
+ content = b'foo' |
87 |
+ |
88 |
+ # Use seek to create some sparse blocks. Don't make these |
89 |
+ # files too big, in case the filesystem doesn't support |
90 |
+ # sparse files. |
91 |
+ with open(src_path, 'wb') as f: |
92 |
+ f.write(content) |
93 |
+ f.seek(2**18, 1) |
94 |
+ f.write(content) |
95 |
+ f.seek(2**19, 1) |
96 |
+ f.write(content) |
97 |
+ |
98 |
+ copyfile(src_path, dest_path) |
99 |
+ |
100 |
+ self.assertEqual(perform_md5(src_path), perform_md5(dest_path)) |
101 |
+ |
102 |
+ # If sparse blocks were preserved, then both files should |
103 |
+ # consume the same number of blocks. |
104 |
+ self.assertEqual( |
105 |
+ os.stat(src_path).st_blocks, |
106 |
+ os.stat(dest_path).st_blocks) |
107 |
+ finally: |
108 |
+ shutil.rmtree(tempdir) |
109 |
diff --git a/pym/portage/util/file_copy/__init__.py b/pym/portage/util/file_copy/__init__.py |
110 |
new file mode 100644 |
111 |
index 0000000..5c7aff1 |
112 |
--- /dev/null |
113 |
+++ b/pym/portage/util/file_copy/__init__.py |
114 |
@@ -0,0 +1,78 @@ |
115 |
+# Copyright 2017 Gentoo Foundation |
116 |
+# Distributed under the terms of the GNU General Public License v2 |
117 |
+ |
118 |
+import os |
119 |
+import shutil |
120 |
+import tempfile |
121 |
+ |
122 |
+try: |
123 |
+ from portage.util.file_copy.reflink_linux import file_copy as _file_copy |
124 |
+except ImportError: |
125 |
+ _file_copy = None |
126 |
+ |
127 |
+ |
128 |
+_copyfile = None |
129 |
+ |
130 |
+ |
131 |
+def _optimized_copyfile(src, dst): |
132 |
+ with open(src, 'rb', buffering=0) as src_file, \ |
133 |
+ open(dst, 'wb', buffering=0) as dst_file: |
134 |
+ _file_copy(src_file.fileno(), dst_file.fileno()) |
135 |
+ |
136 |
+ |
137 |
+def _test_optimized_copyfile(): |
138 |
+ """ |
139 |
+ Test if _optimized_copyfile works. It will fail for Linux versions |
140 |
+ from 2.6.0 to 2.6.32, because sendfile does not support writing |
141 |
+ to regular files. It will also fail for Linux versions less than |
142 |
+ 3.1 if portage was compiled on Linux 3.1 or later, due to missing support |
143 |
+ for lseek SEEK_DATA/SEEK_HOLE. |
144 |
+ """ |
145 |
+ tempdir = tempfile.mkdtemp() |
146 |
+ try: |
147 |
+ src_path = os.path.join(tempdir, 'src') |
148 |
+ dest_path = os.path.join(tempdir, 'dest') |
149 |
+ content = b'foo' |
150 |
+ |
151 |
+ with open(src_path, 'wb') as f: |
152 |
+ f.write(content) |
153 |
+ |
154 |
+ try: |
155 |
+ _optimized_copyfile(src_path, dest_path) |
156 |
+ except Exception: |
157 |
+ return False |
158 |
+ |
159 |
+ with open(dest_path, 'rb') as dest_file: |
160 |
+ if content != dest_file.read(): |
161 |
+ return False |
162 |
+ finally: |
163 |
+ shutil.rmtree(tempdir) |
164 |
+ |
165 |
+ return True |
166 |
+ |
167 |
+ |
168 |
+def copyfile(src, dst): |
169 |
+ """ |
170 |
+ Copy the contents (no metadata) of the file named src to a file |
171 |
+ named dst. |
172 |
+ |
173 |
+ If possible, copying is done within the kernel, and uses |
174 |
+ "copy acceleration" techniques (such as reflinks). This also |
175 |
+ supports sparse files. |
176 |
+ |
177 |
+ @param src: path of source file |
178 |
+ @type src: str |
179 |
+ @param dst: path of destination file |
180 |
+ @type dst: str |
181 |
+ """ |
182 |
+ global _copyfile |
183 |
+ |
184 |
+ if _copyfile is None: |
185 |
+ if _file_copy is None: |
186 |
+ _copyfile = shutil.copyfile |
187 |
+ elif _test_optimized_copyfile(): |
188 |
+ _copyfile = _optimized_copyfile |
189 |
+ else: |
190 |
+ _copyfile = shutil.copyfile |
191 |
+ |
192 |
+ _copyfile(src, dst) |
193 |
diff --git a/pym/portage/util/movefile.py b/pym/portage/util/movefile.py |
194 |
index 4be1c3b..88b35d3 100644 |
195 |
--- a/pym/portage/util/movefile.py |
196 |
+++ b/pym/portage/util/movefile.py |
197 |
@@ -23,6 +23,8 @@ from portage.localization import _ |
198 |
from portage.process import spawn |
199 |
from portage.util import writemsg |
200 |
from portage.util._xattr import xattr |
201 |
+from portage.util.file_copy import copyfile |
202 |
+ |
203 |
|
204 |
def _apply_stat(src_stat, dest): |
205 |
_os.chown(dest, src_stat.st_uid, src_stat.st_gid) |
206 |
@@ -114,7 +116,7 @@ def movefile(src, dest, newmtime=None, sstat=None, mysettings=None, |
207 |
_copyfile = selinux.copyfile |
208 |
_rename = selinux.rename |
209 |
else: |
210 |
- _copyfile = _shutil.copyfile |
211 |
+ _copyfile = copyfile |
212 |
_rename = _os.rename |
213 |
|
214 |
lchown = _unicode_func_wrapper(portage.data.lchown, encoding=encoding) |
215 |
diff --git a/setup.py b/setup.py |
216 |
index a346bd4..b624767 100755 |
217 |
--- a/setup.py |
218 |
+++ b/setup.py |
219 |
@@ -23,6 +23,7 @@ import collections |
220 |
import glob |
221 |
import os |
222 |
import os.path |
223 |
+import platform |
224 |
import re |
225 |
import subprocess |
226 |
import sys |
227 |
@@ -54,6 +55,14 @@ x_c_helpers = { |
228 |
], |
229 |
} |
230 |
|
231 |
+if platform.system() == 'Linux': |
232 |
+ x_c_helpers.update({ |
233 |
+ 'portage.util.file_copy.reflink_linux': [ |
234 |
+ 'src/portage_util_file_copy_reflink_linux.c', |
235 |
+ ], |
236 |
+ }) |
237 |
+ |
238 |
+ |
239 |
class x_build(build): |
240 |
""" Build command with extra build_man call. """ |
241 |
|
242 |
diff --git a/src/portage_util_file_copy_reflink_linux.c b/src/portage_util_file_copy_reflink_linux.c |
243 |
new file mode 100644 |
244 |
index 0000000..7139c7a |
245 |
--- /dev/null |
246 |
+++ b/src/portage_util_file_copy_reflink_linux.c |
247 |
@@ -0,0 +1,225 @@ |
248 |
+/* Copyright 2017 Gentoo Foundation |
249 |
+ * Distributed under the terms of the GNU General Public License v2 |
250 |
+ */ |
251 |
+ |
252 |
+#include <Python.h> |
253 |
+#include <errno.h> |
254 |
+#include <stdlib.h> |
255 |
+#include <ctype.h> |
256 |
+#include <sys/sendfile.h> |
257 |
+#include <sys/stat.h> |
258 |
+#include <sys/syscall.h> |
259 |
+#include <sys/types.h> |
260 |
+#include <unistd.h> |
261 |
+ |
262 |
+static PyObject * _reflink_linux_file_copy(PyObject *, PyObject *); |
263 |
+ |
264 |
+static PyMethodDef reflink_linuxMethods[] = { |
265 |
+ { |
266 |
+ "file_copy", |
267 |
+ _reflink_linux_file_copy, |
268 |
+ METH_VARARGS, |
269 |
+ "Copy between two file descriptors, " |
270 |
+ "with reflink and sparse file support." |
271 |
+ }, |
272 |
+ {NULL, NULL, 0, NULL} |
273 |
+}; |
274 |
+ |
275 |
+#if PY_MAJOR_VERSION >= 3 |
276 |
+static struct PyModuleDef moduledef = { |
277 |
+ PyModuleDef_HEAD_INIT, |
278 |
+ "reflink_linux", /* m_name */ |
279 |
+ "Module for reflink_linux copy operations", /* m_doc */ |
280 |
+ -1, /* m_size */ |
281 |
+ reflink_linuxMethods, /* m_methods */ |
282 |
+ NULL, /* m_reload */ |
283 |
+ NULL, /* m_traverse */ |
284 |
+ NULL, /* m_clear */ |
285 |
+ NULL, /* m_free */ |
286 |
+}; |
287 |
+ |
288 |
+PyMODINIT_FUNC |
289 |
+PyInit_reflink_linux(void) |
290 |
+{ |
291 |
+ PyObject *m; |
292 |
+ m = PyModule_Create(&moduledef); |
293 |
+ return m; |
294 |
+} |
295 |
+#else |
296 |
+PyMODINIT_FUNC |
297 |
+initreflink_linux(void) |
298 |
+{ |
299 |
+ Py_InitModule("reflink_linux", reflink_linuxMethods); |
300 |
+} |
301 |
+#endif |
302 |
+ |
303 |
+ |
304 |
+static ssize_t |
305 |
+cfr_wrapper(int fd_out, int fd_in, loff_t *off_out, size_t len) |
306 |
+{ |
307 |
+#ifdef __NR_copy_file_range |
308 |
+ return syscall(__NR_copy_file_range, fd_in, NULL, fd_out, |
309 |
+ off_out, len, 0); |
310 |
+#else |
311 |
+ errno = ENOSYS; |
312 |
+ return -1; |
313 |
+#endif |
314 |
+} |
315 |
+ |
316 |
+ |
317 |
+static PyObject * |
318 |
+_reflink_linux_file_copy(PyObject *self, PyObject *args) |
319 |
+{ |
320 |
+ int eintr_retry, error, fd_in, fd_out; |
321 |
+ off_t offset_out; |
322 |
+ ssize_t copyfunc_ret; |
323 |
+#ifdef SEEK_DATA |
324 |
+ /* Linux 3.1 and later support SEEK_DATA (for sparse file support). |
325 |
+ * This code uses copy_file_range if possible, and falls back to |
326 |
+ * sendfile for cross-device or when the copy_file_range syscall |
327 |
+ * is not available (less than Linux 4.5). This will fail for |
328 |
+ * Linux less than 3.1, which does not support the lseek SEEK_DATA |
329 |
+ * parameter. The caller should perform a runtime test to verify |
330 |
+ * that this function works with the running kernel. |
331 |
+ */ |
332 |
+ off_t offset_data, offset_hole; |
333 |
+ ssize_t (*copyfunc)(int, int, loff_t *, size_t); |
334 |
+ |
335 |
+ if (!PyArg_ParseTuple(args, "ii", &fd_in, &fd_out)) |
336 |
+ return NULL; |
337 |
+ |
338 |
+ eintr_retry = 1; |
339 |
+ offset_out = 0; |
340 |
+ offset_data = 0; |
341 |
+ copyfunc = cfr_wrapper; |
342 |
+ |
343 |
+ while (eintr_retry) { |
344 |
+ |
345 |
+ error = 0; |
346 |
+ |
347 |
+ Py_BEGIN_ALLOW_THREADS |
348 |
+ |
349 |
+ while (1) { |
350 |
+ /* Use lseek SEEK_DATA/SEEK_HOLE for sparse file support, |
351 |
+ * as suggested in the copy_file_range man page. |
352 |
+ */ |
353 |
+ offset_data = lseek(fd_in, offset_out, SEEK_DATA); |
354 |
+ if (offset_data < 0) { |
355 |
+ if (errno == ENXIO) { |
356 |
+ /* EOF */ |
357 |
+ break; |
358 |
+ } |
359 |
+ error = 1; |
360 |
+ break; |
361 |
+ } |
362 |
+ |
363 |
+ /* Create sparse empty blocks in the output file, up |
364 |
+ * until the next location that will contain data. |
365 |
+ */ |
366 |
+ if (offset_data > offset_out) { |
367 |
+ offset_out = lseek(fd_out, offset_data, SEEK_SET); |
368 |
+ if (offset_out < 0) { |
369 |
+ error = 1; |
370 |
+ break; |
371 |
+ } |
372 |
+ } |
373 |
+ |
374 |
+ /* Locate the next hole, so that we know when to |
375 |
+ * stop copying. There is an implicit hole at the |
376 |
+ * end of the file. |
377 |
+ */ |
378 |
+ offset_hole = lseek(fd_in, offset_data, SEEK_HOLE); |
379 |
+ if (offset_hole < 0) { |
380 |
+ error = 1; |
381 |
+ break; |
382 |
+ } |
383 |
+ |
384 |
+ /* Revert SEEK_HOLE offset change, since we're going |
385 |
+ * to copy the data that comes before the hole. |
386 |
+ */ |
387 |
+ if (lseek(fd_in, offset_out, SEEK_SET) < 0) { |
388 |
+ error = 1; |
389 |
+ break; |
390 |
+ } |
391 |
+ |
392 |
+ copyfunc_ret = copyfunc(fd_out, |
393 |
+ fd_in, |
394 |
+ &offset_out, |
395 |
+ offset_hole - offset_data); |
396 |
+ |
397 |
+ if (copyfunc_ret < 0) { |
398 |
+ if ((errno == EXDEV || errno == ENOSYS) && |
399 |
+ copyfunc == cfr_wrapper) { |
400 |
+ /* Use sendfile instead of copy_file_range for |
401 |
+ * cross-device copies, or when the copy_file_range |
402 |
+ * syscall is not available (less than Linux 4.5). |
403 |
+ */ |
404 |
+ copyfunc = sendfile; |
405 |
+ copyfunc_ret = copyfunc(fd_out, |
406 |
+ fd_in, |
407 |
+ &offset_out, |
408 |
+ offset_hole - offset_data); |
409 |
+ |
410 |
+ if (copyfunc_ret < 0) { |
411 |
+ error = 1; |
412 |
+ break; |
413 |
+ } |
414 |
+ } |
415 |
+ else { |
416 |
+ error = 1; |
417 |
+ break; |
418 |
+ } |
419 |
+ } |
420 |
+ } |
421 |
+#else |
422 |
+ /* Linux versions before 3.1 do not support SEEK_DATA or copy_file_range, |
423 |
+ * so just use sendfile for in-kernel copy. This will fail for Linux |
424 |
+ * versions from 2.6.0 to 2.6.32, because sendfile does not support |
425 |
+ * writing to regular files. The caller should perform a runtime |
426 |
+ * test to verify that this function works with the running kernel. |
427 |
+ */ |
428 |
+ struct stat sb; |
429 |
+ int stat_acquired; |
430 |
+ |
431 |
+ if (!PyArg_ParseTuple(args, "ii", &fd_in, &fd_out)) |
432 |
+ return NULL; |
433 |
+ |
434 |
+ eintr_retry = 1; |
435 |
+ offset_out = 0; |
436 |
+ stat_acquired = 0; |
437 |
+ |
438 |
+ while (eintr_retry) { |
439 |
+ |
440 |
+ error = 0; |
441 |
+ |
442 |
+ Py_BEGIN_ALLOW_THREADS |
443 |
+ |
444 |
+ if (!stat_acquired && fstat(fd_in, &sb) < 0) { |
445 |
+ error = 1; |
446 |
+ } |
447 |
+ else { |
448 |
+ stat_acquired = 1; |
449 |
+ while (offset_out < sb.st_size) { |
450 |
+ copyfunc_ret = sendfile(fd_out, |
451 |
+ fd_in, |
452 |
+ &offset_out, |
453 |
+ sb.st_size - offset_out); |
454 |
+ |
455 |
+ if (copyfunc_ret < 0) { |
456 |
+ error = 1; |
457 |
+ break; |
458 |
+ } |
459 |
+ } |
460 |
+ } |
461 |
+#endif |
462 |
+ Py_END_ALLOW_THREADS |
463 |
+ |
464 |
+ if (!(error && errno == EINTR && PyErr_CheckSignals() == 0)) |
465 |
+ eintr_retry = 0; |
466 |
+ } |
467 |
+ |
468 |
+ if (error) |
469 |
+ return PyErr_SetFromErrno(PyExc_OSError); |
470 |
+ |
471 |
+ return Py_BuildValue("i", offset_out); |
472 |
+} |
473 |
-- |
474 |
2.10.2 |