1 |
Author: zmedico |
2 |
Date: 2008-11-26 22:31:23 +0000 (Wed, 26 Nov 2008) |
3 |
New Revision: 12109 |
4 |
|
5 |
Modified: |
6 |
main/trunk/pym/portage/__init__.py |
7 |
main/trunk/pym/portage/dbapi/vartree.py |
8 |
Log: |
9 |
Bug #235642 - Create hardlinks when merging identical files. This works by using a |
10 |
tuple of (md5, st_size) as a key to a list of hardlink candidates. Multiple candidates |
11 |
are used in case some happen to be merged to separate devices. |
12 |
|
13 |
|
14 |
Modified: main/trunk/pym/portage/__init__.py |
15 |
=================================================================== |
16 |
--- main/trunk/pym/portage/__init__.py 2008-11-26 20:30:22 UTC (rev 12108) |
17 |
+++ main/trunk/pym/portage/__init__.py 2008-11-26 22:31:23 UTC (rev 12109) |
18 |
@@ -5968,7 +5968,8 @@ |
19 |
raise portage.exception.PortageException( |
20 |
"mv '%s' '%s'" % (src, dest)) |
21 |
|
22 |
-def movefile(src,dest,newmtime=None,sstat=None,mysettings=None): |
23 |
+def movefile(src, dest, newmtime=None, sstat=None, mysettings=None, |
24 |
+ hardlink_candidates=None): |
25 |
"""moves a file from src to dest, preserving all permissions and attributes; mtime will |
26 |
be preserved even when moving across filesystems. Returns true on success and false on |
27 |
failure. Move is atomic.""" |
28 |
@@ -6040,8 +6041,45 @@ |
29 |
print "!!!",e |
30 |
return None |
31 |
|
32 |
+ hardlinked = False |
33 |
+ # Identical files might be merged to multiple filesystems, |
33 |
+ # so os.link() calls might fail for some paths; try them all. |
35 |
+ # For atomic replacement, first create the link as a temp file |
36 |
+ # and then use os.rename() to replace the destination. |
37 |
+ if hardlink_candidates is not None: |
38 |
+ head, tail = os.path.split(dest) |
39 |
+ hardlink_tmp = os.path.join(head, ".%s._portage_merge_.%s" % \ |
40 |
+ (tail, os.getpid())) |
41 |
+ try: |
42 |
+ os.unlink(hardlink_tmp) |
43 |
+ except OSError, e: |
44 |
+ if e.errno != errno.ENOENT: |
45 |
+ writemsg("!!! Failed to remove hardlink temp file: %s\n" % \ |
46 |
+ (hardlink_tmp,), noiselevel=-1) |
47 |
+ writemsg("!!! %s\n" % (e,), noiselevel=-1) |
48 |
+ return None |
49 |
+ del e |
50 |
+ for hardlink_src in hardlink_candidates: |
51 |
+ try: |
52 |
+ os.link(hardlink_src, hardlink_tmp) |
53 |
+ except OSError: |
54 |
+ continue |
55 |
+ else: |
56 |
+ try: |
57 |
+ os.rename(hardlink_tmp, dest) |
58 |
+ except OSError, e: |
59 |
+ writemsg("!!! Failed to rename %s to %s\n" % \ |
60 |
+ (hardlink_tmp, dest), noiselevel=-1) |
61 |
+ writemsg("!!! %s\n" % (e,), noiselevel=-1) |
62 |
+ return None |
63 |
+ hardlinked = True |
64 |
+ break |
65 |
+ |
66 |
renamefailed=1 |
67 |
- if sstat[stat.ST_DEV]==dstat[stat.ST_DEV] or selinux_enabled: |
68 |
+ if hardlinked: |
69 |
+ renamefailed = False |
70 |
+ if not hardlinked and \ |
71 |
+ (selinux_enabled or sstat[stat.ST_DEV] == dstat[stat.ST_DEV]): |
72 |
try: |
73 |
if selinux_enabled: |
74 |
ret=selinux.secure_rename(src,dest) |
75 |
@@ -6102,11 +6140,14 @@ |
76 |
return None |
77 |
|
78 |
try: |
79 |
- if newmtime is not None: |
80 |
- os.utime(dest, (newmtime, newmtime)) |
81 |
+ if hardlinked: |
82 |
+ newmtime = long(os.stat(dest).st_mtime) |
83 |
else: |
84 |
- os.utime(dest, (sstat.st_atime, sstat.st_mtime)) |
85 |
- newmtime = long(sstat.st_mtime) |
86 |
+ if newmtime is not None: |
87 |
+ os.utime(dest, (newmtime, newmtime)) |
88 |
+ else: |
89 |
+ os.utime(dest, (sstat.st_atime, sstat.st_mtime)) |
90 |
+ newmtime = long(sstat.st_mtime) |
91 |
except OSError: |
92 |
# The utime can fail here with EPERM even though the move succeeded. |
93 |
# Instead of failing, use stat to return the mtime if possible. |
94 |
|
95 |
Modified: main/trunk/pym/portage/dbapi/vartree.py |
96 |
=================================================================== |
97 |
--- main/trunk/pym/portage/dbapi/vartree.py 2008-11-26 20:30:22 UTC (rev 12108) |
98 |
+++ main/trunk/pym/portage/dbapi/vartree.py 2008-11-26 22:31:23 UTC (rev 12109) |
99 |
@@ -1756,6 +1756,7 @@ |
100 |
self._contents_inodes = None |
101 |
self._contents_basenames = None |
102 |
self._linkmap_broken = False |
103 |
+ self._md5_merge_map = {} |
104 |
|
105 |
def lockdb(self): |
106 |
if self._lock_vdb: |
107 |
@@ -3366,6 +3367,7 @@ |
108 |
if self.mergeme(srcroot, destroot, outfile, None, |
109 |
secondhand, cfgfiledict, mymtime): |
110 |
return 1 |
111 |
+ self._md5_merge_map.clear() |
112 |
|
113 |
#restore umask |
114 |
os.umask(prevmask) |
115 |
@@ -3767,9 +3769,17 @@ |
116 |
# whether config protection or not, we merge the new file the |
117 |
# same way. Unless moveme=0 (blocking directory) |
118 |
if moveme: |
119 |
- mymtime = movefile(mysrc, mydest, newmtime=thismtime, sstat=mystat, mysettings=self.settings) |
120 |
+ hardlink_key = (mymd5, mystat.st_size) |
121 |
+ hardlink_candidates = self._md5_merge_map.get(hardlink_key) |
122 |
+ if hardlink_candidates is None: |
123 |
+ hardlink_candidates = [] |
124 |
+ self._md5_merge_map[hardlink_key] = hardlink_candidates |
125 |
+ mymtime = movefile(mysrc, mydest, newmtime=thismtime, |
126 |
+ sstat=mystat, mysettings=self.settings, |
127 |
+ hardlink_candidates=hardlink_candidates) |
128 |
if mymtime is None: |
129 |
return 1 |
130 |
+ hardlink_candidates.append(mydest) |
131 |
zing = ">>>" |
132 |
|
133 |
if mymtime != None: |