1 |
On Wed, Nov 16, 2005 at 04:33:02AM +0100, Marius Mauch wrote: |
2 |
> On Wed, 16 Nov 2005 04:07:56 +0100 |
3 |
> Marius Mauch <genone@g.o> wrote: |
4 |
> |
5 |
> > Hey, |
6 |
> > |
7 |
> > IIRC we (=Gentoo as a whole) pretty much agreed to drop the digest |
8 |
> > files in favor of a extended Manifest format. Well, today I wrote some |
9 |
> > initial code for this new format, you can find the result in the |
10 |
> > attachment. It's far from complete, but most basic stuff seems to work |
11 |
> > with some restrictions: |
12 |
> > - `ebuild foo digest` works if you have all SRC_URI files for the |
13 |
> > package (= all ebuilds of that package), however it will not verify |
14 |
> > the Manifest before overwriting it. Also it currently won't create |
15 |
> > the old digests (digestgen is a bit nasty). |
16 |
> > - `emerge foo` should also work (thankfully the parsing could be |
17 |
> > implemented in a simple compat layer) |
18 |
> > - repoman will likely have problems (didn't touch it yet) |
19 |
> > - I didn't add al the fancy exception handling and cvs code (which IMO |
20 |
> > shouldn't be in portage itself but repoman) |
21 |
> > |
22 |
> > Opinions about adding Manifest2 support to the next "minor" version |
23 |
> > (2.2.0 or 2.0.54 or 2.1.0 or ...)? |
24 |
I'm game, assuming it's sane. |
25 |
|
26 |
> Index: pym/portage.py |
27 |
> =================================================================== |
28 |
> --- pym/portage.py (revision 2312) |
29 |
> +++ pym/portage.py (working copy) |
30 |
> @@ -95,6 +95,7 @@ |
31 |
> import portage_gpg |
32 |
> import portage_locks |
33 |
> import portage_exec |
34 |
> + import portage_manifest |
35 |
> from portage_locks import unlockfile,unlockdir,lockfile,lockdir |
36 |
> import portage_checksum |
37 |
> from portage_checksum import perform_md5,perform_checksum,prelink_capable |
38 |
> @@ -1723,9 +1724,9 @@ |
39 |
> mymirrors += [x] |
40 |
> |
41 |
> mydigests = {} |
42 |
> - digestfn = mysettings["FILESDIR"]+"/digest-"+mysettings["PF"] |
43 |
> + digestfn = portdb.finddigest(mysettings["CATEGORY"]+"/"+mysettings["PF"]) |
44 |
Like this mod personally, but don't like the '/'. You're passing in |
45 |
two chunks of data, pass them in seperate imo, rather then collapsing |
46 |
it for every call. |
47 |
|
48 |
> if os.path.exists(digestfn): |
49 |
> - mydigests = digestParseFile(digestfn) |
50 |
> + mydigests = digestParseFile(digestfn, calledFor="digest") |
51 |
|
52 |
> |
53 |
> fsmirrors = [] |
54 |
> for x in range(len(mymirrors)-1,-1,-1): |
55 |
> @@ -2136,8 +2137,10 @@ |
56 |
> # Track the old digest so we can assume checksums without requiring |
57 |
> # all files to be downloaded. 'Assuming' |
58 |
> myolddigest = {} |
59 |
> - if os.path.exists(digestfn): |
60 |
> - myolddigest = digestParseFile(digestfn) |
61 |
> + if portage_manifest.getManifestVersion(manifestfn) == 2: |
62 |
> + myolddigests = digestParseFile(manifestfn, calledFor="digest") |
63 |
> + elif os.path.exists(digestfn): |
64 |
> + myolddigest = digestParseFile(digestfn, calledFor="digest") |
65 |
|
66 |
Kind of disliking the 'calledFor' being thrown around here, plus the |
67 |
v2 check, then resorting to a seperate method of checking. |
68 |
Integrate 'em potentially? |
69 |
|
70 |
You're basically treating it as two seperate funcs (from how I look at |
71 |
it), should split it up (even if the seperated funcs call each other, |
72 |
or a common func). |
73 |
|
74 |
> @@ -2218,7 +2229,7 @@ |
75 |
> return 1 |
76 |
> |
77 |
> |
78 |
> -def digestParseFile(myfilename): |
79 |
> +def digestParseFile(myfilename, calledFor="manifest"): |
80 |
> """(filename) -- Parses a given file for entries matching: |
81 |
> MD5 MD5_STRING_OF_HEX_CHARS FILE_NAME FILE_SIZE |
82 |
> Ignores lines that do not begin with 'MD5' and returns a |
83 |
> @@ -2230,6 +2241,10 @@ |
84 |
> |
85 |
> mydigests={} |
86 |
> for x in mylines: |
87 |
> + tmp = portage_manifest.convertManifest2ToDigest(x, calledAs=calledFor) |
88 |
> + if tmp[0] == True: |
89 |
> + mydigests[tmp[1]] = tmp[2] |
90 |
> + continue |
91 |
> myline=string.split(x) |
92 |
> if len(myline) < 4: |
93 |
> #invalid line |
94 |
> @@ -2258,7 +2273,10 @@ |
95 |
> the basedir given. Returns 1 only if all files exist and match the md5s. |
96 |
> """ |
97 |
> for x in myfiles: |
98 |
> - if not mydigests.has_key(x): |
99 |
> + if portage_manfest.getManifestVersion(basedir+"/Manifest") == 2 and x.startswith("files/digest-"): |
100 |
hardcoding sucks, kthnx |
101 |
If possible, try to shove it into the func so modification (down the line) occurs |
102 |
in one spot, rather then having to resort to search/replace :) |
103 |
|
104 |
> Index: pym/portage_checksum.py |
105 |
> =================================================================== |
106 |
> --- pym/portage_checksum.py (revision 2312) |
107 |
> +++ pym/portage_checksum.py (working copy) |
108 |
> @@ -126,3 +126,12 @@ |
109 |
> portage_locks.unlockfile(mylock) |
110 |
> |
111 |
> return (myhash,mysize) |
112 |
> + |
113 |
> + |
114 |
> +hashfunc_map = {"MD5": md5hash, "SHA1": sha1hash} |
115 |
> + |
116 |
> +def perform_multiple_checksums(filename, hashes=["MD5"], calc_prelink=0): |
117 |
> + rVal = {} |
118 |
> + for x in hashes: |
119 |
> + rVal[x] = perform_checksum(filename, hashfunc_map[x], calc_prelink)[0] |
120 |
> + return rVal |
121 |
Not sure if I like the [0] tbh. |
122 |
Still think the size test should be broken off as it's own pseudo |
123 |
hashfunc, and implemented in that way. Course, would need to weight |
124 |
it somehow, so that size test is done first, then the more expensive |
125 |
chf tests... |
126 |
|
127 |
> Index: pym/portage_manifest.py |
128 |
> =================================================================== |
129 |
> --- pym/portage_manifest.py (revision 0) |
130 |
> +++ pym/portage_manifest.py (revision 0) |
131 |
> @@ -0,0 +1,163 @@ |
132 |
> +import os |
133 |
> + |
134 |
> +import portage, portage_exception, portage_checksum, portage_versions |
135 |
> + |
136 |
> +class Manifest2Entry: |
137 |
> + def __init__(self, entrytype, name, config, methods=portage_checksum.get_valid_checksum_keys()): |
138 |
The methods trick you're doing is a one time evaluation... not sure if |
139 |
that's the best approach tbh. |
140 |
|
141 |
> + if entrytype not in ["EBUILD", "AUXFILE", "MISCFILE", "SRCURI"]: |
142 |
> + raise portage_exception.PortageException("unknown Manifest entry type: %s" % entrytype) |
143 |
break that out into a class constant, and please kill off the capps |
144 |
(they kill kittens, or something). |
145 |
|
146 |
> + self.type = entrytype |
147 |
> + self.name = name |
148 |
> + self.methods = methods |
149 |
> + self.config = config |
150 |
I realize stable does this all over, but I really am not much for |
151 |
storing a config instance on this attribute. |
152 |
|
153 |
Thoughts on yanking the settings you need up front? |
154 |
|
155 |
> + def generate(self, pkgdir, genOldStyle=True): |
156 |
> + if self.type == "EBUILD" or self.type == "MISCFILE": |
157 |
> + filename = pkgdir+"/"+self.name |
158 |
> + elif self.type == "AUXFILE": |
159 |
> + filename = pkgdir+"/files/"+self.name |
160 |
> + elif self.type == "SRCURI": |
161 |
> + filename = self.config["DISTDIR"]+"/"+self.name |
162 |
> + if not os.path.exists(filename): |
163 |
> + raise portage_exception.PortageException("file for Manifest entry doesn't exist: %s" % filename) |
164 |
> + |
165 |
> + self.size = os.stat(filename).st_size |
166 |
> + self.checksums = portage_checksum.perform_multiple_checksums(filename, self.methods) |
167 |
See comment about making size into a pseudo chf func- would eliminate |
168 |
this case. |
169 |
Chunking this up into type handlers might be wise- would make it |
170 |
easier to extend (modify a class dict rather then N funcs). You've |
171 |
already got the seperated beast, just throw it into a mapping and |
172 |
it'll be nice and pretty. |
173 |
|
174 |
> + cs_list = [x+" "+self.checksums[x] for x in self.checksums.keys()] |
175 |
> + self.entry = " ".join([self.type, self.name, str(self.size)]+cs_list) |
176 |
> + if genOldStyle and self.type != "SRCURI": |
177 |
> + if "MD5" in self.methods: |
178 |
> + oldentry = Manifest2Entry._makeOldEntry(pkgdir, filename, size=self.size, md5sum=self.checksums["MD5"]) |
179 |
> + else: |
180 |
> + oldentry = Manifest2Entry._makeOldEntry(pkgdir, filename, size=self.size) |
181 |
Kinda iffy on this approach also, seems unclean (quite subjective I |
182 |
realize). |
183 |
|
184 |
> + self.entry += "\n"+oldentry |
185 |
> + return self.entry |
186 |
> + |
187 |
> + def generateOldDigestEntries(self, pkgdir, digestfiles): |
188 |
> + rVal = [] |
189 |
> + for d in digestfiles: |
190 |
> + filename = pkgdir+"/"+d |
191 |
> + if not os.path.exists(filename): |
192 |
> + raise portage_exception.PortageException("digest file for Manifest compatibility entry doesn't exist: %s" % filename) |
193 |
> + rVal.append(Manifest2Entry._makeOldEntry(pkgdir, filename)) |
194 |
> + return rVal |
195 |
> + generateOldDigestEntries = classmethod(generateOldDigestEntries) |
196 |
> + |
197 |
> + def _makeOldEntry(self, pkgdir, filename, size=None, md5sum=None): |
198 |
> + oldname = "/".join(filename.split("/")[len(pkgdir.split("/")):]) |
199 |
> + if md5sum == None: |
200 |
> + md5sum = portage_checksum.perform_md5(filename) |
201 |
> + if size == None: |
202 |
> + size = os.stat(filename).st_size |
203 |
> + return " ".join(["MD5", md5sum, oldname, str(size)]) |
204 |
> + _makeOldEntry = classmethod(_makeOldEntry) |
205 |
> + |
206 |
> +def manifest2AuxfileFilter(filename): |
207 |
> + filename = filename.strip("/") |
208 |
> + return not (filename in ["CVS", ".svn"] or filename[:len("digest-")] == "digest-") |
209 |
> + |
210 |
> +def manifest2MiscfileFilter(filename): |
211 |
> + filename = filename.strip("/") |
212 |
> + return not (filename in ["CVS", ".svn", "files", "Manifest"] or filename[-7:] == ".ebuild") |
213 |
> + |
214 |
> +def verifyManifest2(cpv, config): |
215 |
> + return (True, None) |
216 |
> + |
217 |
> +def createFullManifest2(pkgdir, config, checkExisting=False, requireAllSrcUriFiles=True, createOldDigests=True, manifestCompatible=True, debug=1): |
218 |
> + mydb = portage.db["/"]["porttree"].dbapi |
219 |
Really don't like that. |
220 |
Pass it in. |
221 |
|
222 |
> + |
223 |
> + if debug: |
224 |
> + print "pkgdir:", pkgdir |
225 |
> + |
226 |
> + cpv_list = [pkgdir.rstrip("/").split("/")[-2]+"/"+x[:-7] for x in portage.listdir(pkgdir) if x.endswith(".ebuild")] |
227 |
os.path.sep... |
228 |
not much for the portage.listdir reference there either, kind of an |
229 |
indication the evil listdir/cachedir should be broken out into their |
230 |
own module. |
231 |
|
232 |
> + srcuri_list = [] |
233 |
> + auxfile_list = [] |
234 |
> + miscfile_list = [] |
235 |
> + |
236 |
> + if checkExisting: |
237 |
> + (isOk, brokenFile) = verifyManifest2(pkgdir, config) |
238 |
> + if not isOk: |
239 |
> + return (1, brokenFile) |
240 |
> + for cpv in cpv_list: |
241 |
> + srcuri_list.extend([x.split("/")[-1] for x in mydb.aux_get(cpv, ["SRC_URI"])[0].split()]) |
242 |
> + auxfile_list = filter(manifest2AuxfileFilter, portage.listdir(pkgdir+"/files/", recursive=1, filesonly=1)) |
243 |
> + miscfile_list = filter(manifest2MiscfileFilter, portage.listdir(pkgdir, recursive=0, filesonly=1)) |
244 |
> + |
245 |
> + if requireAllSrcUriFiles: |
246 |
> + for f in srcuri_list: |
247 |
> + if not os.path.exists(config["DISTDIR"]+"/"+f): |
248 |
> + return (2, f) |
249 |
> + else: |
250 |
> + srcuri_list = [f for f in srcuri_list if os.path.exists(config["DISTDIR"]+"/"+f)] |
251 |
> + |
252 |
> + if debug: |
253 |
> + print "pkgdir:", pkgdir |
254 |
> + print "CPV:", cpv_list |
255 |
> + print "SRC_URI:", srcuri_list |
256 |
> + print "auxfiles:", auxfile_list |
257 |
> + print "miscfiles:", miscfile_list |
258 |
> + |
259 |
> + myentries = [] |
260 |
> + myentries += [Manifest2Entry("EBUILD", portage_versions.catsplit(x)[1]+".ebuild", config).generate(pkgdir, genOldStyle=manifestCompatible) for x in cpv_list] |
261 |
> + myentries += [Manifest2Entry("AUXFILE", x, config).generate(pkgdir, genOldStyle=manifestCompatible) for x in auxfile_list] |
262 |
> + myentries += [Manifest2Entry("MISCFILE", x, config).generate(pkgdir, genOldStyle=manifestCompatible) for x in miscfile_list] |
263 |
> + myentries += [Manifest2Entry("SRCURI", x, config).generate(pkgdir, genOldStyle=manifestCompatible) for x in srcuri_list] |
264 |
> + |
265 |
> + if createOldDigests: |
266 |
> + # TODO |
267 |
> + pass |
268 |
> + |
269 |
> + if manifestCompatible: |
270 |
> + digestfiles = ["files/"+x for x in portage.listdir(pkgdir+"/files/") if x.startswith("digest-")] |
271 |
> + myentries += Manifest2Entry.generateOldDigestEntries(pkgdir, digestfiles) |
272 |
> + |
273 |
> + # hack to allow sorting |
274 |
> + myentries = ("\n".join(myentries)).split("\n") |
275 |
> + myentries.sort() |
276 |
> + |
277 |
> + if debug: |
278 |
> + for x in myentries: |
279 |
> + print x |
280 |
> + manifest = "\n".join(myentries) |
281 |
> + manifestfile = open(pkgdir+"/Manifest", "w") |
282 |
> + manifestfile.write(manifest) |
283 |
> + manifestfile.close() |
284 |
Not commenting too much on the bits above atm, but I'd personally open |
285 |
a tmpfile alongside the manifest and write to it directly, same for |
286 |
digest, mving it in afterwards (or unlinking on failure). |
287 |
|
288 |
> + |
289 |
> + return (0, None) |
290 |
Eh? |
291 |
Exceptions seem like a better route here then magic constants and |
292 |
string error msgs. |
293 |
|
294 |
> +def convertManifest2ToDigest(manifestline, calledAs="manifest"): |
295 |
> + mysplit = manifestline.split() |
296 |
> + if mysplit[0] not in ["EBUILD", "AUXFILE", "MISCFILE", "SRCURI"] or ((len(mysplit) - 3) % 2) != 0: |
297 |
> + return (False, None, None) |
298 |
> + if mysplit[0] == "SRCURI" and calledAs == "manifest": |
299 |
> + return (False, None, None) |
300 |
> + if mysplit[0] in ["EBUILD", "AUXFILE", "MISCFILE"] and calledAs == "digest": |
301 |
> + return (False, None, None) |
302 |
> + |
303 |
> + checksums = {} |
304 |
> + |
305 |
> + mytype = mysplit.pop(0) |
306 |
> + filename = mysplit.pop(0) |
307 |
> + if mytype == "AUXFILE": |
308 |
> + filename = "files/"+filename |
309 |
> + size = long(mysplit.pop(0)) |
310 |
> + checksums = dict(zip(mysplit[::2], mysplit[1::2])) |
311 |
> + if not checksums.has_key("size"): |
312 |
> + checksums["size"] = size |
313 |
> + |
314 |
> + return (True, filename, checksums) |
315 |
> + |
316 |
> +def getManifestVersion(manifest): |
317 |
> + rVal = 0 |
318 |
> + |
319 |
> + if not os.path.exists(manifest): |
320 |
> + return rVal |
321 |
> + myfile = open(manifest, "r") |
322 |
> + lines = myfile.readlines() |
323 |
> + for l in lines: |
324 |
> + mysplit = l.split() |
325 |
> + if mysplit[0] == "MD5" and len(mysplit) == 4 and rVal == 0: |
326 |
> + rVal = 1 |
327 |
> + elif mysplit[0] in ["EBUILD", "AUXFILE", "MISCFILE", "SRCURI"] and ((len(mysplit) - 3) % 2) == 0 and rVal in [0,1]: |
328 |
> + rVal = 2 |
329 |
> + return rVal |
330 |
|
331 |
Offhand... I really think you need to do some subclassing above. |
332 |
Flow's mostly there, but a lot of icky stuff for long term maintenance |
333 |
(imo). Seems like a refactoring of the patch would bring line count |
334 |
down, and up readability. |
335 |
|
336 |
Note that was just a read through... haven't tested it yet, just code |
337 |
comments. Meanwhile, damn nice to see manifest/digest code (ty) :) |
338 |
~harring |