1 |
Hm, gave it a run. The code looks pretty neat. I haven't studied it |
2 |
enough to know exactly what it does, but it seems like it has some real |
3 |
nifty features. Unfortunately, it seems awfully slow and heavy on the |
4 |
system. My first run crashed X, I think. The second hung up when caching |
5 |
vanilla-sources. Third time worked though, and I like the brevity of the |
6 |
list it produces. A check on the used processor time says it is about 4 |
7 |
times slower than a simple sh script that was presented a while back |
8 |
which does no filtering of the output. It simply prints all files on the |
9 |
system not mentioned in /var/db/pkg. Either way, looks like a good |
10 |
program, but I think it would be worth trying to find ways to |
11 |
make it faster. |
12 |
//Daniel Armyr |
13 |
|
14 |
leon j. breedt wrote: |
15 |
|
16 |
>hi, |
17 |
> |
18 |
>i use the attached script to scan for unpackaged files on my filesystem, |
19 |
>and found quite a few in /etc, /usr/lib, /usr/X11R6 as well as the |
20 |
>expected places. most of them were symlinks, the intention being fairly |
21 |
>obvious (like the NVIDIA OpenGL stuff). |
22 |
> |
23 |
>but i was hoping someone could explain why files like /etc/make.conf, /etc/csh.env, |
24 |
>/etc/env.d/05gcc and /usr/include/awk/acconfig.h didn't belong to any package. |
25 |
> |
26 |
>i run the script with: |
27 |
> |
28 |
>$ ./gtfilelint -v -C gtfilelint.conf -o orphans.list |
29 |
> |
30 |
>use -h to see available params. multiple -v increases verbosity. |
31 |
> |
32 |
>if you have a system with lots of packages, it's going to take some time, |
33 |
>as it caches all the /var/db/pkg/**/CONTENTS entries in a Berkeley hashdb |
34 |
>for quick lookups, then runs /usr/bin/find on /, and compares results. exclusions |
35 |
>to the find output are made by adding python re module regexes to |
36 |
>gtfilelint.conf. |
37 |
> |
38 |
>if you run it as user, you may get some error output from find about permissions. |
39 |
>you will want to specify a config file, otherwise you'll get a lot of stuff |
40 |
>you probably don't care about. |
41 |
> |
42 |
>hope someone finds this useful |
43 |
> |
44 |
>leon |
45 |
> |
46 |
> |
47 |
> |
48 |
>------------------------------------------------------------------------ |
49 |
> |
50 |
>#!/usr/bin/env python |
51 |
># |
52 |
># Finds files on Gentoo Linux systems that do not belong |
53 |
># to any installed package. |
54 |
># |
55 |
># Released under the GNU GPL. |
56 |
># |
57 |
># (C) Copyright 2003 Leon J. Breedt |
58 |
># |
59 |
># $Id$ |
60 |
> |
61 |
>import dbhash |
62 |
>import getopt |
63 |
>import os |
64 |
>import os.path |
65 |
>import re |
66 |
>import string |
67 |
>import sys |
68 |
> |
69 |
>TRUE = 1 |
70 |
>FALSE = 0 |
71 |
>version = '0.1.1' |
72 |
>configfile = '/etc/gtfilelint.conf' |
73 |
>dbdir = '/var/db/pkg' |
74 |
>cachefile = '/tmp/gtfilelint.db' |
75 |
>outputfile = None |
76 |
>warnmissing = FALSE |
77 |
>findcmd = 'find / -print' |
78 |
>exclusions = [] |
79 |
>verbosity = 0 |
80 |
>cachedb = None |
81 |
> |
82 |
>def verb(msg, level=1): |
83 |
> if verbosity >= level: |
84 |
> sys.stderr.write('-- %s\n' % msg) |
85 |
> |
86 |
>def vverb(msg): |
87 |
> verb(msg, 2) |
88 |
> |
89 |
>def info(msg): |
90 |
> sys.stderr.write('>> %s\n' % msg) |
91 |
> |
92 |
>def error(msg): |
93 |
> sys.stderr.write('error: %s\n' % msg) |
94 |
> sys.exit(1) |
95 |
> |
96 |
>def warn(msg): |
97 |
> sys.stderr.write('warning: %s\n' % msg) |
98 |
> |
99 |
>def usage(): |
100 |
> print 'usage: %s [options]' % sys.argv[0] |
101 |
> print 'options:' |
102 |
> print '-h|--help display this message' |
103 |
> print '-V|--version print program version and exit' |
104 |
> print '-v|--verbose print verbose messages about what is being done' |
105 |
> print '-d|--dbdir directory containing package database (default: %s)' % dbdir |
106 |
> print '-c|--cachefile file to place temporary cache in (default: %s)' % cachefile |
107 |
> print '-C|--configfile configuration file (default: %s)' % configfile |
108 |
> print '-o|--outputfile file to print orphan list to (default: stdout)' |
109 |
> print '--warnmissing warn if files declared in CONTENTS don\'t exist' |
110 |
> |
111 |
>def parse_cmdline(): |
112 |
> global verbosity, dbdir, cachefile, configfile, outputfile, warnmissing |
113 |
> opts, args = getopt.getopt(sys.argv[1:], "hVvd:c:C:o:", ["help", "version", "verbose", "dbdir=", "cachefile=", "configfile=", "outputfile=", "warnmissing"]) |
114 |
> for opt, arg in opts: |
115 |
> if opt in ("-h", "--help"): |
116 |
> usage() |
117 |
> sys.exit(0) |
118 |
> if opt in ("-V", "--version"): |
119 |
> print version |
120 |
> sys.exit(0) |
121 |
> if opt in ("-v", "--verbose"): |
122 |
> verbosity = verbosity + 1 |
123 |
> if opt in ("-d", "--dbdir"): |
124 |
> dbdir = arg |
125 |
> if opt in ("-c", "--cachefile"): |
126 |
> cachefile = arg |
127 |
> if opt in ("-C", "--configfile"): |
128 |
> configfile = arg |
129 |
> if opt in ("-o", "--outputfile"): |
130 |
> outputfile = arg |
131 |
> if opt == "--warnmissing": |
132 |
> warnmissing = TRUE |
133 |
> |
134 |
>def parse_config(): |
135 |
> if not os.path.exists(configfile) or not os.access(configfile, os.R_OK): |
136 |
> warn('missing configfile "%s"' % configfile) |
137 |
> return |
138 |
> fp = open(configfile, 'r') |
139 |
> for line in fp.readlines(): |
140 |
> line = string.strip(line) |
141 |
> if len(line) == 0: |
142 |
> continue |
143 |
> if line[0] == '#': |
144 |
> continue |
145 |
> exclusions.append(re.compile(line)) |
146 |
> verb('adding "%s" to list of exclusion regular expressions' % line) |
147 |
> fp.close() |
148 |
> |
149 |
>def cache_package_files(package, packagepath): |
150 |
> verb('caching contents of "%s"' % package) |
151 |
> fp = open(packagepath + '/CONTENTS') |
152 |
> lineno = 0 |
153 |
> for line in fp.readlines(): |
154 |
> lineno = lineno + 1 |
155 |
> line = string.strip(line) |
156 |
> if len(line) == 0: |
157 |
> continue |
158 |
> key = None |
159 |
> m = re.match(r"^dir (\S.*)$", line) |
160 |
> if m: |
161 |
> key = m.group(1) |
162 |
> m = None |
163 |
> else: |
164 |
> m = re.match(r"^obj (\S.*) (\S+) (\d+)\s*$", line) |
165 |
> if m: |
166 |
> key = m.group(1) |
167 |
> m = None |
168 |
> m = re.match(r"^sym (\S.*) -> .*$", line) |
169 |
> if m: |
170 |
> key = m.group(1) |
171 |
> if key != None: |
172 |
> if not os.path.exists(key) and warnmissing: |
173 |
> warn('%s: "%s" does not exist on filesystem, ignoring' % (package, key)) |
174 |
> vverb('caching "%s"' % key) |
175 |
> cachedb[key] = '' |
176 |
> else: |
177 |
> vverb('key is None for "%s" CONTENTS line %d' % (package, lineno)) |
178 |
> fp.close() |
179 |
> |
180 |
>def scan_group_packages(group, grouppath): |
181 |
> packages = os.listdir(grouppath) |
182 |
> packages.sort() |
183 |
> verb('found %d packages in group "%s"' % (len(packages), group)) |
184 |
> for package in packages: |
185 |
> packagepath = grouppath + '/' + package |
186 |
> cache_package_files(package, packagepath) |
187 |
> |
188 |
>def create_system_filelist(): |
189 |
> info('scanning all files on system') |
190 |
> sout = os.popen(findcmd, 'r') |
191 |
> verb('reading paths from "%s"' % findcmd) |
192 |
> paths = sout.readlines() |
193 |
> orphans = 0 |
194 |
> rptfp = None |
195 |
> for path in paths: |
196 |
> path = string.strip(path) |
197 |
> if len(path) == 0: |
198 |
> continue |
199 |
> if path[0] != '/': |
200 |
> warn('ignoring relative path "%s"' % path) |
201 |
> continue |
202 |
> matched = FALSE |
203 |
> for exre in exclusions: |
204 |
> if exre.match(path): |
205 |
> matched = TRUE |
206 |
> break |
207 |
> if matched: |
208 |
> vverb('"%s" matched exclusion regex, ignoring' % path) |
209 |
> continue |
210 |
> if not cachedb.has_key(path): |
211 |
> if orphans == 0: |
212 |
> if outputfile: |
213 |
> info('writing orphaned file list [%s]' % outputfile) |
214 |
> rptfp = open(outputfile, 'w+') |
215 |
> else: |
216 |
> info('orphaned files:') |
217 |
> rptfp = sys.stdout |
218 |
> rptfp.flush() |
219 |
> orphans = orphans + 1 |
220 |
> rptfp.write('%s\n' % path) |
221 |
> rptfp.flush() |
222 |
> if rptfp: |
223 |
> rptfp.close() |
224 |
> sout.close() |
225 |
> if orphans > 0: |
226 |
> info('%d orphaned file(s) found' % orphans) |
227 |
> else: |
228 |
> info('no orphaned files on system') |
229 |
> |
230 |
># Main |
231 |
>try: |
232 |
> parse_cmdline() |
233 |
> parse_config() |
234 |
> info('creating packaged files cache [%s]' % cachefile) |
235 |
> cachedb = dbhash.open(cachefile, 'n') |
236 |
> try: |
237 |
> groups = os.listdir(dbdir) |
238 |
> groups.sort() |
239 |
> for group in groups: |
240 |
> grouppath = dbdir + '/' + group |
241 |
> scan_group_packages(group, grouppath) |
242 |
> create_system_filelist() |
243 |
> finally: |
244 |
> if cachedb: |
245 |
> cachedb.close() |
246 |
> os.unlink(cachefile) |
247 |
>except KeyboardInterrupt: 1 |
248 |
>except: |
249 |
> raise |
250 |
> |
251 |
> |
252 |
>------------------------------------------------------------------------ |
253 |
> |
254 |
># we don't really care about these dynamic paths |
255 |
>^/var/log/.* |
256 |
>^/var/db/.* |
257 |
>^/var/spool/.* |
258 |
>^/var/tmp/.* |
259 |
>^/var/lib/.* |
260 |
>^/var/cache/.* |
261 |
>^/var/run/.* |
262 |
> |
263 |
># / is not owned by any package |
264 |
>^/$ |
265 |
> |
266 |
># don't care about root's config files |
267 |
>^/root/.* |
268 |
> |
269 |
># /usr/local is typically just user compiled stuff, |
270 |
># don't care about it -- this list from baselayout |
271 |
>^/usr/local/bin/.* |
272 |
>^/usr/local/doc$ |
273 |
>^/usr/local/lib/.* |
274 |
>^/usr/local/man$ |
275 |
>^/usr/local/src/.* |
276 |
>^/usr/local/sbin/.* |
277 |
>^/usr/local/games/.* |
278 |
>^/usr/local/share/doc/.* |
279 |
>^/usr/local/share/man/.* |
280 |
>^/usr/local/share/.* |
281 |
> |
282 |
># what is /lib/dev-state? dunno...but the dir is in |
283 |
># baselayout, even if the files arent |
284 |
>^/lib/dev-state/.* |
285 |
> |
286 |
># devices aren't that important to us...the packaged |
287 |
># files will not be visible anyway due to devfs |
288 |
>^/dev/.* |
289 |
> |
290 |
># anyone packaging anything into /tmp should be shot |
291 |
>^/tmp/.* |
292 |
> |
293 |
># portage tree we don't care about either |
294 |
>^/usr/portage$ |
295 |
>^/usr/portage/.* |
296 |
> |
297 |
># mountpoints shouldn't have package files installed in them |
298 |
>^/mnt/.* |
299 |
> |
300 |
># system filesystems should be ignored |
301 |
>^/proc/.* |
302 |
>^/sys/.* |
303 |
> |
304 |
># USER CUSTOMIZATIONS |
305 |
>^/data |
306 |
>^/data/.* |
307 |
>^/cdrom.* |
308 |
>^/windata |
309 |
>^/windata/.* |
310 |
> |
311 |
> |
312 |
|
313 |
|
314 |
-- |
315 |
gentoo-dev@g.o mailing list |