##
## Name:     treewalk.py
## Purpose:  Generator to iterate directory structure
## Author:   M. J. Fromberger
## Copyright (C) 2003 Michael J. Fromberger, All Rights Reserved.
## Info:     $Id: treewalk.py,v 1.8 2005/04/13 16:35:13 sting Exp $
##
## This module's walk() replicates the basic functionality of the
## walk() function of the os.path module, but it behaves as a
## generator rather than using a callback mechanism.  You have some
## control over how the directory structure is traversed, and you can
## filter, map, and prune the traversal with caller-supplied
## functions.
##

import os as _os
from collections import deque as _deque


class TPath(tuple):
    """Each pathname visited by the tree walker is represented by an
    instance of this class.  It is like a tuple whose first element is
    the enclosing directory and whose second is the filename, but it
    has a few other helpful methods.

    The isdir(), isnotdir(), and islink() static methods take a TPath
    as their argument, so they can be passed directly as the
    filter_proc or prune_proc arguments of walk()."""

    @staticmethod
    def isdir(tp):
        """Returns True if tp is a directory; otherwise False."""
        return _os.path.isdir(tp.path())

    @staticmethod
    def isnotdir(tp):
        """Returns True if tp is not a directory; otherwise False."""
        return not _os.path.isdir(tp.path())

    @staticmethod
    def islink(tp):
        """Returns True if tp is a symbolic link; otherwise False."""
        return _os.path.islink(tp.path())

    def path(self):
        """Form a complete pathname by joining the tuple's elements."""
        return _os.path.join(*self)

    def directory(self):
        """Return the directory component of the path (element 0)."""
        return self[0]

    def filename(self):
        """Return the filename component of the path (element 1)."""
        return self[1]

    def set_aux(self, obj):
        """Set auxiliary data for this path."""
        self.aux = obj

    def get_aux(self):
        """Get auxiliary data for this path, or None if none was set."""
        return getattr(self, 'aux', None)


def _list_children(directory):
    """Return a list of TPath objects for the entries of 'directory',
    with the entries for which os.path.isdir() is true first and all
    other entries after them.

    walk() consumes its queue from the right-hand end, so the
    non-directory entries at the tail of this list are visited before
    the directories.  Note that os.path.isdir() follows symbolic
    links, so a link that points to a directory is grouped with the
    directories."""
    dirs = []
    others = []
    for name in _os.listdir(directory):
        entry = TPath((directory, name))
        if _os.path.isdir(entry.path()):
            dirs.append(entry)
        else:
            others.append(entry)
    return dirs + others


def walk(root, mode='depth', follow_links=False, map_func=None,
         filter_proc=None, prune_proc=None):
    """A generator that walks the directory structure rooted at the
    given path name 'root'.

    If 'root' is a directory (and is not a symbolic link, unless
    follow_links is True), its contents are traversed; the root
    directory itself is not yielded.  Otherwise, the traversal
    consists of the single entry obtained by splitting 'root' into
    its directory and filename components.

    If mode begins with 'depth' (the default is 'depth'), the
    traversal is done depth-first; if mode begins with 'breadth' it is
    done breadth-first.  Any other mode raises ValueError when
    iteration starts.  In all cases, the plain files in a directory
    are visited before any subdirectories.

    If follow_links is True, symbolic links are followed; otherwise
    they are yielded like any other entry but are never expanded, and
    prune_proc is not consulted for them.  Note: Following symbolic
    links can lead to cycles in the traversal, which this walker does
    not attempt to detect.

    For each node of the directory graph, a TPath object is yielded.
    A TPath is a tuple, whose first element is the enclosing directory
    and whose second is the filename.

    If map_func is provided, and is not None, it is called for each
    TPath object, and the resulting value is stored as the auxiliary
    data for the object (see TPath.get_aux).

    The filter_proc argument, if provided and not None, is called to
    determine which nodes should actually be output.  The filter_proc
    is called after the map_func (if any), and if it returns a true
    value, the node is yielded; otherwise it is skipped.  Filtering
    only affects what is yielded: a directory that is filtered out is
    still expanded.

    The prune_proc argument, if provided and not None, is called for
    each directory to determine whether that directory should be
    pruned or expanded.  If it returns a true value, the directory is
    pruned, otherwise, it is fully expanded.  The prune_proc is called
    after the map_func and filter_proc, if any, have been applied.

    Errors raised by os.listdir() (for example, on an unreadable
    directory) are not caught and propagate to the caller."""

    # Reject an unrecognized mode up front; silently accepting one
    # would discard the contents of every subdirectory.
    if mode.startswith('depth'):
        depth_first = True
    elif mode.startswith('breadth'):
        depth_first = False
    else:
        raise ValueError("unknown traversal mode %r "
                         "(expected 'depth' or 'breadth')" % (mode,))

    if _os.path.isdir(root) and (follow_links or not _os.path.islink(root)):
        queue = _deque(_list_children(root))
    else:
        queue = _deque([TPath(_os.path.split(root))])

    # Nodes are always taken from the right-hand end of the queue.
    while queue:
        node = queue.pop()

        # Apply mapping function
        if map_func is not None:
            node.set_aux(map_func(node))

        # Apply filtering criterion
        if filter_proc is None or filter_proc(node):
            yield node

        path = node.path()
        if not follow_links and _os.path.islink(path):
            continue                # links are reported but not expanded
        if not _os.path.isdir(path):
            continue
        if prune_proc is not None and prune_proc(node):
            continue

        children = _list_children(path)
        if depth_first:
            # Children go on the right, so they are visited next.
            queue.extend(children)
        else:
            # Children go on the left, behind everything already
            # waiting; reversed() compensates for extendleft()
            # reversing its argument, keeping them in listed order.
            queue.extendleft(reversed(children))

# Here there be dragons